1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGObjCRuntime.h" 16 #include "CGOpenCLRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "CodeGenModule.h" 19 #include "TargetInfo.h" 20 #include "clang/AST/ASTContext.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/Analysis/Analyses/OSLog.h" 23 #include "clang/Basic/TargetBuiltins.h" 24 #include "clang/Basic/TargetInfo.h" 25 #include "clang/CodeGen/CGFunctionInfo.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/IR/CallSite.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/IR/InlineAsm.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/MDBuilder.h" 32 #include <sstream> 33 34 using namespace clang; 35 using namespace CodeGen; 36 using namespace llvm; 37 38 static 39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) { 40 return std::min(High, std::max(Low, Value)); 41 } 42 43 /// getBuiltinLibFunction - Given a builtin id for a function like 44 /// "__builtin_fabsf", return a Function* for "fabsf". 45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 46 unsigned BuiltinID) { 47 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 48 49 // Get the name, skip over the __builtin_ prefix (if necessary). 50 StringRef Name; 51 GlobalDecl D(FD); 52 53 // If the builtin has been declared explicitly with an assembler label, 54 // use the mangled name. This differs from the plain label on platforms 55 // that prefix labels. 
56 if (FD->hasAttr<AsmLabelAttr>()) 57 Name = getMangledName(D); 58 else 59 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 60 61 llvm::FunctionType *Ty = 62 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 63 64 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 65 } 66 67 /// Emit the conversions required to turn the given value into an 68 /// integer of the given size. 69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 70 QualType T, llvm::IntegerType *IntType) { 71 V = CGF.EmitToMemory(V, T); 72 73 if (V->getType()->isPointerTy()) 74 return CGF.Builder.CreatePtrToInt(V, IntType); 75 76 assert(V->getType() == IntType); 77 return V; 78 } 79 80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 81 QualType T, llvm::Type *ResultType) { 82 V = CGF.EmitFromMemory(V, T); 83 84 if (ResultType->isPointerTy()) 85 return CGF.Builder.CreateIntToPtr(V, ResultType); 86 87 assert(V->getType() == ResultType); 88 return V; 89 } 90 91 /// Utility to insert an atomic instruction based on Instrinsic::ID 92 /// and the expression node. 
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 94 llvm::AtomicRMWInst::BinOp Kind, 95 const CallExpr *E) { 96 QualType T = E->getType(); 97 assert(E->getArg(0)->getType()->isPointerType()); 98 assert(CGF.getContext().hasSameUnqualifiedType(T, 99 E->getArg(0)->getType()->getPointeeType())); 100 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 101 102 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 103 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 104 105 llvm::IntegerType *IntType = 106 llvm::IntegerType::get(CGF.getLLVMContext(), 107 CGF.getContext().getTypeSize(T)); 108 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 109 110 llvm::Value *Args[2]; 111 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 112 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 113 llvm::Type *ValueType = Args[1]->getType(); 114 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 115 116 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 117 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 118 return EmitFromInt(CGF, Result, T, ValueType); 119 } 120 121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 122 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 123 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 124 125 // Convert the type of the pointer to a pointer to the stored type. 
126 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 127 Value *BC = CGF.Builder.CreateBitCast( 128 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 129 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 130 LV.setNontemporal(true); 131 CGF.EmitStoreOfScalar(Val, LV, false); 132 return nullptr; 133 } 134 135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 136 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 137 138 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 139 LV.setNontemporal(true); 140 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 141 } 142 143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 144 llvm::AtomicRMWInst::BinOp Kind, 145 const CallExpr *E) { 146 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 147 } 148 149 /// Utility to insert an atomic instruction based Instrinsic::ID and 150 /// the expression node, where the return value is the result of the 151 /// operation. 
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 153 llvm::AtomicRMWInst::BinOp Kind, 154 const CallExpr *E, 155 Instruction::BinaryOps Op, 156 bool Invert = false) { 157 QualType T = E->getType(); 158 assert(E->getArg(0)->getType()->isPointerType()); 159 assert(CGF.getContext().hasSameUnqualifiedType(T, 160 E->getArg(0)->getType()->getPointeeType())); 161 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 162 163 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 164 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 165 166 llvm::IntegerType *IntType = 167 llvm::IntegerType::get(CGF.getLLVMContext(), 168 CGF.getContext().getTypeSize(T)); 169 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 170 171 llvm::Value *Args[2]; 172 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 173 llvm::Type *ValueType = Args[1]->getType(); 174 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 175 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 176 177 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 178 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 179 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 180 if (Invert) 181 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 182 llvm::ConstantInt::get(IntType, -1)); 183 Result = EmitFromInt(CGF, Result, T, ValueType); 184 return RValue::get(Result); 185 } 186 187 /// @brief Utility to insert an atomic cmpxchg instruction. 188 /// 189 /// @param CGF The current codegen function. 190 /// @param E Builtin call expression to convert to cmpxchg. 191 /// arg0 - address to operate on 192 /// arg1 - value to compare with 193 /// arg2 - new value 194 /// @param ReturnBool Specifies whether to return success flag of 195 /// cmpxchg result or the old value. 
196 /// 197 /// @returns result of cmpxchg, according to ReturnBool 198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 199 bool ReturnBool) { 200 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 201 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 202 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 203 204 llvm::IntegerType *IntType = llvm::IntegerType::get( 205 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 206 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 207 208 Value *Args[3]; 209 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 210 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 211 llvm::Type *ValueType = Args[1]->getType(); 212 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 213 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 214 215 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 216 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 217 llvm::AtomicOrdering::SequentiallyConsistent); 218 if (ReturnBool) 219 // Extract boolean success flag and zext it to int. 220 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 221 CGF.ConvertType(E->getType())); 222 else 223 // Extract old value and emit it using the same type as compare value. 224 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 225 ValueType); 226 } 227 228 // Emit a simple mangled intrinsic that has 1 argument and a return type 229 // matching the argument type. 230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 231 const CallExpr *E, 232 unsigned IntrinsicID) { 233 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 234 235 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 236 return CGF.Builder.CreateCall(F, Src0); 237 } 238 239 // Emit an intrinsic that has 2 operands of the same type as its result. 
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 241 const CallExpr *E, 242 unsigned IntrinsicID) { 243 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 244 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 245 246 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 247 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 248 } 249 250 // Emit an intrinsic that has 3 operands of the same type as its result. 251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 252 const CallExpr *E, 253 unsigned IntrinsicID) { 254 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 255 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 256 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 257 258 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 259 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 260 } 261 262 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 264 const CallExpr *E, 265 unsigned IntrinsicID) { 266 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 267 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 268 269 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 270 return CGF.Builder.CreateCall(F, {Src0, Src1}); 271 } 272 273 /// EmitFAbs - Emit a call to @llvm.fabs(). 274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 275 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 276 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 277 Call->setDoesNotAccessMemory(); 278 return Call; 279 } 280 281 /// Emit the computation of the sign bit for a floating point value. Returns 282 /// the i1 sign bit value. 
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 284 LLVMContext &C = CGF.CGM.getLLVMContext(); 285 286 llvm::Type *Ty = V->getType(); 287 int Width = Ty->getPrimitiveSizeInBits(); 288 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 289 V = CGF.Builder.CreateBitCast(V, IntTy); 290 if (Ty->isPPC_FP128Ty()) { 291 // We want the sign bit of the higher-order double. The bitcast we just 292 // did works as if the double-double was stored to memory and then 293 // read as an i128. The "store" will put the higher-order double in the 294 // lower address in both little- and big-Endian modes, but the "load" 295 // will treat those bits as a different part of the i128: the low bits in 296 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 297 // we need to shift the high bits down to the low before truncating. 298 Width >>= 1; 299 if (CGF.getTarget().isBigEndian()) { 300 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 301 V = CGF.Builder.CreateLShr(V, ShiftCst); 302 } 303 // We are truncating value in order to extract the higher-order 304 // double, which we will be using to extract the sign from. 305 IntTy = llvm::IntegerType::get(C, Width); 306 V = CGF.Builder.CreateTrunc(V, IntTy); 307 } 308 Value *Zero = llvm::Constant::getNullValue(IntTy); 309 return CGF.Builder.CreateICmpSLT(V, Zero); 310 } 311 312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 313 const CallExpr *E, llvm::Constant *calleeValue) { 314 CGCallee callee = CGCallee::forDirect(calleeValue, FD); 315 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 316 } 317 318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 319 /// depending on IntrinsicID. 320 /// 321 /// \arg CGF The current codegen function. 322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 323 /// \arg X The first argument to the llvm.*.with.overflow.*. 
324 /// \arg Y The second argument to the llvm.*.with.overflow.*. 325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 326 /// \returns The result (i.e. sum/product) returned by the intrinsic. 327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 328 const llvm::Intrinsic::ID IntrinsicID, 329 llvm::Value *X, llvm::Value *Y, 330 llvm::Value *&Carry) { 331 // Make sure we have integers of the same width. 332 assert(X->getType() == Y->getType() && 333 "Arguments must be the same type. (Did you forget to make sure both " 334 "arguments have the same integer width?)"); 335 336 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 337 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 338 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 339 return CGF.Builder.CreateExtractValue(Tmp, 0); 340 } 341 342 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 343 unsigned IntrinsicID, 344 int low, int high) { 345 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 346 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 347 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 348 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 349 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 350 return Call; 351 } 352 353 namespace { 354 struct WidthAndSignedness { 355 unsigned Width; 356 bool Signed; 357 }; 358 } 359 360 static WidthAndSignedness 361 getIntegerWidthAndSignedness(const clang::ASTContext &context, 362 const clang::QualType Type) { 363 assert(Type->isIntegerType() && "Given type is not an integer."); 364 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 365 bool Signed = Type->isSignedIntegerType(); 366 return {Width, Signed}; 367 } 368 369 // Given one or more integer types, this function produces an integer type that 370 // encompasses them: any value in one of the given types could be expressed in 371 // the encompassing type. 
372 static struct WidthAndSignedness 373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 374 assert(Types.size() > 0 && "Empty list of types."); 375 376 // If any of the given types is signed, we must return a signed type. 377 bool Signed = false; 378 for (const auto &Type : Types) { 379 Signed |= Type.Signed; 380 } 381 382 // The encompassing type must have a width greater than or equal to the width 383 // of the specified types. Aditionally, if the encompassing type is signed, 384 // its width must be strictly greater than the width of any unsigned types 385 // given. 386 unsigned Width = 0; 387 for (const auto &Type : Types) { 388 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 389 if (Width < MinWidth) { 390 Width = MinWidth; 391 } 392 } 393 394 return {Width, Signed}; 395 } 396 397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 398 llvm::Type *DestType = Int8PtrTy; 399 if (ArgValue->getType() != DestType) 400 ArgValue = 401 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 402 403 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 404 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 405 } 406 407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 408 /// __builtin_object_size(p, @p To) is correct 409 static bool areBOSTypesCompatible(int From, int To) { 410 // Note: Our __builtin_object_size implementation currently treats Type=0 and 411 // Type=2 identically. Encoding this implementation detail here may make 412 // improving __builtin_object_size difficult in the future, so it's omitted. 413 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 414 } 415 416 static llvm::Value * 417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 418 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 419 } 420 421 llvm::Value * 422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 423 llvm::IntegerType *ResType, 424 llvm::Value *EmittedE) { 425 uint64_t ObjectSize; 426 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 427 return emitBuiltinObjectSize(E, Type, ResType, EmittedE); 428 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 429 } 430 431 /// Returns a Value corresponding to the size of the given expression. 432 /// This Value may be either of the following: 433 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 434 /// it) 435 /// - A call to the @llvm.objectsize intrinsic 436 /// 437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null 438 /// and we wouldn't otherwise try to reference a pass_object_size parameter, 439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. 440 llvm::Value * 441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 442 llvm::IntegerType *ResType, 443 llvm::Value *EmittedE) { 444 // We need to reference an argument if the pointer is a parameter with the 445 // pass_object_size attribute. 446 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 447 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 448 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 449 if (Param != nullptr && PS != nullptr && 450 areBOSTypesCompatible(PS->getType(), Type)) { 451 auto Iter = SizeArguments.find(Param); 452 assert(Iter != SizeArguments.end()); 453 454 const ImplicitParamDecl *D = Iter->second; 455 auto DIter = LocalDeclMap.find(D); 456 assert(DIter != LocalDeclMap.end()); 457 458 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 459 getContext().getSizeType(), E->getLocStart()); 460 } 461 } 462 463 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 464 // evaluate E for side-effects. 
In either case, we shouldn't lower to 465 // @llvm.objectsize. 466 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) 467 return getDefaultBuiltinObjectSizeResult(Type, ResType); 468 469 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); 470 assert(Ptr->getType()->isPointerTy() && 471 "Non-pointer passed to __builtin_object_size?"); 472 473 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); 474 475 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. 476 Value *Min = Builder.getInt1((Type & 2) != 0); 477 // For GCC compatability, __builtin_object_size treat NULL as unknown size. 478 Value *NullIsUnknown = Builder.getTrue(); 479 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); 480 } 481 482 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we 483 // handle them here. 484 enum class CodeGenFunction::MSVCIntrin { 485 _BitScanForward, 486 _BitScanReverse, 487 _InterlockedAnd, 488 _InterlockedDecrement, 489 _InterlockedExchange, 490 _InterlockedExchangeAdd, 491 _InterlockedExchangeSub, 492 _InterlockedIncrement, 493 _InterlockedOr, 494 _InterlockedXor, 495 _interlockedbittestandset, 496 __fastfail, 497 }; 498 499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 500 const CallExpr *E) { 501 switch (BuiltinID) { 502 case MSVCIntrin::_BitScanForward: 503 case MSVCIntrin::_BitScanReverse: { 504 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 505 506 llvm::Type *ArgType = ArgValue->getType(); 507 llvm::Type *IndexType = 508 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 509 llvm::Type *ResultType = ConvertType(E->getType()); 510 511 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 512 Value *ResZero = llvm::Constant::getNullValue(ResultType); 513 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 514 515 BasicBlock *Begin = Builder.GetInsertBlock(); 516 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 517 
Builder.SetInsertPoint(End); 518 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 519 520 Builder.SetInsertPoint(Begin); 521 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 522 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 523 Builder.CreateCondBr(IsZero, End, NotZero); 524 Result->addIncoming(ResZero, Begin); 525 526 Builder.SetInsertPoint(NotZero); 527 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 528 529 if (BuiltinID == MSVCIntrin::_BitScanForward) { 530 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 531 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 532 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 533 Builder.CreateStore(ZeroCount, IndexAddress, false); 534 } else { 535 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 536 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 537 538 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 539 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 540 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 541 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 542 Builder.CreateStore(Index, IndexAddress, false); 543 } 544 Builder.CreateBr(End); 545 Result->addIncoming(ResOne, NotZero); 546 547 Builder.SetInsertPoint(End); 548 return Result; 549 } 550 case MSVCIntrin::_InterlockedAnd: 551 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 552 case MSVCIntrin::_InterlockedExchange: 553 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 554 case MSVCIntrin::_InterlockedExchangeAdd: 555 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 556 case MSVCIntrin::_InterlockedExchangeSub: 557 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 558 case MSVCIntrin::_InterlockedOr: 559 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 560 case MSVCIntrin::_InterlockedXor: 561 
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 562 563 case MSVCIntrin::_interlockedbittestandset: { 564 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 565 llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); 566 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 567 AtomicRMWInst::Or, Addr, 568 Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), 569 llvm::AtomicOrdering::SequentiallyConsistent); 570 // Shift the relevant bit to the least significant position, truncate to 571 // the result type, and test the low bit. 572 llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); 573 llvm::Value *Truncated = 574 Builder.CreateTrunc(Shifted, ConvertType(E->getType())); 575 return Builder.CreateAnd(Truncated, 576 ConstantInt::get(Truncated->getType(), 1)); 577 } 578 579 case MSVCIntrin::_InterlockedDecrement: { 580 llvm::Type *IntTy = ConvertType(E->getType()); 581 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 582 AtomicRMWInst::Sub, 583 EmitScalarExpr(E->getArg(0)), 584 ConstantInt::get(IntTy, 1), 585 llvm::AtomicOrdering::SequentiallyConsistent); 586 return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); 587 } 588 case MSVCIntrin::_InterlockedIncrement: { 589 llvm::Type *IntTy = ConvertType(E->getType()); 590 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 591 AtomicRMWInst::Add, 592 EmitScalarExpr(E->getArg(0)), 593 ConstantInt::get(IntTy, 1), 594 llvm::AtomicOrdering::SequentiallyConsistent); 595 return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); 596 } 597 598 case MSVCIntrin::__fastfail: { 599 // Request immediate process termination from the kernel. 
The instruction 600 // sequences to do this are documented on MSDN: 601 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 602 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 603 StringRef Asm, Constraints; 604 switch (ISA) { 605 default: 606 ErrorUnsupported(E, "__fastfail call for this architecture"); 607 break; 608 case llvm::Triple::x86: 609 case llvm::Triple::x86_64: 610 Asm = "int $$0x29"; 611 Constraints = "{cx}"; 612 break; 613 case llvm::Triple::thumb: 614 Asm = "udf #251"; 615 Constraints = "{r0}"; 616 break; 617 } 618 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 619 llvm::InlineAsm *IA = 620 llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); 621 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 622 getLLVMContext(), llvm::AttributeList::FunctionIndex, 623 llvm::Attribute::NoReturn); 624 CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 625 CS.setAttributes(NoReturnAttr); 626 return CS.getInstruction(); 627 } 628 } 629 llvm_unreachable("Incorrect MSVC intrinsic!"); 630 } 631 632 namespace { 633 // ARC cleanup for __builtin_os_log_format 634 struct CallObjCArcUse final : EHScopeStack::Cleanup { 635 CallObjCArcUse(llvm::Value *object) : object(object) {} 636 llvm::Value *object; 637 638 void Emit(CodeGenFunction &CGF, Flags flags) override { 639 CGF.EmitARCIntrinsicUse(object); 640 } 641 }; 642 } 643 644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 645 unsigned BuiltinID, const CallExpr *E, 646 ReturnValueSlot ReturnValue) { 647 // See if we can constant fold this builtin. If so, don't emit it at all. 
648 Expr::EvalResult Result; 649 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 650 !Result.hasSideEffects()) { 651 if (Result.Val.isInt()) 652 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 653 Result.Val.getInt())); 654 if (Result.Val.isFloat()) 655 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 656 Result.Val.getFloat())); 657 } 658 659 switch (BuiltinID) { 660 default: break; // Handle intrinsics and libm functions below. 661 case Builtin::BI__builtin___CFStringMakeConstantString: 662 case Builtin::BI__builtin___NSStringMakeConstantString: 663 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 664 case Builtin::BI__builtin_stdarg_start: 665 case Builtin::BI__builtin_va_start: 666 case Builtin::BI__va_start: 667 case Builtin::BI__builtin_va_end: 668 return RValue::get( 669 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 670 ? EmitScalarExpr(E->getArg(0)) 671 : EmitVAListRef(E->getArg(0)).getPointer(), 672 BuiltinID != Builtin::BI__builtin_va_end)); 673 case Builtin::BI__builtin_va_copy: { 674 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 675 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 676 677 llvm::Type *Type = Int8PtrTy; 678 679 DstPtr = Builder.CreateBitCast(DstPtr, Type); 680 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 681 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 682 {DstPtr, SrcPtr})); 683 } 684 case Builtin::BI__builtin_abs: 685 case Builtin::BI__builtin_labs: 686 case Builtin::BI__builtin_llabs: { 687 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 688 689 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 690 Value *CmpResult = 691 Builder.CreateICmpSGE(ArgValue, 692 llvm::Constant::getNullValue(ArgValue->getType()), 693 "abscond"); 694 Value *Result = 695 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 696 697 return RValue::get(Result); 698 } 699 case Builtin::BI__builtin_fabs: 700 case Builtin::BI__builtin_fabsf: 701 case 
Builtin::BI__builtin_fabsl: { 702 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 703 } 704 case Builtin::BI__builtin_fmod: 705 case Builtin::BI__builtin_fmodf: 706 case Builtin::BI__builtin_fmodl: { 707 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 708 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 709 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 710 return RValue::get(Result); 711 } 712 case Builtin::BI__builtin_copysign: 713 case Builtin::BI__builtin_copysignf: 714 case Builtin::BI__builtin_copysignl: { 715 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 716 } 717 case Builtin::BI__builtin_ceil: 718 case Builtin::BI__builtin_ceilf: 719 case Builtin::BI__builtin_ceill: { 720 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 721 } 722 case Builtin::BI__builtin_floor: 723 case Builtin::BI__builtin_floorf: 724 case Builtin::BI__builtin_floorl: { 725 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 726 } 727 case Builtin::BI__builtin_trunc: 728 case Builtin::BI__builtin_truncf: 729 case Builtin::BI__builtin_truncl: { 730 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 731 } 732 case Builtin::BI__builtin_rint: 733 case Builtin::BI__builtin_rintf: 734 case Builtin::BI__builtin_rintl: { 735 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 736 } 737 case Builtin::BI__builtin_nearbyint: 738 case Builtin::BI__builtin_nearbyintf: 739 case Builtin::BI__builtin_nearbyintl: { 740 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 741 } 742 case Builtin::BI__builtin_round: 743 case Builtin::BI__builtin_roundf: 744 case Builtin::BI__builtin_roundl: { 745 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 746 } 747 case Builtin::BI__builtin_fmin: 748 case Builtin::BI__builtin_fminf: 749 case Builtin::BI__builtin_fminl: { 750 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 751 } 752 case Builtin::BI__builtin_fmax: 753 
case Builtin::BI__builtin_fmaxf: 754 case Builtin::BI__builtin_fmaxl: { 755 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 756 } 757 case Builtin::BI__builtin_conj: 758 case Builtin::BI__builtin_conjf: 759 case Builtin::BI__builtin_conjl: { 760 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 761 Value *Real = ComplexVal.first; 762 Value *Imag = ComplexVal.second; 763 Value *Zero = 764 Imag->getType()->isFPOrFPVectorTy() 765 ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 766 : llvm::Constant::getNullValue(Imag->getType()); 767 768 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 769 return RValue::getComplex(std::make_pair(Real, Imag)); 770 } 771 case Builtin::BI__builtin_creal: 772 case Builtin::BI__builtin_crealf: 773 case Builtin::BI__builtin_creall: 774 case Builtin::BIcreal: 775 case Builtin::BIcrealf: 776 case Builtin::BIcreall: { 777 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 778 return RValue::get(ComplexVal.first); 779 } 780 781 case Builtin::BI__builtin_cimag: 782 case Builtin::BI__builtin_cimagf: 783 case Builtin::BI__builtin_cimagl: 784 case Builtin::BIcimag: 785 case Builtin::BIcimagf: 786 case Builtin::BIcimagl: { 787 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 788 return RValue::get(ComplexVal.second); 789 } 790 791 case Builtin::BI__builtin_ctzs: 792 case Builtin::BI__builtin_ctz: 793 case Builtin::BI__builtin_ctzl: 794 case Builtin::BI__builtin_ctzll: { 795 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 796 797 llvm::Type *ArgType = ArgValue->getType(); 798 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 799 800 llvm::Type *ResultType = ConvertType(E->getType()); 801 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 802 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 803 if (Result->getType() != ResultType) 804 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 805 "cast"); 806 return RValue::get(Result); 807 } 808 case 
Builtin::BI__builtin_clzs: 809 case Builtin::BI__builtin_clz: 810 case Builtin::BI__builtin_clzl: 811 case Builtin::BI__builtin_clzll: { 812 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 813 814 llvm::Type *ArgType = ArgValue->getType(); 815 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 816 817 llvm::Type *ResultType = ConvertType(E->getType()); 818 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 819 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 820 if (Result->getType() != ResultType) 821 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 822 "cast"); 823 return RValue::get(Result); 824 } 825 case Builtin::BI__builtin_ffs: 826 case Builtin::BI__builtin_ffsl: 827 case Builtin::BI__builtin_ffsll: { 828 // ffs(x) -> x ? cttz(x) + 1 : 0 829 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 830 831 llvm::Type *ArgType = ArgValue->getType(); 832 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 833 834 llvm::Type *ResultType = ConvertType(E->getType()); 835 Value *Tmp = 836 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 837 llvm::ConstantInt::get(ArgType, 1)); 838 Value *Zero = llvm::Constant::getNullValue(ArgType); 839 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 840 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 841 if (Result->getType() != ResultType) 842 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 843 "cast"); 844 return RValue::get(Result); 845 } 846 case Builtin::BI__builtin_parity: 847 case Builtin::BI__builtin_parityl: 848 case Builtin::BI__builtin_parityll: { 849 // parity(x) -> ctpop(x) & 1 850 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 851 852 llvm::Type *ArgType = ArgValue->getType(); 853 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 854 855 llvm::Type *ResultType = ConvertType(E->getType()); 856 Value *Tmp = Builder.CreateCall(F, ArgValue); 857 Value *Result = Builder.CreateAnd(Tmp, 
llvm::ConstantInt::get(ArgType, 1)); 858 if (Result->getType() != ResultType) 859 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 860 "cast"); 861 return RValue::get(Result); 862 } 863 case Builtin::BI__popcnt16: 864 case Builtin::BI__popcnt: 865 case Builtin::BI__popcnt64: 866 case Builtin::BI__builtin_popcount: 867 case Builtin::BI__builtin_popcountl: 868 case Builtin::BI__builtin_popcountll: { 869 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 870 871 llvm::Type *ArgType = ArgValue->getType(); 872 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 873 874 llvm::Type *ResultType = ConvertType(E->getType()); 875 Value *Result = Builder.CreateCall(F, ArgValue); 876 if (Result->getType() != ResultType) 877 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 878 "cast"); 879 return RValue::get(Result); 880 } 881 case Builtin::BI_rotr8: 882 case Builtin::BI_rotr16: 883 case Builtin::BI_rotr: 884 case Builtin::BI_lrotr: 885 case Builtin::BI_rotr64: { 886 Value *Val = EmitScalarExpr(E->getArg(0)); 887 Value *Shift = EmitScalarExpr(E->getArg(1)); 888 889 llvm::Type *ArgType = Val->getType(); 890 Shift = Builder.CreateIntCast(Shift, ArgType, false); 891 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 892 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 893 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 894 895 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 896 Shift = Builder.CreateAnd(Shift, Mask); 897 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 898 899 Value *RightShifted = Builder.CreateLShr(Val, Shift); 900 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 901 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 902 903 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 904 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 905 return RValue::get(Result); 906 } 907 case Builtin::BI_rotl8: 908 case Builtin::BI_rotl16: 
909 case Builtin::BI_rotl: 910 case Builtin::BI_lrotl: 911 case Builtin::BI_rotl64: { 912 Value *Val = EmitScalarExpr(E->getArg(0)); 913 Value *Shift = EmitScalarExpr(E->getArg(1)); 914 915 llvm::Type *ArgType = Val->getType(); 916 Shift = Builder.CreateIntCast(Shift, ArgType, false); 917 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 918 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 919 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 920 921 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 922 Shift = Builder.CreateAnd(Shift, Mask); 923 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 924 925 Value *LeftShifted = Builder.CreateShl(Val, Shift); 926 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 927 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 928 929 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 930 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 931 return RValue::get(Result); 932 } 933 case Builtin::BI__builtin_unpredictable: { 934 // Always return the argument of __builtin_unpredictable. LLVM does not 935 // handle this builtin. Metadata for this builtin should be added directly 936 // to instructions such as branches or switches that use it. 937 return RValue::get(EmitScalarExpr(E->getArg(0))); 938 } 939 case Builtin::BI__builtin_expect: { 940 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 941 llvm::Type *ArgType = ArgValue->getType(); 942 943 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 944 // Don't generate llvm.expect on -O0 as the backend won't use it for 945 // anything. 946 // Note, we still IRGen ExpectedValue because it could have side-effects. 
947 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 948 return RValue::get(ArgValue); 949 950 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 951 Value *Result = 952 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 953 return RValue::get(Result); 954 } 955 case Builtin::BI__builtin_assume_aligned: { 956 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 957 Value *OffsetValue = 958 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; 959 960 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 961 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 962 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 963 964 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 965 return RValue::get(PtrValue); 966 } 967 case Builtin::BI__assume: 968 case Builtin::BI__builtin_assume: { 969 if (E->getArg(0)->HasSideEffects(getContext())) 970 return RValue::get(nullptr); 971 972 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 973 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 974 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 975 } 976 case Builtin::BI__builtin_bswap16: 977 case Builtin::BI__builtin_bswap32: 978 case Builtin::BI__builtin_bswap64: { 979 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 980 } 981 case Builtin::BI__builtin_bitreverse8: 982 case Builtin::BI__builtin_bitreverse16: 983 case Builtin::BI__builtin_bitreverse32: 984 case Builtin::BI__builtin_bitreverse64: { 985 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 986 } 987 case Builtin::BI__builtin_object_size: { 988 unsigned Type = 989 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 990 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 991 992 // We pass this builtin onto the optimizer so that it can figure out the 993 // object size in more complex cases. 
994 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 995 /*EmittedE=*/nullptr)); 996 } 997 case Builtin::BI__builtin_prefetch: { 998 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 999 // FIXME: Technically these constants should of type 'int', yes? 1000 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 1001 llvm::ConstantInt::get(Int32Ty, 0); 1002 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 1003 llvm::ConstantInt::get(Int32Ty, 3); 1004 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 1005 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 1006 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 1007 } 1008 case Builtin::BI__builtin_readcyclecounter: { 1009 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 1010 return RValue::get(Builder.CreateCall(F)); 1011 } 1012 case Builtin::BI__builtin___clear_cache: { 1013 Value *Begin = EmitScalarExpr(E->getArg(0)); 1014 Value *End = EmitScalarExpr(E->getArg(1)); 1015 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 1016 return RValue::get(Builder.CreateCall(F, {Begin, End})); 1017 } 1018 case Builtin::BI__builtin_trap: 1019 return RValue::get(EmitTrapCall(Intrinsic::trap)); 1020 case Builtin::BI__debugbreak: 1021 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 1022 case Builtin::BI__builtin_unreachable: { 1023 if (SanOpts.has(SanitizerKind::Unreachable)) { 1024 SanitizerScope SanScope(this); 1025 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 1026 SanitizerKind::Unreachable), 1027 SanitizerHandler::BuiltinUnreachable, 1028 EmitCheckSourceLocation(E->getExprLoc()), None); 1029 } else 1030 Builder.CreateUnreachable(); 1031 1032 // We do need to preserve an insertion point. 
1033 EmitBlock(createBasicBlock("unreachable.cont")); 1034 1035 return RValue::get(nullptr); 1036 } 1037 1038 case Builtin::BI__builtin_powi: 1039 case Builtin::BI__builtin_powif: 1040 case Builtin::BI__builtin_powil: { 1041 Value *Base = EmitScalarExpr(E->getArg(0)); 1042 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1043 llvm::Type *ArgType = Base->getType(); 1044 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 1045 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1046 } 1047 1048 case Builtin::BI__builtin_isgreater: 1049 case Builtin::BI__builtin_isgreaterequal: 1050 case Builtin::BI__builtin_isless: 1051 case Builtin::BI__builtin_islessequal: 1052 case Builtin::BI__builtin_islessgreater: 1053 case Builtin::BI__builtin_isunordered: { 1054 // Ordered comparisons: we know the arguments to these are matching scalar 1055 // floating point values. 1056 Value *LHS = EmitScalarExpr(E->getArg(0)); 1057 Value *RHS = EmitScalarExpr(E->getArg(1)); 1058 1059 switch (BuiltinID) { 1060 default: llvm_unreachable("Unknown ordered comparison"); 1061 case Builtin::BI__builtin_isgreater: 1062 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 1063 break; 1064 case Builtin::BI__builtin_isgreaterequal: 1065 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 1066 break; 1067 case Builtin::BI__builtin_isless: 1068 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 1069 break; 1070 case Builtin::BI__builtin_islessequal: 1071 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 1072 break; 1073 case Builtin::BI__builtin_islessgreater: 1074 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 1075 break; 1076 case Builtin::BI__builtin_isunordered: 1077 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1078 break; 1079 } 1080 // ZExt bool to int type. 
1081 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1082 } 1083 case Builtin::BI__builtin_isnan: { 1084 Value *V = EmitScalarExpr(E->getArg(0)); 1085 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1086 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1087 } 1088 1089 case Builtin::BIfinite: 1090 case Builtin::BI__finite: 1091 case Builtin::BIfinitef: 1092 case Builtin::BI__finitef: 1093 case Builtin::BIfinitel: 1094 case Builtin::BI__finitel: 1095 case Builtin::BI__builtin_isinf: 1096 case Builtin::BI__builtin_isfinite: { 1097 // isinf(x) --> fabs(x) == infinity 1098 // isfinite(x) --> fabs(x) != infinity 1099 // x != NaN via the ordered compare in either case. 1100 Value *V = EmitScalarExpr(E->getArg(0)); 1101 Value *Fabs = EmitFAbs(*this, V); 1102 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1103 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1104 ? CmpInst::FCMP_OEQ 1105 : CmpInst::FCMP_ONE; 1106 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1107 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1108 } 1109 1110 case Builtin::BI__builtin_isinf_sign: { 1111 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 1112 Value *Arg = EmitScalarExpr(E->getArg(0)); 1113 Value *AbsArg = EmitFAbs(*this, Arg); 1114 Value *IsInf = Builder.CreateFCmpOEQ( 1115 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1116 Value *IsNeg = EmitSignBit(*this, Arg); 1117 1118 llvm::Type *IntTy = ConvertType(E->getType()); 1119 Value *Zero = Constant::getNullValue(IntTy); 1120 Value *One = ConstantInt::get(IntTy, 1); 1121 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1122 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1123 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1124 return RValue::get(Result); 1125 } 1126 1127 case Builtin::BI__builtin_isnormal: { 1128 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1129 Value *V = EmitScalarExpr(E->getArg(0)); 1130 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1131 1132 Value *Abs = EmitFAbs(*this, V); 1133 Value *IsLessThanInf = 1134 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1135 APFloat Smallest = APFloat::getSmallestNormalized( 1136 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1137 Value *IsNormal = 1138 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1139 "isnormal"); 1140 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1141 V = Builder.CreateAnd(V, IsNormal, "and"); 1142 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1143 } 1144 1145 case Builtin::BI__builtin_fpclassify: { 1146 Value *V = EmitScalarExpr(E->getArg(5)); 1147 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1148 1149 // Create Result 1150 BasicBlock *Begin = Builder.GetInsertBlock(); 1151 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1152 Builder.SetInsertPoint(End); 1153 PHINode *Result = 1154 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1155 "fpclassify_result"); 1156 1157 // if (V==0) return FP_ZERO 1158 Builder.SetInsertPoint(Begin); 1159 Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1160 "iszero"); 1161 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1162 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1163 Builder.CreateCondBr(IsZero, End, NotZero); 1164 Result->addIncoming(ZeroLiteral, Begin); 1165 1166 // if (V != V) return FP_NAN 1167 Builder.SetInsertPoint(NotZero); 1168 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1169 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1170 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1171 Builder.CreateCondBr(IsNan, End, NotNan); 1172 Result->addIncoming(NanLiteral, NotZero); 1173 1174 // if (fabs(V) == infinity) return FP_INFINITY 1175 Builder.SetInsertPoint(NotNan); 1176 Value *VAbs = EmitFAbs(*this, V); 1177 Value *IsInf = 1178 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1179 "isinf"); 1180 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1181 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1182 Builder.CreateCondBr(IsInf, End, NotInf); 1183 Result->addIncoming(InfLiteral, NotNan); 1184 1185 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1186 Builder.SetInsertPoint(NotInf); 1187 APFloat Smallest = APFloat::getSmallestNormalized( 1188 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1189 Value *IsNormal = 1190 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1191 "isnormal"); 1192 Value *NormalResult = 1193 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1194 EmitScalarExpr(E->getArg(3))); 1195 Builder.CreateBr(End); 1196 Result->addIncoming(NormalResult, NotInf); 1197 1198 // return Result 1199 Builder.SetInsertPoint(End); 1200 return RValue::get(Result); 1201 } 1202 1203 case Builtin::BIalloca: 1204 case Builtin::BI_alloca: 1205 case Builtin::BI__builtin_alloca: { 1206 Value *Size = EmitScalarExpr(E->getArg(0)); 1207 const TargetInfo &TI = getContext().getTargetInfo(); 
1208 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 1209 unsigned SuitableAlignmentInBytes = 1210 CGM.getContext() 1211 .toCharUnitsFromBits(TI.getSuitableAlign()) 1212 .getQuantity(); 1213 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1214 AI->setAlignment(SuitableAlignmentInBytes); 1215 return RValue::get(AI); 1216 } 1217 1218 case Builtin::BI__builtin_alloca_with_align: { 1219 Value *Size = EmitScalarExpr(E->getArg(0)); 1220 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 1221 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 1222 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 1223 unsigned AlignmentInBytes = 1224 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); 1225 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1226 AI->setAlignment(AlignmentInBytes); 1227 return RValue::get(AI); 1228 } 1229 1230 case Builtin::BIbzero: 1231 case Builtin::BI__builtin_bzero: { 1232 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1233 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1234 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1235 E->getArg(0)->getExprLoc(), FD, 0); 1236 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1237 return RValue::get(Dest.getPointer()); 1238 } 1239 case Builtin::BImemcpy: 1240 case Builtin::BI__builtin_memcpy: { 1241 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1242 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1243 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1244 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1245 E->getArg(0)->getExprLoc(), FD, 0); 1246 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1247 E->getArg(1)->getExprLoc(), FD, 1); 1248 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1249 return RValue::get(Dest.getPointer()); 1250 } 1251 1252 case 
Builtin::BI__builtin_char_memchr: 1253 BuiltinID = Builtin::BI__builtin_memchr; 1254 break; 1255 1256 case Builtin::BI__builtin___memcpy_chk: { 1257 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 1258 llvm::APSInt Size, DstSize; 1259 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1260 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1261 break; 1262 if (Size.ugt(DstSize)) 1263 break; 1264 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1265 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1266 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1267 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1268 return RValue::get(Dest.getPointer()); 1269 } 1270 1271 case Builtin::BI__builtin_objc_memmove_collectable: { 1272 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1273 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1274 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1275 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1276 DestAddr, SrcAddr, SizeVal); 1277 return RValue::get(DestAddr.getPointer()); 1278 } 1279 1280 case Builtin::BI__builtin___memmove_chk: { 1281 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1282 llvm::APSInt Size, DstSize; 1283 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1284 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1285 break; 1286 if (Size.ugt(DstSize)) 1287 break; 1288 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1289 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1290 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1291 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1292 return RValue::get(Dest.getPointer()); 1293 } 1294 1295 case Builtin::BImemmove: 1296 case Builtin::BI__builtin_memmove: { 1297 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1298 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1299 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1300 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1301 E->getArg(0)->getExprLoc(), FD, 0); 1302 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1303 E->getArg(1)->getExprLoc(), FD, 1); 1304 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1305 return RValue::get(Dest.getPointer()); 1306 } 1307 case Builtin::BImemset: 1308 case Builtin::BI__builtin_memset: { 1309 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1310 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1311 Builder.getInt8Ty()); 1312 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1313 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1314 E->getArg(0)->getExprLoc(), FD, 0); 1315 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1316 return RValue::get(Dest.getPointer()); 1317 } 1318 case Builtin::BI__builtin___memset_chk: { 1319 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1320 llvm::APSInt Size, DstSize; 1321 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1322 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1323 break; 1324 if (Size.ugt(DstSize)) 1325 break; 1326 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1327 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1328 Builder.getInt8Ty()); 1329 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1330 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1331 return RValue::get(Dest.getPointer()); 1332 } 1333 case Builtin::BI__builtin_dwarf_cfa: { 1334 // The offset in bytes from the first argument to the CFA. 1335 // 1336 // Why on earth is this in the frontend? Is there any reason at 1337 // all that the backend can't reasonably determine this while 1338 // lowering llvm.eh.dwarf.cfa()? 1339 // 1340 // TODO: If there's a satisfactory reason, add a target hook for 1341 // this instead of hard-coding 0, which is correct for most targets. 1342 int32_t Offset = 0; 1343 1344 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1345 return RValue::get(Builder.CreateCall(F, 1346 llvm::ConstantInt::get(Int32Ty, Offset))); 1347 } 1348 case Builtin::BI__builtin_return_address: { 1349 Value *Depth = 1350 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1351 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1352 return RValue::get(Builder.CreateCall(F, Depth)); 1353 } 1354 case Builtin::BI_ReturnAddress: { 1355 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1356 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1357 } 1358 case Builtin::BI__builtin_frame_address: { 1359 Value *Depth = 1360 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1361 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1362 return RValue::get(Builder.CreateCall(F, Depth)); 1363 } 1364 case Builtin::BI__builtin_extract_return_addr: { 1365 Value *Address = EmitScalarExpr(E->getArg(0)); 
1366 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1367 return RValue::get(Result); 1368 } 1369 case Builtin::BI__builtin_frob_return_addr: { 1370 Value *Address = EmitScalarExpr(E->getArg(0)); 1371 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1372 return RValue::get(Result); 1373 } 1374 case Builtin::BI__builtin_dwarf_sp_column: { 1375 llvm::IntegerType *Ty 1376 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1377 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1378 if (Column == -1) { 1379 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1380 return RValue::get(llvm::UndefValue::get(Ty)); 1381 } 1382 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1383 } 1384 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1385 Value *Address = EmitScalarExpr(E->getArg(0)); 1386 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1387 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1388 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1389 } 1390 case Builtin::BI__builtin_eh_return: { 1391 Value *Int = EmitScalarExpr(E->getArg(0)); 1392 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1393 1394 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1395 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1396 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1397 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1398 ? Intrinsic::eh_return_i32 1399 : Intrinsic::eh_return_i64); 1400 Builder.CreateCall(F, {Int, Ptr}); 1401 Builder.CreateUnreachable(); 1402 1403 // We do need to preserve an insertion point. 
1404 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1405 1406 return RValue::get(nullptr); 1407 } 1408 case Builtin::BI__builtin_unwind_init: { 1409 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1410 return RValue::get(Builder.CreateCall(F)); 1411 } 1412 case Builtin::BI__builtin_extend_pointer: { 1413 // Extends a pointer to the size of an _Unwind_Word, which is 1414 // uint64_t on all platforms. Generally this gets poked into a 1415 // register and eventually used as an address, so if the 1416 // addressing registers are wider than pointers and the platform 1417 // doesn't implicitly ignore high-order bits when doing 1418 // addressing, we need to make sure we zext / sext based on 1419 // the platform's expectations. 1420 // 1421 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1422 1423 // Cast the pointer to intptr_t. 1424 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1425 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1426 1427 // If that's 64 bits, we're done. 1428 if (IntPtrTy->getBitWidth() == 64) 1429 return RValue::get(Result); 1430 1431 // Otherwise, ask the codegen data what to do. 1432 if (getTargetHooks().extendPointerWithSExt()) 1433 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1434 else 1435 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1436 } 1437 case Builtin::BI__builtin_setjmp: { 1438 // Buffer is a void**. 1439 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1440 1441 // Store the frame pointer to the setjmp buffer. 1442 Value *FrameAddr = 1443 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1444 ConstantInt::get(Int32Ty, 0)); 1445 Builder.CreateStore(FrameAddr, Buf); 1446 1447 // Store the stack pointer to the setjmp buffer. 
1448 Value *StackAddr = 1449 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1450 Address StackSaveSlot = 1451 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1452 Builder.CreateStore(StackAddr, StackSaveSlot); 1453 1454 // Call LLVM's EH setjmp, which is lightweight. 1455 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1456 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1457 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1458 } 1459 case Builtin::BI__builtin_longjmp: { 1460 Value *Buf = EmitScalarExpr(E->getArg(0)); 1461 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1462 1463 // Call LLVM's EH longjmp, which is lightweight. 1464 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1465 1466 // longjmp doesn't return; mark this as unreachable. 1467 Builder.CreateUnreachable(); 1468 1469 // We do need to preserve an insertion point. 1470 EmitBlock(createBasicBlock("longjmp.cont")); 1471 1472 return RValue::get(nullptr); 1473 } 1474 case Builtin::BI__sync_fetch_and_add: 1475 case Builtin::BI__sync_fetch_and_sub: 1476 case Builtin::BI__sync_fetch_and_or: 1477 case Builtin::BI__sync_fetch_and_and: 1478 case Builtin::BI__sync_fetch_and_xor: 1479 case Builtin::BI__sync_fetch_and_nand: 1480 case Builtin::BI__sync_add_and_fetch: 1481 case Builtin::BI__sync_sub_and_fetch: 1482 case Builtin::BI__sync_and_and_fetch: 1483 case Builtin::BI__sync_or_and_fetch: 1484 case Builtin::BI__sync_xor_and_fetch: 1485 case Builtin::BI__sync_nand_and_fetch: 1486 case Builtin::BI__sync_val_compare_and_swap: 1487 case Builtin::BI__sync_bool_compare_and_swap: 1488 case Builtin::BI__sync_lock_test_and_set: 1489 case Builtin::BI__sync_lock_release: 1490 case Builtin::BI__sync_swap: 1491 llvm_unreachable("Shouldn't make it through sema"); 1492 case Builtin::BI__sync_fetch_and_add_1: 1493 case Builtin::BI__sync_fetch_and_add_2: 1494 case Builtin::BI__sync_fetch_and_add_4: 1495 case Builtin::BI__sync_fetch_and_add_8: 1496 case 
Builtin::BI__sync_fetch_and_add_16: 1497 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1498 case Builtin::BI__sync_fetch_and_sub_1: 1499 case Builtin::BI__sync_fetch_and_sub_2: 1500 case Builtin::BI__sync_fetch_and_sub_4: 1501 case Builtin::BI__sync_fetch_and_sub_8: 1502 case Builtin::BI__sync_fetch_and_sub_16: 1503 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1504 case Builtin::BI__sync_fetch_and_or_1: 1505 case Builtin::BI__sync_fetch_and_or_2: 1506 case Builtin::BI__sync_fetch_and_or_4: 1507 case Builtin::BI__sync_fetch_and_or_8: 1508 case Builtin::BI__sync_fetch_and_or_16: 1509 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1510 case Builtin::BI__sync_fetch_and_and_1: 1511 case Builtin::BI__sync_fetch_and_and_2: 1512 case Builtin::BI__sync_fetch_and_and_4: 1513 case Builtin::BI__sync_fetch_and_and_8: 1514 case Builtin::BI__sync_fetch_and_and_16: 1515 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1516 case Builtin::BI__sync_fetch_and_xor_1: 1517 case Builtin::BI__sync_fetch_and_xor_2: 1518 case Builtin::BI__sync_fetch_and_xor_4: 1519 case Builtin::BI__sync_fetch_and_xor_8: 1520 case Builtin::BI__sync_fetch_and_xor_16: 1521 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1522 case Builtin::BI__sync_fetch_and_nand_1: 1523 case Builtin::BI__sync_fetch_and_nand_2: 1524 case Builtin::BI__sync_fetch_and_nand_4: 1525 case Builtin::BI__sync_fetch_and_nand_8: 1526 case Builtin::BI__sync_fetch_and_nand_16: 1527 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1528 1529 // Clang extensions: not overloaded yet. 
1530 case Builtin::BI__sync_fetch_and_min: 1531 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1532 case Builtin::BI__sync_fetch_and_max: 1533 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1534 case Builtin::BI__sync_fetch_and_umin: 1535 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1536 case Builtin::BI__sync_fetch_and_umax: 1537 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1538 1539 case Builtin::BI__sync_add_and_fetch_1: 1540 case Builtin::BI__sync_add_and_fetch_2: 1541 case Builtin::BI__sync_add_and_fetch_4: 1542 case Builtin::BI__sync_add_and_fetch_8: 1543 case Builtin::BI__sync_add_and_fetch_16: 1544 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1545 llvm::Instruction::Add); 1546 case Builtin::BI__sync_sub_and_fetch_1: 1547 case Builtin::BI__sync_sub_and_fetch_2: 1548 case Builtin::BI__sync_sub_and_fetch_4: 1549 case Builtin::BI__sync_sub_and_fetch_8: 1550 case Builtin::BI__sync_sub_and_fetch_16: 1551 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1552 llvm::Instruction::Sub); 1553 case Builtin::BI__sync_and_and_fetch_1: 1554 case Builtin::BI__sync_and_and_fetch_2: 1555 case Builtin::BI__sync_and_and_fetch_4: 1556 case Builtin::BI__sync_and_and_fetch_8: 1557 case Builtin::BI__sync_and_and_fetch_16: 1558 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1559 llvm::Instruction::And); 1560 case Builtin::BI__sync_or_and_fetch_1: 1561 case Builtin::BI__sync_or_and_fetch_2: 1562 case Builtin::BI__sync_or_and_fetch_4: 1563 case Builtin::BI__sync_or_and_fetch_8: 1564 case Builtin::BI__sync_or_and_fetch_16: 1565 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1566 llvm::Instruction::Or); 1567 case Builtin::BI__sync_xor_and_fetch_1: 1568 case Builtin::BI__sync_xor_and_fetch_2: 1569 case Builtin::BI__sync_xor_and_fetch_4: 1570 case Builtin::BI__sync_xor_and_fetch_8: 1571 case Builtin::BI__sync_xor_and_fetch_16: 1572 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1573 llvm::Instruction::Xor); 1574 case Builtin::BI__sync_nand_and_fetch_1: 1575 case Builtin::BI__sync_nand_and_fetch_2: 1576 case Builtin::BI__sync_nand_and_fetch_4: 1577 case Builtin::BI__sync_nand_and_fetch_8: 1578 case Builtin::BI__sync_nand_and_fetch_16: 1579 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1580 llvm::Instruction::And, true); 1581 1582 case Builtin::BI__sync_val_compare_and_swap_1: 1583 case Builtin::BI__sync_val_compare_and_swap_2: 1584 case Builtin::BI__sync_val_compare_and_swap_4: 1585 case Builtin::BI__sync_val_compare_and_swap_8: 1586 case Builtin::BI__sync_val_compare_and_swap_16: 1587 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1588 1589 case Builtin::BI__sync_bool_compare_and_swap_1: 1590 case Builtin::BI__sync_bool_compare_and_swap_2: 1591 case Builtin::BI__sync_bool_compare_and_swap_4: 1592 case Builtin::BI__sync_bool_compare_and_swap_8: 1593 case Builtin::BI__sync_bool_compare_and_swap_16: 1594 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1595 1596 case Builtin::BI__sync_swap_1: 1597 case Builtin::BI__sync_swap_2: 1598 case Builtin::BI__sync_swap_4: 1599 case Builtin::BI__sync_swap_8: 1600 case Builtin::BI__sync_swap_16: 1601 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1602 1603 case Builtin::BI__sync_lock_test_and_set_1: 1604 case Builtin::BI__sync_lock_test_and_set_2: 1605 case Builtin::BI__sync_lock_test_and_set_4: 1606 case Builtin::BI__sync_lock_test_and_set_8: 1607 case Builtin::BI__sync_lock_test_and_set_16: 1608 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1609 1610 case Builtin::BI__sync_lock_release_1: 1611 case Builtin::BI__sync_lock_release_2: 1612 case Builtin::BI__sync_lock_release_4: 1613 case Builtin::BI__sync_lock_release_8: 1614 case Builtin::BI__sync_lock_release_16: { 1615 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1616 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 1617 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1618 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1619 StoreSize.getQuantity() * 8); 1620 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1621 llvm::StoreInst *Store = 1622 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1623 StoreSize); 1624 Store->setAtomic(llvm::AtomicOrdering::Release); 1625 return RValue::get(nullptr); 1626 } 1627 1628 case Builtin::BI__sync_synchronize: { 1629 // We assume this is supposed to correspond to a C++0x-style 1630 // sequentially-consistent fence (i.e. this is only usable for 1631 // synchonization, not device I/O or anything like that). This intrinsic 1632 // is really badly designed in the sense that in theory, there isn't 1633 // any way to safely use it... but in practice, it mostly works 1634 // to use it with non-atomic loads and stores to get acquire/release 1635 // semantics. 1636 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1637 return RValue::get(nullptr); 1638 } 1639 1640 case Builtin::BI__builtin_nontemporal_load: 1641 return RValue::get(EmitNontemporalLoad(*this, E)); 1642 case Builtin::BI__builtin_nontemporal_store: 1643 return RValue::get(EmitNontemporalStore(*this, E)); 1644 case Builtin::BI__c11_atomic_is_lock_free: 1645 case Builtin::BI__atomic_is_lock_free: { 1646 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1647 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1648 // _Atomic(T) is always properly-aligned. 
1649 const char *LibCallName = "__atomic_is_lock_free"; 1650 CallArgList Args; 1651 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1652 getContext().getSizeType()); 1653 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1654 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1655 getContext().VoidPtrTy); 1656 else 1657 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1658 getContext().VoidPtrTy); 1659 const CGFunctionInfo &FuncInfo = 1660 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1661 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1662 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1663 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 1664 ReturnValueSlot(), Args); 1665 } 1666 1667 case Builtin::BI__atomic_test_and_set: { 1668 // Look at the argument type to determine whether this is a volatile 1669 // operation. The parameter type is always volatile. 1670 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1671 bool Volatile = 1672 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1673 1674 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1675 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1676 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1677 Value *NewVal = Builder.getInt8(1); 1678 Value *Order = EmitScalarExpr(E->getArg(1)); 1679 if (isa<llvm::ConstantInt>(Order)) { 1680 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1681 AtomicRMWInst *Result = nullptr; 1682 switch (ord) { 1683 case 0: // memory_order_relaxed 1684 default: // invalid order 1685 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1686 llvm::AtomicOrdering::Monotonic); 1687 break; 1688 case 1: // memory_order_consume 1689 case 2: // memory_order_acquire 1690 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1691 llvm::AtomicOrdering::Acquire); 1692 break; 1693 case 3: // memory_order_release 1694 
Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1695 llvm::AtomicOrdering::Release); 1696 break; 1697 case 4: // memory_order_acq_rel 1698 1699 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1700 llvm::AtomicOrdering::AcquireRelease); 1701 break; 1702 case 5: // memory_order_seq_cst 1703 Result = Builder.CreateAtomicRMW( 1704 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1705 llvm::AtomicOrdering::SequentiallyConsistent); 1706 break; 1707 } 1708 Result->setVolatile(Volatile); 1709 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1710 } 1711 1712 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1713 1714 llvm::BasicBlock *BBs[5] = { 1715 createBasicBlock("monotonic", CurFn), 1716 createBasicBlock("acquire", CurFn), 1717 createBasicBlock("release", CurFn), 1718 createBasicBlock("acqrel", CurFn), 1719 createBasicBlock("seqcst", CurFn) 1720 }; 1721 llvm::AtomicOrdering Orders[5] = { 1722 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1723 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1724 llvm::AtomicOrdering::SequentiallyConsistent}; 1725 1726 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1727 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1728 1729 Builder.SetInsertPoint(ContBB); 1730 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1731 1732 for (unsigned i = 0; i < 5; ++i) { 1733 Builder.SetInsertPoint(BBs[i]); 1734 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1735 Ptr, NewVal, Orders[i]); 1736 RMW->setVolatile(Volatile); 1737 Result->addIncoming(RMW, BBs[i]); 1738 Builder.CreateBr(ContBB); 1739 } 1740 1741 SI->addCase(Builder.getInt32(0), BBs[0]); 1742 SI->addCase(Builder.getInt32(1), BBs[1]); 1743 SI->addCase(Builder.getInt32(2), BBs[1]); 1744 SI->addCase(Builder.getInt32(3), BBs[2]); 1745 SI->addCase(Builder.getInt32(4), BBs[3]); 1746 SI->addCase(Builder.getInt32(5), 
BBs[4]); 1747 1748 Builder.SetInsertPoint(ContBB); 1749 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1750 } 1751 1752 case Builtin::BI__atomic_clear: { 1753 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1754 bool Volatile = 1755 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1756 1757 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1758 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1759 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1760 Value *NewVal = Builder.getInt8(0); 1761 Value *Order = EmitScalarExpr(E->getArg(1)); 1762 if (isa<llvm::ConstantInt>(Order)) { 1763 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1764 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1765 switch (ord) { 1766 case 0: // memory_order_relaxed 1767 default: // invalid order 1768 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1769 break; 1770 case 3: // memory_order_release 1771 Store->setOrdering(llvm::AtomicOrdering::Release); 1772 break; 1773 case 5: // memory_order_seq_cst 1774 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1775 break; 1776 } 1777 return RValue::get(nullptr); 1778 } 1779 1780 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1781 1782 llvm::BasicBlock *BBs[3] = { 1783 createBasicBlock("monotonic", CurFn), 1784 createBasicBlock("release", CurFn), 1785 createBasicBlock("seqcst", CurFn) 1786 }; 1787 llvm::AtomicOrdering Orders[3] = { 1788 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1789 llvm::AtomicOrdering::SequentiallyConsistent}; 1790 1791 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1792 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1793 1794 for (unsigned i = 0; i < 3; ++i) { 1795 Builder.SetInsertPoint(BBs[i]); 1796 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1797 Store->setOrdering(Orders[i]); 1798 
Builder.CreateBr(ContBB); 1799 } 1800 1801 SI->addCase(Builder.getInt32(0), BBs[0]); 1802 SI->addCase(Builder.getInt32(3), BBs[1]); 1803 SI->addCase(Builder.getInt32(5), BBs[2]); 1804 1805 Builder.SetInsertPoint(ContBB); 1806 return RValue::get(nullptr); 1807 } 1808 1809 case Builtin::BI__atomic_thread_fence: 1810 case Builtin::BI__atomic_signal_fence: 1811 case Builtin::BI__c11_atomic_thread_fence: 1812 case Builtin::BI__c11_atomic_signal_fence: { 1813 llvm::SynchronizationScope Scope; 1814 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1815 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1816 Scope = llvm::SingleThread; 1817 else 1818 Scope = llvm::CrossThread; 1819 Value *Order = EmitScalarExpr(E->getArg(0)); 1820 if (isa<llvm::ConstantInt>(Order)) { 1821 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1822 switch (ord) { 1823 case 0: // memory_order_relaxed 1824 default: // invalid order 1825 break; 1826 case 1: // memory_order_consume 1827 case 2: // memory_order_acquire 1828 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1829 break; 1830 case 3: // memory_order_release 1831 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1832 break; 1833 case 4: // memory_order_acq_rel 1834 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1835 break; 1836 case 5: // memory_order_seq_cst 1837 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 1838 Scope); 1839 break; 1840 } 1841 return RValue::get(nullptr); 1842 } 1843 1844 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1845 AcquireBB = createBasicBlock("acquire", CurFn); 1846 ReleaseBB = createBasicBlock("release", CurFn); 1847 AcqRelBB = createBasicBlock("acqrel", CurFn); 1848 SeqCstBB = createBasicBlock("seqcst", CurFn); 1849 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1850 1851 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1852 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 
1853 1854 Builder.SetInsertPoint(AcquireBB); 1855 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1856 Builder.CreateBr(ContBB); 1857 SI->addCase(Builder.getInt32(1), AcquireBB); 1858 SI->addCase(Builder.getInt32(2), AcquireBB); 1859 1860 Builder.SetInsertPoint(ReleaseBB); 1861 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1862 Builder.CreateBr(ContBB); 1863 SI->addCase(Builder.getInt32(3), ReleaseBB); 1864 1865 Builder.SetInsertPoint(AcqRelBB); 1866 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1867 Builder.CreateBr(ContBB); 1868 SI->addCase(Builder.getInt32(4), AcqRelBB); 1869 1870 Builder.SetInsertPoint(SeqCstBB); 1871 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); 1872 Builder.CreateBr(ContBB); 1873 SI->addCase(Builder.getInt32(5), SeqCstBB); 1874 1875 Builder.SetInsertPoint(ContBB); 1876 return RValue::get(nullptr); 1877 } 1878 1879 // Library functions with special handling. 1880 case Builtin::BIsqrt: 1881 case Builtin::BIsqrtf: 1882 case Builtin::BIsqrtl: { 1883 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1884 // in finite- or unsafe-math mode (the intrinsic has different semantics 1885 // for handling negative numbers compared to the library function, so 1886 // -fmath-errno=0 is not enough). 1887 if (!FD->hasAttr<ConstAttr>()) 1888 break; 1889 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1890 CGM.getCodeGenOpts().NoNaNsFPMath)) 1891 break; 1892 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1893 llvm::Type *ArgType = Arg0->getType(); 1894 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1895 return RValue::get(Builder.CreateCall(F, Arg0)); 1896 } 1897 1898 case Builtin::BI__builtin_pow: 1899 case Builtin::BI__builtin_powf: 1900 case Builtin::BI__builtin_powl: 1901 case Builtin::BIpow: 1902 case Builtin::BIpowf: 1903 case Builtin::BIpowl: { 1904 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
1905 if (!FD->hasAttr<ConstAttr>()) 1906 break; 1907 Value *Base = EmitScalarExpr(E->getArg(0)); 1908 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1909 llvm::Type *ArgType = Base->getType(); 1910 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1911 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1912 } 1913 1914 case Builtin::BIfma: 1915 case Builtin::BIfmaf: 1916 case Builtin::BIfmal: 1917 case Builtin::BI__builtin_fma: 1918 case Builtin::BI__builtin_fmaf: 1919 case Builtin::BI__builtin_fmal: { 1920 // Rewrite fma to intrinsic. 1921 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1922 llvm::Type *ArgType = FirstArg->getType(); 1923 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1924 return RValue::get( 1925 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1926 EmitScalarExpr(E->getArg(2))})); 1927 } 1928 1929 case Builtin::BI__builtin_signbit: 1930 case Builtin::BI__builtin_signbitf: 1931 case Builtin::BI__builtin_signbitl: { 1932 return RValue::get( 1933 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1934 ConvertType(E->getType()))); 1935 } 1936 case Builtin::BI__builtin_annotation: { 1937 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1938 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1939 AnnVal->getType()); 1940 1941 // Get the annotation string, go through casts. Sema requires this to be a 1942 // non-wide string literal, potentially casted, so the cast<> is safe. 
1943 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1944 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1945 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1946 } 1947 case Builtin::BI__builtin_addcb: 1948 case Builtin::BI__builtin_addcs: 1949 case Builtin::BI__builtin_addc: 1950 case Builtin::BI__builtin_addcl: 1951 case Builtin::BI__builtin_addcll: 1952 case Builtin::BI__builtin_subcb: 1953 case Builtin::BI__builtin_subcs: 1954 case Builtin::BI__builtin_subc: 1955 case Builtin::BI__builtin_subcl: 1956 case Builtin::BI__builtin_subcll: { 1957 1958 // We translate all of these builtins from expressions of the form: 1959 // int x = ..., y = ..., carryin = ..., carryout, result; 1960 // result = __builtin_addc(x, y, carryin, &carryout); 1961 // 1962 // to LLVM IR of the form: 1963 // 1964 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1965 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1966 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1967 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1968 // i32 %carryin) 1969 // %result = extractvalue {i32, i1} %tmp2, 0 1970 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1971 // %tmp3 = or i1 %carry1, %carry2 1972 // %tmp4 = zext i1 %tmp3 to i32 1973 // store i32 %tmp4, i32* %carryout 1974 1975 // Scalarize our inputs. 1976 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1977 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1978 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1979 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 1980 1981 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1982 llvm::Intrinsic::ID IntrinsicId; 1983 switch (BuiltinID) { 1984 default: llvm_unreachable("Unknown multiprecision builtin id."); 1985 case Builtin::BI__builtin_addcb: 1986 case Builtin::BI__builtin_addcs: 1987 case Builtin::BI__builtin_addc: 1988 case Builtin::BI__builtin_addcl: 1989 case Builtin::BI__builtin_addcll: 1990 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1991 break; 1992 case Builtin::BI__builtin_subcb: 1993 case Builtin::BI__builtin_subcs: 1994 case Builtin::BI__builtin_subc: 1995 case Builtin::BI__builtin_subcl: 1996 case Builtin::BI__builtin_subcll: 1997 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1998 break; 1999 } 2000 2001 // Construct our resulting LLVM IR expression. 2002 llvm::Value *Carry1; 2003 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 2004 X, Y, Carry1); 2005 llvm::Value *Carry2; 2006 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 2007 Sum1, Carryin, Carry2); 2008 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 2009 X->getType()); 2010 Builder.CreateStore(CarryOut, CarryOutPtr); 2011 return RValue::get(Sum2); 2012 } 2013 2014 case Builtin::BI__builtin_add_overflow: 2015 case Builtin::BI__builtin_sub_overflow: 2016 case Builtin::BI__builtin_mul_overflow: { 2017 const clang::Expr *LeftArg = E->getArg(0); 2018 const clang::Expr *RightArg = E->getArg(1); 2019 const clang::Expr *ResultArg = E->getArg(2); 2020 2021 clang::QualType ResultQTy = 2022 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 2023 2024 WidthAndSignedness LeftInfo = 2025 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 2026 WidthAndSignedness RightInfo = 2027 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 2028 WidthAndSignedness ResultInfo = 2029 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 2030 WidthAndSignedness EncompassingInfo = 2031 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 2032 2033 llvm::Type 
*EncompassingLLVMTy = 2034 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 2035 2036 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 2037 2038 llvm::Intrinsic::ID IntrinsicId; 2039 switch (BuiltinID) { 2040 default: 2041 llvm_unreachable("Unknown overflow builtin id."); 2042 case Builtin::BI__builtin_add_overflow: 2043 IntrinsicId = EncompassingInfo.Signed 2044 ? llvm::Intrinsic::sadd_with_overflow 2045 : llvm::Intrinsic::uadd_with_overflow; 2046 break; 2047 case Builtin::BI__builtin_sub_overflow: 2048 IntrinsicId = EncompassingInfo.Signed 2049 ? llvm::Intrinsic::ssub_with_overflow 2050 : llvm::Intrinsic::usub_with_overflow; 2051 break; 2052 case Builtin::BI__builtin_mul_overflow: 2053 IntrinsicId = EncompassingInfo.Signed 2054 ? llvm::Intrinsic::smul_with_overflow 2055 : llvm::Intrinsic::umul_with_overflow; 2056 break; 2057 } 2058 2059 llvm::Value *Left = EmitScalarExpr(LeftArg); 2060 llvm::Value *Right = EmitScalarExpr(RightArg); 2061 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 2062 2063 // Extend each operand to the encompassing type. 2064 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 2065 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 2066 2067 // Perform the operation on the extended values. 2068 llvm::Value *Overflow, *Result; 2069 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 2070 2071 if (EncompassingInfo.Width > ResultInfo.Width) { 2072 // The encompassing type is wider than the result type, so we need to 2073 // truncate it. 2074 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 2075 2076 // To see if the truncation caused an overflow, we will extend 2077 // the result and then compare it to the original result. 
2078 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 2079 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 2080 llvm::Value *TruncationOverflow = 2081 Builder.CreateICmpNE(Result, ResultTruncExt); 2082 2083 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 2084 Result = ResultTrunc; 2085 } 2086 2087 // Finally, store the result using the pointer. 2088 bool isVolatile = 2089 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2090 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 2091 2092 return RValue::get(Overflow); 2093 } 2094 2095 case Builtin::BI__builtin_uadd_overflow: 2096 case Builtin::BI__builtin_uaddl_overflow: 2097 case Builtin::BI__builtin_uaddll_overflow: 2098 case Builtin::BI__builtin_usub_overflow: 2099 case Builtin::BI__builtin_usubl_overflow: 2100 case Builtin::BI__builtin_usubll_overflow: 2101 case Builtin::BI__builtin_umul_overflow: 2102 case Builtin::BI__builtin_umull_overflow: 2103 case Builtin::BI__builtin_umulll_overflow: 2104 case Builtin::BI__builtin_sadd_overflow: 2105 case Builtin::BI__builtin_saddl_overflow: 2106 case Builtin::BI__builtin_saddll_overflow: 2107 case Builtin::BI__builtin_ssub_overflow: 2108 case Builtin::BI__builtin_ssubl_overflow: 2109 case Builtin::BI__builtin_ssubll_overflow: 2110 case Builtin::BI__builtin_smul_overflow: 2111 case Builtin::BI__builtin_smull_overflow: 2112 case Builtin::BI__builtin_smulll_overflow: { 2113 2114 // We translate all of these builtins directly to the relevant llvm IR node. 2115 2116 // Scalarize our inputs. 
2117 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2118 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2119 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2120 2121 // Decide which of the overflow intrinsics we are lowering to: 2122 llvm::Intrinsic::ID IntrinsicId; 2123 switch (BuiltinID) { 2124 default: llvm_unreachable("Unknown overflow builtin id."); 2125 case Builtin::BI__builtin_uadd_overflow: 2126 case Builtin::BI__builtin_uaddl_overflow: 2127 case Builtin::BI__builtin_uaddll_overflow: 2128 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2129 break; 2130 case Builtin::BI__builtin_usub_overflow: 2131 case Builtin::BI__builtin_usubl_overflow: 2132 case Builtin::BI__builtin_usubll_overflow: 2133 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2134 break; 2135 case Builtin::BI__builtin_umul_overflow: 2136 case Builtin::BI__builtin_umull_overflow: 2137 case Builtin::BI__builtin_umulll_overflow: 2138 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2139 break; 2140 case Builtin::BI__builtin_sadd_overflow: 2141 case Builtin::BI__builtin_saddl_overflow: 2142 case Builtin::BI__builtin_saddll_overflow: 2143 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2144 break; 2145 case Builtin::BI__builtin_ssub_overflow: 2146 case Builtin::BI__builtin_ssubl_overflow: 2147 case Builtin::BI__builtin_ssubll_overflow: 2148 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2149 break; 2150 case Builtin::BI__builtin_smul_overflow: 2151 case Builtin::BI__builtin_smull_overflow: 2152 case Builtin::BI__builtin_smulll_overflow: 2153 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2154 break; 2155 } 2156 2157 2158 llvm::Value *Carry; 2159 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2160 Builder.CreateStore(Sum, SumOutPtr); 2161 2162 return RValue::get(Carry); 2163 } 2164 case Builtin::BI__builtin_addressof: 2165 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2166 case Builtin::BI__builtin_operator_new: 2167 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2168 E->getArg(0), false); 2169 case Builtin::BI__builtin_operator_delete: 2170 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2171 E->getArg(0), true); 2172 case Builtin::BI__noop: 2173 // __noop always evaluates to an integer literal zero. 2174 return RValue::get(ConstantInt::get(IntTy, 0)); 2175 case Builtin::BI__builtin_call_with_static_chain: { 2176 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2177 const Expr *Chain = E->getArg(1); 2178 return EmitCall(Call->getCallee()->getType(), 2179 EmitCallee(Call->getCallee()), Call, ReturnValue, 2180 EmitScalarExpr(Chain)); 2181 } 2182 case Builtin::BI_InterlockedExchange8: 2183 case Builtin::BI_InterlockedExchange16: 2184 case Builtin::BI_InterlockedExchange: 2185 case Builtin::BI_InterlockedExchangePointer: 2186 return RValue::get( 2187 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2188 case Builtin::BI_InterlockedCompareExchangePointer: { 2189 llvm::Type *RTy; 2190 llvm::IntegerType *IntType = 2191 IntegerType::get(getLLVMContext(), 2192 getContext().getTypeSize(E->getType())); 2193 llvm::Type *IntPtrType = IntType->getPointerTo(); 2194 2195 llvm::Value *Destination = 2196 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2197 2198 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2199 RTy = Exchange->getType(); 2200 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2201 2202 llvm::Value *Comparand = 2203 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2204 2205 auto Result = 2206 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2207 AtomicOrdering::SequentiallyConsistent, 2208 AtomicOrdering::SequentiallyConsistent); 2209 Result->setVolatile(true); 2210 2211 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2212 0), 2213 RTy)); 2214 } 2215 case Builtin::BI_InterlockedCompareExchange8: 2216 case 
Builtin::BI_InterlockedCompareExchange16: 2217 case Builtin::BI_InterlockedCompareExchange: 2218 case Builtin::BI_InterlockedCompareExchange64: { 2219 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2220 EmitScalarExpr(E->getArg(0)), 2221 EmitScalarExpr(E->getArg(2)), 2222 EmitScalarExpr(E->getArg(1)), 2223 AtomicOrdering::SequentiallyConsistent, 2224 AtomicOrdering::SequentiallyConsistent); 2225 CXI->setVolatile(true); 2226 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2227 } 2228 case Builtin::BI_InterlockedIncrement16: 2229 case Builtin::BI_InterlockedIncrement: 2230 return RValue::get( 2231 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2232 case Builtin::BI_InterlockedDecrement16: 2233 case Builtin::BI_InterlockedDecrement: 2234 return RValue::get( 2235 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2236 case Builtin::BI_InterlockedAnd8: 2237 case Builtin::BI_InterlockedAnd16: 2238 case Builtin::BI_InterlockedAnd: 2239 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2240 case Builtin::BI_InterlockedExchangeAdd8: 2241 case Builtin::BI_InterlockedExchangeAdd16: 2242 case Builtin::BI_InterlockedExchangeAdd: 2243 return RValue::get( 2244 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2245 case Builtin::BI_InterlockedExchangeSub8: 2246 case Builtin::BI_InterlockedExchangeSub16: 2247 case Builtin::BI_InterlockedExchangeSub: 2248 return RValue::get( 2249 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2250 case Builtin::BI_InterlockedOr8: 2251 case Builtin::BI_InterlockedOr16: 2252 case Builtin::BI_InterlockedOr: 2253 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2254 case Builtin::BI_InterlockedXor8: 2255 case Builtin::BI_InterlockedXor16: 2256 case Builtin::BI_InterlockedXor: 2257 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2258 case Builtin::BI_interlockedbittestandset: 2259 return RValue::get( 2260 
EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); 2261 2262 case Builtin::BI__exception_code: 2263 case Builtin::BI_exception_code: 2264 return RValue::get(EmitSEHExceptionCode()); 2265 case Builtin::BI__exception_info: 2266 case Builtin::BI_exception_info: 2267 return RValue::get(EmitSEHExceptionInfo()); 2268 case Builtin::BI__abnormal_termination: 2269 case Builtin::BI_abnormal_termination: 2270 return RValue::get(EmitSEHAbnormalTermination()); 2271 case Builtin::BI_setjmpex: { 2272 if (getTarget().getTriple().isOSMSVCRT()) { 2273 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2274 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2275 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2276 llvm::Attribute::ReturnsTwice); 2277 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2278 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2279 "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); 2280 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2281 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2282 llvm::Value *FrameAddr = 2283 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2284 ConstantInt::get(Int32Ty, 0)); 2285 llvm::Value *Args[] = {Buf, FrameAddr}; 2286 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2287 CS.setAttributes(ReturnsTwiceAttr); 2288 return RValue::get(CS.getInstruction()); 2289 } 2290 break; 2291 } 2292 case Builtin::BI_setjmp: { 2293 if (getTarget().getTriple().isOSMSVCRT()) { 2294 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2295 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2296 llvm::Attribute::ReturnsTwice); 2297 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2298 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2299 llvm::CallSite CS; 2300 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2301 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2302 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2303 llvm::FunctionType::get(IntTy, 
ArgTypes, /*isVarArg=*/true), 2304 "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); 2305 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2306 llvm::Value *Args[] = {Buf, Count}; 2307 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2308 } else { 2309 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2310 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2311 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2312 "_setjmp", ReturnsTwiceAttr, /*Local=*/true); 2313 llvm::Value *FrameAddr = 2314 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2315 ConstantInt::get(Int32Ty, 0)); 2316 llvm::Value *Args[] = {Buf, FrameAddr}; 2317 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2318 } 2319 CS.setAttributes(ReturnsTwiceAttr); 2320 return RValue::get(CS.getInstruction()); 2321 } 2322 break; 2323 } 2324 2325 case Builtin::BI__GetExceptionInfo: { 2326 if (llvm::GlobalVariable *GV = 2327 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2328 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2329 break; 2330 } 2331 2332 case Builtin::BI__fastfail: 2333 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 2334 2335 case Builtin::BI__builtin_coro_size: { 2336 auto & Context = getContext(); 2337 auto SizeTy = Context.getSizeType(); 2338 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 2339 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2340 return RValue::get(Builder.CreateCall(F)); 2341 } 2342 2343 case Builtin::BI__builtin_coro_id: 2344 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2345 case Builtin::BI__builtin_coro_promise: 2346 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2347 case Builtin::BI__builtin_coro_resume: 2348 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2349 case Builtin::BI__builtin_coro_frame: 2350 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2351 case Builtin::BI__builtin_coro_free: 2352 return EmitCoroutineIntrinsic(E, 
Intrinsic::coro_free); 2353 case Builtin::BI__builtin_coro_destroy: 2354 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2355 case Builtin::BI__builtin_coro_done: 2356 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2357 case Builtin::BI__builtin_coro_alloc: 2358 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2359 case Builtin::BI__builtin_coro_begin: 2360 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2361 case Builtin::BI__builtin_coro_end: 2362 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2363 case Builtin::BI__builtin_coro_suspend: 2364 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2365 case Builtin::BI__builtin_coro_param: 2366 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2367 2368 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2369 case Builtin::BIread_pipe: 2370 case Builtin::BIwrite_pipe: { 2371 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2372 *Arg1 = EmitScalarExpr(E->getArg(1)); 2373 CGOpenCLRuntime OpenCLRT(CGM); 2374 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2375 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2376 2377 // Type of the generic packet parameter. 2378 unsigned GenericAS = 2379 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2380 llvm::Type *I8PTy = llvm::PointerType::get( 2381 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2382 2383 // Testing which overloaded version we should generate the call for. 2384 if (2U == E->getNumArgs()) { 2385 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2386 : "__write_pipe_2"; 2387 // Creating a generic function type to be able to call with any builtin or 2388 // user defined type. 
2389 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2390 llvm::FunctionType *FTy = llvm::FunctionType::get( 2391 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2392 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2393 return RValue::get( 2394 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2395 {Arg0, BCast, PacketSize, PacketAlign})); 2396 } else { 2397 assert(4 == E->getNumArgs() && 2398 "Illegal number of parameters to pipe function"); 2399 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2400 : "__write_pipe_4"; 2401 2402 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2403 Int32Ty, Int32Ty}; 2404 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2405 *Arg3 = EmitScalarExpr(E->getArg(3)); 2406 llvm::FunctionType *FTy = llvm::FunctionType::get( 2407 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2408 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2409 // We know the third argument is an integer type, but we may need to cast 2410 // it to i32. 2411 if (Arg2->getType() != Int32Ty) 2412 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2413 return RValue::get(Builder.CreateCall( 2414 CGM.CreateRuntimeFunction(FTy, Name), 2415 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2416 } 2417 } 2418 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2419 // functions 2420 case Builtin::BIreserve_read_pipe: 2421 case Builtin::BIreserve_write_pipe: 2422 case Builtin::BIwork_group_reserve_read_pipe: 2423 case Builtin::BIwork_group_reserve_write_pipe: 2424 case Builtin::BIsub_group_reserve_read_pipe: 2425 case Builtin::BIsub_group_reserve_write_pipe: { 2426 // Composing the mangled name for the function. 
2427 const char *Name; 2428 if (BuiltinID == Builtin::BIreserve_read_pipe) 2429 Name = "__reserve_read_pipe"; 2430 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2431 Name = "__reserve_write_pipe"; 2432 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2433 Name = "__work_group_reserve_read_pipe"; 2434 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2435 Name = "__work_group_reserve_write_pipe"; 2436 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2437 Name = "__sub_group_reserve_read_pipe"; 2438 else 2439 Name = "__sub_group_reserve_write_pipe"; 2440 2441 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2442 *Arg1 = EmitScalarExpr(E->getArg(1)); 2443 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2444 CGOpenCLRuntime OpenCLRT(CGM); 2445 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2446 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2447 2448 // Building the generic function prototype. 2449 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2450 llvm::FunctionType *FTy = llvm::FunctionType::get( 2451 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2452 // We know the second argument is an integer type, but we may need to cast 2453 // it to i32. 
2454 if (Arg1->getType() != Int32Ty) 2455 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2456 return RValue::get( 2457 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2458 {Arg0, Arg1, PacketSize, PacketAlign})); 2459 } 2460 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2461 // functions 2462 case Builtin::BIcommit_read_pipe: 2463 case Builtin::BIcommit_write_pipe: 2464 case Builtin::BIwork_group_commit_read_pipe: 2465 case Builtin::BIwork_group_commit_write_pipe: 2466 case Builtin::BIsub_group_commit_read_pipe: 2467 case Builtin::BIsub_group_commit_write_pipe: { 2468 const char *Name; 2469 if (BuiltinID == Builtin::BIcommit_read_pipe) 2470 Name = "__commit_read_pipe"; 2471 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2472 Name = "__commit_write_pipe"; 2473 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2474 Name = "__work_group_commit_read_pipe"; 2475 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2476 Name = "__work_group_commit_write_pipe"; 2477 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2478 Name = "__sub_group_commit_read_pipe"; 2479 else 2480 Name = "__sub_group_commit_write_pipe"; 2481 2482 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2483 *Arg1 = EmitScalarExpr(E->getArg(1)); 2484 CGOpenCLRuntime OpenCLRT(CGM); 2485 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2486 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2487 2488 // Building the generic function prototype. 
2489 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2490 llvm::FunctionType *FTy = 2491 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2492 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2493 2494 return RValue::get( 2495 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2496 {Arg0, Arg1, PacketSize, PacketAlign})); 2497 } 2498 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2499 case Builtin::BIget_pipe_num_packets: 2500 case Builtin::BIget_pipe_max_packets: { 2501 const char *Name; 2502 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2503 Name = "__get_pipe_num_packets"; 2504 else 2505 Name = "__get_pipe_max_packets"; 2506 2507 // Building the generic function prototype. 2508 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2509 CGOpenCLRuntime OpenCLRT(CGM); 2510 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2511 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2512 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2513 llvm::FunctionType *FTy = llvm::FunctionType::get( 2514 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2515 2516 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2517 {Arg0, PacketSize, PacketAlign})); 2518 } 2519 2520 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2521 case Builtin::BIto_global: 2522 case Builtin::BIto_local: 2523 case Builtin::BIto_private: { 2524 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2525 auto NewArgT = llvm::PointerType::get(Int8Ty, 2526 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2527 auto NewRetT = llvm::PointerType::get(Int8Ty, 2528 CGM.getContext().getTargetAddressSpace( 2529 E->getType()->getPointeeType().getAddressSpace())); 2530 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2531 llvm::Value *NewArg; 2532 if (Arg0->getType()->getPointerAddressSpace() != 2533 NewArgT->getPointerAddressSpace()) 2534 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2535 else 2536 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2537 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2538 auto NewCall = 2539 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2540 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2541 ConvertType(E->getType()))); 2542 } 2543 2544 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2545 // It contains four different overload formats specified in Table 6.13.17.1. 
2546 case Builtin::BIenqueue_kernel: { 2547 StringRef Name; // Generated function call name 2548 unsigned NumArgs = E->getNumArgs(); 2549 2550 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2551 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2552 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2553 2554 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2555 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2556 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 2557 llvm::Value *Range = NDRangeL.getAddress().getPointer(); 2558 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 2559 2560 if (NumArgs == 4) { 2561 // The most basic form of the call with parameters: 2562 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2563 Name = "__enqueue_kernel_basic"; 2564 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; 2565 llvm::FunctionType *FTy = llvm::FunctionType::get( 2566 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2567 2568 llvm::Value *Block = Builder.CreatePointerCast( 2569 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2570 2571 AttrBuilder B; 2572 B.addAttribute(Attribute::ByVal); 2573 llvm::AttributeList ByValAttrSet = 2574 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 2575 2576 auto RTCall = 2577 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 2578 {Queue, Flags, Range, Block}); 2579 RTCall->setAttributes(ByValAttrSet); 2580 return RValue::get(RTCall); 2581 } 2582 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2583 2584 // Could have events and/or vaargs. 2585 if (E->getArg(3)->getType()->isBlockPointerType()) { 2586 // No events passed, but has variadic arguments. 
2587 Name = "__enqueue_kernel_vaargs"; 2588 llvm::Value *Block = Builder.CreatePointerCast( 2589 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2590 // Create a vector of the arguments, as well as a constant value to 2591 // express to the runtime the number of variadic arguments. 2592 std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, 2593 ConstantInt::get(IntTy, NumArgs - 4)}; 2594 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, 2595 GenericVoidPtrTy, IntTy}; 2596 2597 // Each of the following arguments specifies the size of the corresponding 2598 // argument passed to the enqueued block. 2599 for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) 2600 Args.push_back( 2601 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2602 2603 llvm::FunctionType *FTy = llvm::FunctionType::get( 2604 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2605 return RValue::get( 2606 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2607 llvm::ArrayRef<llvm::Value *>(Args))); 2608 } 2609 // Any calls now have event arguments passed. 2610 if (NumArgs >= 7) { 2611 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2612 llvm::Type *EventPtrTy = EventTy->getPointerTo( 2613 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2614 2615 llvm::Value *NumEvents = 2616 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 2617 llvm::Value *EventList = 2618 E->getArg(4)->getType()->isArrayType() 2619 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2620 : EmitScalarExpr(E->getArg(4)); 2621 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2622 // Convert to generic address space. 
2623 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 2624 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 2625 llvm::Value *Block = Builder.CreatePointerCast( 2626 EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); 2627 2628 std::vector<llvm::Type *> ArgTys = { 2629 QueueTy, Int32Ty, RangeTy, Int32Ty, 2630 EventPtrTy, EventPtrTy, GenericVoidPtrTy}; 2631 2632 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2633 EventList, ClkEvent, Block}; 2634 2635 if (NumArgs == 7) { 2636 // Has events but no variadics. 2637 Name = "__enqueue_kernel_basic_events"; 2638 llvm::FunctionType *FTy = llvm::FunctionType::get( 2639 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2640 return RValue::get( 2641 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2642 llvm::ArrayRef<llvm::Value *>(Args))); 2643 } 2644 // Has event info and variadics 2645 // Pass the number of variadics to the runtime function too. 2646 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2647 ArgTys.push_back(Int32Ty); 2648 Name = "__enqueue_kernel_events_vaargs"; 2649 2650 // Each of the following arguments specifies the size of the corresponding 2651 // argument passed to the enqueued block. 2652 for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) 2653 Args.push_back( 2654 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2655 2656 llvm::FunctionType *FTy = llvm::FunctionType::get( 2657 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2658 return RValue::get( 2659 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2660 llvm::ArrayRef<llvm::Value *>(Args))); 2661 } 2662 LLVM_FALLTHROUGH; 2663 } 2664 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2665 // parameter. 
2666 case Builtin::BIget_kernel_work_group_size: { 2667 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2668 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2669 Value *Arg = EmitScalarExpr(E->getArg(0)); 2670 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2671 return RValue::get(Builder.CreateCall( 2672 CGM.CreateRuntimeFunction( 2673 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2674 "__get_kernel_work_group_size_impl"), 2675 Arg)); 2676 } 2677 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2678 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2679 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2680 Value *Arg = EmitScalarExpr(E->getArg(0)); 2681 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2682 return RValue::get(Builder.CreateCall( 2683 CGM.CreateRuntimeFunction( 2684 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2685 "__get_kernel_preferred_work_group_multiple_impl"), 2686 Arg)); 2687 } 2688 case Builtin::BIprintf: 2689 if (getTarget().getTriple().isNVPTX()) 2690 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 2691 break; 2692 case Builtin::BI__builtin_canonicalize: 2693 case Builtin::BI__builtin_canonicalizef: 2694 case Builtin::BI__builtin_canonicalizel: 2695 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2696 2697 case Builtin::BI__builtin_thread_pointer: { 2698 if (!getContext().getTargetInfo().isTLSSupported()) 2699 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2700 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 2701 break; 2702 } 2703 case Builtin::BI__builtin_os_log_format: { 2704 assert(E->getNumArgs() >= 2 && 2705 "__builtin_os_log_format takes at least 2 arguments"); 2706 analyze_os_log::OSLogBufferLayout Layout; 2707 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2708 Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); 2709 // Ignore argument 1, the format string. 
It is not currently used. 2710 CharUnits Offset; 2711 Builder.CreateStore( 2712 Builder.getInt8(Layout.getSummaryByte()), 2713 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 2714 Builder.CreateStore( 2715 Builder.getInt8(Layout.getNumArgsByte()), 2716 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 2717 2718 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 2719 for (const auto &Item : Layout.Items) { 2720 Builder.CreateStore( 2721 Builder.getInt8(Item.getDescriptorByte()), 2722 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 2723 Builder.CreateStore( 2724 Builder.getInt8(Item.getSizeByte()), 2725 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 2726 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); 2727 if (const Expr *TheExpr = Item.getExpr()) { 2728 Addr = Builder.CreateElementBitCast( 2729 Addr, ConvertTypeForMem(TheExpr->getType())); 2730 // Check if this is a retainable type. 2731 if (TheExpr->getType()->isObjCRetainableType()) { 2732 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 2733 "Only scalar can be a ObjC retainable type"); 2734 llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); 2735 RValue RV = RValue::get(SV); 2736 LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); 2737 EmitStoreThroughLValue(RV, LV); 2738 // Check if the object is constant, if not, save it in 2739 // RetainableOperands. 2740 if (!isa<Constant>(SV)) 2741 RetainableOperands.push_back(SV); 2742 } else { 2743 EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); 2744 } 2745 } else { 2746 Addr = Builder.CreateElementBitCast(Addr, Int32Ty); 2747 Builder.CreateStore( 2748 Builder.getInt32(Item.getConstValue().getQuantity()), Addr); 2749 } 2750 Offset += Item.size(); 2751 } 2752 2753 // Push a clang.arc.use cleanup for each object in RetainableOperands. 
The 2754 // cleanup will cause the use to appear after the final log call, keeping 2755 // the object valid while it's held in the log buffer. Note that if there's 2756 // a release cleanup on the object, it will already be active; since 2757 // cleanups are emitted in reverse order, the use will occur before the 2758 // object is released. 2759 if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && 2760 CGM.getCodeGenOpts().OptimizationLevel != 0) 2761 for (llvm::Value *object : RetainableOperands) 2762 pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); 2763 2764 return RValue::get(BufAddr.getPointer()); 2765 } 2766 2767 case Builtin::BI__builtin_os_log_format_buffer_size: { 2768 analyze_os_log::OSLogBufferLayout Layout; 2769 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2770 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 2771 Layout.size().getQuantity())); 2772 } 2773 2774 case Builtin::BI__xray_customevent: { 2775 if (!ShouldXRayInstrumentFunction()) 2776 return RValue::getIgnored(); 2777 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { 2778 if (XRayAttr->neverXRayInstrument()) 2779 return RValue::getIgnored(); 2780 } 2781 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 2782 auto FTy = F->getFunctionType(); 2783 auto Arg0 = E->getArg(0); 2784 auto Arg0Val = EmitScalarExpr(Arg0); 2785 auto Arg0Ty = Arg0->getType(); 2786 auto PTy0 = FTy->getParamType(0); 2787 if (PTy0 != Arg0Val->getType()) { 2788 if (Arg0Ty->isArrayType()) 2789 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 2790 else 2791 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 2792 } 2793 auto Arg1 = EmitScalarExpr(E->getArg(1)); 2794 auto PTy1 = FTy->getParamType(1); 2795 if (PTy1 != Arg1->getType()) 2796 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 2797 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 2798 } 2799 } 2800 2801 // If this is an alias for a lib function (e.g. 
__builtin_sin), emit 2802 // the call using the normal call path, but using the unmangled 2803 // version of the function name. 2804 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2805 return emitLibraryCall(*this, FD, E, 2806 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2807 2808 // If this is a predefined lib function (e.g. malloc), emit the call 2809 // using exactly the normal call path. 2810 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2811 return emitLibraryCall(*this, FD, E, 2812 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 2813 2814 // Check that a call to a target specific builtin has the correct target 2815 // features. 2816 // This is down here to avoid non-target specific builtins, however, if 2817 // generic builtins start to require generic target features then we 2818 // can move this up to the beginning of the function. 2819 checkTargetFeatures(E, FD); 2820 2821 // See if we have a target specific intrinsic. 2822 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2823 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2824 StringRef Prefix = 2825 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 2826 if (!Prefix.empty()) { 2827 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 2828 // NOTE we dont need to perform a compatibility flag check here since the 2829 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 2830 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 2831 if (IntrinsicID == Intrinsic::not_intrinsic) 2832 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 2833 } 2834 2835 if (IntrinsicID != Intrinsic::not_intrinsic) { 2836 SmallVector<Value*, 16> Args; 2837 2838 // Find out if any arguments are required to be integer constant 2839 // expressions. 
2840 unsigned ICEArguments = 0; 2841 ASTContext::GetBuiltinTypeError Error; 2842 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2843 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2844 2845 Function *F = CGM.getIntrinsic(IntrinsicID); 2846 llvm::FunctionType *FTy = F->getFunctionType(); 2847 2848 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2849 Value *ArgValue; 2850 // If this is a normal argument, just emit it as a scalar. 2851 if ((ICEArguments & (1 << i)) == 0) { 2852 ArgValue = EmitScalarExpr(E->getArg(i)); 2853 } else { 2854 // If this is required to be a constant, constant fold it so that we 2855 // know that the generated intrinsic gets a ConstantInt. 2856 llvm::APSInt Result; 2857 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 2858 assert(IsConst && "Constant arg isn't actually constant?"); 2859 (void)IsConst; 2860 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2861 } 2862 2863 // If the intrinsic arg type is different from the builtin arg type 2864 // we need to do a bit cast. 2865 llvm::Type *PTy = FTy->getParamType(i); 2866 if (PTy != ArgValue->getType()) { 2867 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 2868 "Must be able to losslessly bit cast to param"); 2869 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2870 } 2871 2872 Args.push_back(ArgValue); 2873 } 2874 2875 Value *V = Builder.CreateCall(F, Args); 2876 QualType BuiltinRetType = E->getType(); 2877 2878 llvm::Type *RetTy = VoidTy; 2879 if (!BuiltinRetType->isVoidType()) 2880 RetTy = ConvertType(BuiltinRetType); 2881 2882 if (RetTy != V->getType()) { 2883 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 2884 "Must be able to losslessly bit cast result type"); 2885 V = Builder.CreateBitCast(V, RetTy); 2886 } 2887 2888 return RValue::get(V); 2889 } 2890 2891 // See if we have a target specific builtin that needs to be lowered. 
2892 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 2893 return RValue::get(V); 2894 2895 ErrorUnsupported(E, "builtin function"); 2896 2897 // Unknown builtin, for now just dump it out and return undef. 2898 return GetUndefRValue(E->getType()); 2899 } 2900 2901 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 2902 unsigned BuiltinID, const CallExpr *E, 2903 llvm::Triple::ArchType Arch) { 2904 switch (Arch) { 2905 case llvm::Triple::arm: 2906 case llvm::Triple::armeb: 2907 case llvm::Triple::thumb: 2908 case llvm::Triple::thumbeb: 2909 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 2910 case llvm::Triple::aarch64: 2911 case llvm::Triple::aarch64_be: 2912 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 2913 case llvm::Triple::x86: 2914 case llvm::Triple::x86_64: 2915 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 2916 case llvm::Triple::ppc: 2917 case llvm::Triple::ppc64: 2918 case llvm::Triple::ppc64le: 2919 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 2920 case llvm::Triple::r600: 2921 case llvm::Triple::amdgcn: 2922 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 2923 case llvm::Triple::systemz: 2924 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 2925 case llvm::Triple::nvptx: 2926 case llvm::Triple::nvptx64: 2927 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 2928 case llvm::Triple::wasm32: 2929 case llvm::Triple::wasm64: 2930 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 2931 default: 2932 return nullptr; 2933 } 2934 } 2935 2936 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 2937 const CallExpr *E) { 2938 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 2939 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 2940 return EmitTargetArchBuiltinExpr( 2941 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 2942 getContext().getAuxTargetInfo()->getTriple().getArch()); 2943 } 2944 2945 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 2946 getTarget().getTriple().getArch()); 
2947 } 2948 2949 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 2950 NeonTypeFlags TypeFlags, 2951 bool V1Ty=false) { 2952 int IsQuad = TypeFlags.isQuad(); 2953 switch (TypeFlags.getEltType()) { 2954 case NeonTypeFlags::Int8: 2955 case NeonTypeFlags::Poly8: 2956 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 2957 case NeonTypeFlags::Int16: 2958 case NeonTypeFlags::Poly16: 2959 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 2960 case NeonTypeFlags::Float16: 2961 return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); 2962 case NeonTypeFlags::Int32: 2963 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 2964 case NeonTypeFlags::Int64: 2965 case NeonTypeFlags::Poly64: 2966 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 2967 case NeonTypeFlags::Poly128: 2968 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 2969 // There is a lot of i128 and f128 API missing. 2970 // so we use v16i8 to represent poly128 and get pattern matched. 2971 return llvm::VectorType::get(CGF->Int8Ty, 16); 2972 case NeonTypeFlags::Float32: 2973 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 2974 case NeonTypeFlags::Float64: 2975 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 2976 } 2977 llvm_unreachable("Unknown vector element type!"); 2978 } 2979 2980 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 2981 NeonTypeFlags IntTypeFlags) { 2982 int IsQuad = IntTypeFlags.isQuad(); 2983 switch (IntTypeFlags.getEltType()) { 2984 case NeonTypeFlags::Int16: 2985 return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad)); 2986 case NeonTypeFlags::Int32: 2987 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 2988 case NeonTypeFlags::Int64: 2989 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 2990 default: 2991 llvm_unreachable("Type can't be converted to floating-point!"); 2992 } 2993 } 2994 2995 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 2996 unsigned nElts = V->getType()->getVectorNumElements(); 2997 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 2998 return Builder.CreateShuffleVector(V, V, SV, "lane"); 2999 } 3000 3001 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 3002 const char *name, 3003 unsigned shift, bool rightshift) { 3004 unsigned j = 0; 3005 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3006 ai != ae; ++ai, ++j) 3007 if (shift > 0 && shift == j) 3008 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 3009 else 3010 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 3011 3012 return Builder.CreateCall(F, Ops, name); 3013 } 3014 3015 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 3016 bool neg) { 3017 int SV = cast<ConstantInt>(V)->getSExtValue(); 3018 return ConstantInt::get(Ty, neg ? -SV : SV); 3019 } 3020 3021 // \brief Right-shift a vector by a constant. 
3022 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 3023 llvm::Type *Ty, bool usgn, 3024 const char *name) { 3025 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3026 3027 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 3028 int EltSize = VTy->getScalarSizeInBits(); 3029 3030 Vec = Builder.CreateBitCast(Vec, Ty); 3031 3032 // lshr/ashr are undefined when the shift amount is equal to the vector 3033 // element size. 3034 if (ShiftAmt == EltSize) { 3035 if (usgn) { 3036 // Right-shifting an unsigned value by its size yields 0. 3037 return llvm::ConstantAggregateZero::get(VTy); 3038 } else { 3039 // Right-shifting a signed value by its size is equivalent 3040 // to a shift of size-1. 3041 --ShiftAmt; 3042 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 3043 } 3044 } 3045 3046 Shift = EmitNeonShiftVector(Shift, Ty, false); 3047 if (usgn) 3048 return Builder.CreateLShr(Vec, Shift, name); 3049 else 3050 return Builder.CreateAShr(Vec, Shift, name); 3051 } 3052 3053 enum { 3054 AddRetType = (1 << 0), 3055 Add1ArgType = (1 << 1), 3056 Add2ArgTypes = (1 << 2), 3057 3058 VectorizeRetType = (1 << 3), 3059 VectorizeArgTypes = (1 << 4), 3060 3061 InventFloatType = (1 << 5), 3062 UnsignedAlts = (1 << 6), 3063 3064 Use64BitVectors = (1 << 7), 3065 Use128BitVectors = (1 << 8), 3066 3067 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 3068 VectorRet = AddRetType | VectorizeRetType, 3069 VectorRetGetArgs01 = 3070 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 3071 FpCmpzModifiers = 3072 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 3073 }; 3074 3075 namespace { 3076 struct NeonIntrinsicInfo { 3077 const char *NameHint; 3078 unsigned BuiltinID; 3079 unsigned LLVMIntrinsic; 3080 unsigned AltLLVMIntrinsic; 3081 unsigned TypeModifier; 3082 3083 bool operator<(unsigned RHSBuiltinID) const { 3084 return BuiltinID < RHSBuiltinID; 3085 } 3086 bool operator<(const NeonIntrinsicInfo &TE) const { 3087 return 
BuiltinID < TE.BuiltinID; 3088 } 3089 }; 3090 } // end anonymous namespace 3091 3092 #define NEONMAP0(NameBase) \ 3093 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 3094 3095 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 3096 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3097 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 3098 3099 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 3100 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3101 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 3102 TypeModifier } 3103 3104 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 3105 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3106 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3107 NEONMAP1(vabs_v, arm_neon_vabs, 0), 3108 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 3109 NEONMAP0(vaddhn_v), 3110 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 3111 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 3112 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 3113 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 3114 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 3115 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 3116 NEONMAP1(vcage_v, arm_neon_vacge, 0), 3117 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 3118 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 3119 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 3120 NEONMAP1(vcale_v, arm_neon_vacge, 0), 3121 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 3122 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 3123 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 3124 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 3125 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 3126 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3127 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3128 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 3129 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3130 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 3131 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 3132 NEONMAP0(vcvt_f32_v), 3133 NEONMAP2(vcvt_n_f16_v, 
arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3134 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3135 NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0), 3136 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3137 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3138 NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0), 3139 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3140 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3141 NEONMAP0(vcvt_s16_v), 3142 NEONMAP0(vcvt_s32_v), 3143 NEONMAP0(vcvt_s64_v), 3144 NEONMAP0(vcvt_u16_v), 3145 NEONMAP0(vcvt_u32_v), 3146 NEONMAP0(vcvt_u64_v), 3147 NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), 3148 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 3149 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 3150 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 3151 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 3152 NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0), 3153 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 3154 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 3155 NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0), 3156 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 3157 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 3158 NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0), 3159 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 3160 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 3161 NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0), 3162 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 3163 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 3164 NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0), 3165 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 3166 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 3167 NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0), 3168 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 3169 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 3170 NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0), 3171 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 3172 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 3173 NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0), 3174 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 3175 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 
0), 3176 NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0), 3177 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 3178 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 3179 NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0), 3180 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 3181 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 3182 NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0), 3183 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 3184 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 3185 NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0), 3186 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 3187 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 3188 NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0), 3189 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 3190 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 3191 NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0), 3192 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 3193 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 3194 NEONMAP0(vcvtq_f32_v), 3195 NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3196 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3197 NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0), 3198 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3199 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3200 NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0), 3201 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3202 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3203 NEONMAP0(vcvtq_s16_v), 3204 NEONMAP0(vcvtq_s32_v), 3205 NEONMAP0(vcvtq_s64_v), 3206 NEONMAP0(vcvtq_u16_v), 3207 NEONMAP0(vcvtq_u32_v), 3208 NEONMAP0(vcvtq_u64_v), 3209 NEONMAP0(vext_v), 3210 NEONMAP0(vextq_v), 3211 NEONMAP0(vfma_v), 3212 NEONMAP0(vfmaq_v), 3213 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3214 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3215 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3216 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3217 
NEONMAP0(vld1_dup_v), 3218 NEONMAP1(vld1_v, arm_neon_vld1, 0), 3219 NEONMAP0(vld1q_dup_v), 3220 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 3221 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 3222 NEONMAP1(vld2_v, arm_neon_vld2, 0), 3223 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 3224 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 3225 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 3226 NEONMAP1(vld3_v, arm_neon_vld3, 0), 3227 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 3228 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 3229 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 3230 NEONMAP1(vld4_v, arm_neon_vld4, 0), 3231 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 3232 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 3233 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3234 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 3235 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 3236 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3237 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3238 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 3239 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 3240 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3241 NEONMAP0(vmovl_v), 3242 NEONMAP0(vmovn_v), 3243 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 3244 NEONMAP0(vmull_v), 3245 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 3246 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3247 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3248 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 3249 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3250 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3251 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 3252 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 3253 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 
3254 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 3255 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 3256 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3257 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3258 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 3259 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 3260 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 3261 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 3262 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 3263 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 3264 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 3265 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 3266 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 3267 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 3268 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 3269 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3270 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3271 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3272 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3273 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3274 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3275 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 3276 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 3277 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3278 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3279 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 3280 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3281 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3282 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 3283 
NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 3284 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3285 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3286 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 3287 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 3288 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 3289 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 3290 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 3291 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 3292 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 3293 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 3294 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 3295 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 3296 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 3297 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 3298 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3299 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3300 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3301 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3302 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 3303 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 3304 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 3305 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 3306 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 3307 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 3308 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 3309 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 3310 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 3311 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 3312 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 3313 NEONMAP0(vshl_n_v), 3314 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 3315 NEONMAP0(vshll_n_v), 3316 NEONMAP0(vshlq_n_v), 3317 
NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 3318 NEONMAP0(vshr_n_v), 3319 NEONMAP0(vshrn_n_v), 3320 NEONMAP0(vshrq_n_v), 3321 NEONMAP1(vst1_v, arm_neon_vst1, 0), 3322 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 3323 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 3324 NEONMAP1(vst2_v, arm_neon_vst2, 0), 3325 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 3326 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 3327 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 3328 NEONMAP1(vst3_v, arm_neon_vst3, 0), 3329 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 3330 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 3331 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 3332 NEONMAP1(vst4_v, arm_neon_vst4, 0), 3333 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 3334 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 3335 NEONMAP0(vsubhn_v), 3336 NEONMAP0(vtrn_v), 3337 NEONMAP0(vtrnq_v), 3338 NEONMAP0(vtst_v), 3339 NEONMAP0(vtstq_v), 3340 NEONMAP0(vuzp_v), 3341 NEONMAP0(vuzpq_v), 3342 NEONMAP0(vzip_v), 3343 NEONMAP0(vzipq_v) 3344 }; 3345 3346 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 3347 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 3348 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 3349 NEONMAP0(vaddhn_v), 3350 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 3351 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 3352 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 3353 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 3354 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 3355 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 3356 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 3357 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 3358 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 3359 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 3360 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 3361 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 3362 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 3363 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 3364 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3365 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3366 NEONMAP1(vcnt_v, 
ctpop, Add1ArgType), 3367 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3368 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 3369 NEONMAP0(vcvt_f16_v), 3370 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 3371 NEONMAP0(vcvt_f32_v), 3372 NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3373 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3374 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3375 NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), 3376 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 3377 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 3378 NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), 3379 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 3380 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 3381 NEONMAP0(vcvtq_f16_v), 3382 NEONMAP0(vcvtq_f32_v), 3383 NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3384 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3385 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3386 NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), 3387 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 3388 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 3389 NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), 3390 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 3391 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 3392 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 3393 NEONMAP0(vext_v), 3394 NEONMAP0(vextq_v), 3395 NEONMAP0(vfma_v), 3396 NEONMAP0(vfmaq_v), 3397 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 3398 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 3399 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 3400 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | 
UnsignedAlts), 3401 NEONMAP0(vmovl_v), 3402 NEONMAP0(vmovn_v), 3403 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 3404 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 3405 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 3406 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 3407 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 3408 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 3409 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 3410 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 3411 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 3412 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 3413 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 3414 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 3415 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 3416 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 3417 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 3418 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 3419 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 3420 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 3421 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 3422 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 3423 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 3424 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 3425 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 3426 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 3427 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 3428 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 3429 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | 
UnsignedAlts), 3430 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 3431 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 3432 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 3433 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 3434 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 3435 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 3436 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 3437 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 3438 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 3439 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 3440 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 3441 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 3442 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 3443 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 3444 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 3445 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 3446 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 3447 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 3448 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 3449 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 3450 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 3451 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 3452 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 3453 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 3454 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 3455 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 3456 NEONMAP0(vshl_n_v), 3457 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 3458 NEONMAP0(vshll_n_v), 3459 NEONMAP0(vshlq_n_v), 
3460 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 3461 NEONMAP0(vshr_n_v), 3462 NEONMAP0(vshrn_n_v), 3463 NEONMAP0(vshrq_n_v), 3464 NEONMAP0(vsubhn_v), 3465 NEONMAP0(vtst_v), 3466 NEONMAP0(vtstq_v), 3467 }; 3468 3469 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 3470 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 3471 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 3472 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 3473 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3474 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3475 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3476 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3477 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3478 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3479 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3480 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3481 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 3482 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3483 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 3484 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3485 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3486 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3487 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3488 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3489 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3490 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3491 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3492 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3493 NEONMAP1(vcalts_f32, aarch64_neon_facgt, 
AddRetType | Add1ArgType),
  // AArch64SISDIntrinsicMap rows, continued. Row order is significant:
  // findNeonIntrinsicInMap() asserts that the map it searches is sorted, so
  // new rows must be inserted at the position that keeps the table ordered.
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu,
           AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun,
           AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun,
           VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun,
           VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64,
aarch64_neon_vsri, Vectorize1ArgType), 3658 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3659 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 3660 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3661 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 3662 }; 3663 3664 #undef NEONMAP0 3665 #undef NEONMAP1 3666 #undef NEONMAP2 3667 3668 static bool NEONSIMDIntrinsicsProvenSorted = false; 3669 3670 static bool AArch64SIMDIntrinsicsProvenSorted = false; 3671 static bool AArch64SISDIntrinsicsProvenSorted = false; 3672 3673 3674 static const NeonIntrinsicInfo * 3675 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 3676 unsigned BuiltinID, bool &MapProvenSorted) { 3677 3678 #ifndef NDEBUG 3679 if (!MapProvenSorted) { 3680 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 3681 MapProvenSorted = true; 3682 } 3683 #endif 3684 3685 const NeonIntrinsicInfo *Builtin = 3686 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 3687 3688 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 3689 return Builtin; 3690 3691 return nullptr; 3692 } 3693 3694 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 3695 unsigned Modifier, 3696 llvm::Type *ArgType, 3697 const CallExpr *E) { 3698 int VectorSize = 0; 3699 if (Modifier & Use64BitVectors) 3700 VectorSize = 64; 3701 else if (Modifier & Use128BitVectors) 3702 VectorSize = 128; 3703 3704 // Return type. 3705 SmallVector<llvm::Type *, 3> Tys; 3706 if (Modifier & AddRetType) { 3707 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 3708 if (Modifier & VectorizeRetType) 3709 Ty = llvm::VectorType::get( 3710 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 3711 3712 Tys.push_back(Ty); 3713 } 3714 3715 // Arguments. 3716 if (Modifier & VectorizeArgTypes) { 3717 int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 3718 ArgType = llvm::VectorType::get(ArgType, Elts); 3719 } 3720 3721 if (Modifier & (Add1ArgType | Add2ArgTypes)) 3722 Tys.push_back(ArgType); 3723 3724 if (Modifier & Add2ArgTypes) 3725 Tys.push_back(ArgType); 3726 3727 if (Modifier & InventFloatType) 3728 Tys.push_back(FloatTy); 3729 3730 return CGM.getIntrinsic(IntrinsicID, Tys); 3731 } 3732 3733 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 3734 const NeonIntrinsicInfo &SISDInfo, 3735 SmallVectorImpl<Value *> &Ops, 3736 const CallExpr *E) { 3737 unsigned BuiltinID = SISDInfo.BuiltinID; 3738 unsigned int Int = SISDInfo.LLVMIntrinsic; 3739 unsigned Modifier = SISDInfo.TypeModifier; 3740 const char *s = SISDInfo.NameHint; 3741 3742 switch (BuiltinID) { 3743 case NEON::BI__builtin_neon_vcled_s64: 3744 case NEON::BI__builtin_neon_vcled_u64: 3745 case NEON::BI__builtin_neon_vcles_f32: 3746 case NEON::BI__builtin_neon_vcled_f64: 3747 case NEON::BI__builtin_neon_vcltd_s64: 3748 case NEON::BI__builtin_neon_vcltd_u64: 3749 case NEON::BI__builtin_neon_vclts_f32: 3750 case NEON::BI__builtin_neon_vcltd_f64: 3751 case NEON::BI__builtin_neon_vcales_f32: 3752 case NEON::BI__builtin_neon_vcaled_f64: 3753 case NEON::BI__builtin_neon_vcalts_f32: 3754 case NEON::BI__builtin_neon_vcaltd_f64: 3755 // Only one direction of comparisons actually exist, cmle is actually a cmge 3756 // with swapped operands. The table gives us the right intrinsic but we 3757 // still need to do the swap. 3758 std::swap(Ops[0], Ops[1]); 3759 break; 3760 } 3761 3762 assert(Int && "Generic code assumes a valid intrinsic"); 3763 3764 // Determine the type(s) of this overloaded AArch64 intrinsic. 
3765 const Expr *Arg = E->getArg(0); 3766 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 3767 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 3768 3769 int j = 0; 3770 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 3771 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3772 ai != ae; ++ai, ++j) { 3773 llvm::Type *ArgTy = ai->getType(); 3774 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 3775 ArgTy->getPrimitiveSizeInBits()) 3776 continue; 3777 3778 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 3779 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 3780 // it before inserting. 3781 Ops[j] = 3782 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 3783 Ops[j] = 3784 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 3785 } 3786 3787 Value *Result = CGF.EmitNeonCall(F, Ops, s); 3788 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 3789 if (ResultType->getPrimitiveSizeInBits() < 3790 Result->getType()->getPrimitiveSizeInBits()) 3791 return CGF.Builder.CreateExtractElement(Result, C0); 3792 3793 return CGF.Builder.CreateBitCast(Result, ResultType, s); 3794 } 3795 3796 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 3797 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 3798 const char *NameHint, unsigned Modifier, const CallExpr *E, 3799 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) { 3800 // Get the last argument, which specifies the vector type. 3801 llvm::APSInt NeonTypeConst; 3802 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3803 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 3804 return nullptr; 3805 3806 // Determine the type of this overloaded NEON intrinsic. 
3807 NeonTypeFlags Type(NeonTypeConst.getZExtValue()); 3808 bool Usgn = Type.isUnsigned(); 3809 bool Quad = Type.isQuad(); 3810 3811 llvm::VectorType *VTy = GetNeonType(this, Type); 3812 llvm::Type *Ty = VTy; 3813 if (!Ty) 3814 return nullptr; 3815 3816 auto getAlignmentValue32 = [&](Address addr) -> Value* { 3817 return Builder.getInt32(addr.getAlignment().getQuantity()); 3818 }; 3819 3820 unsigned Int = LLVMIntrinsic; 3821 if ((Modifier & UnsignedAlts) && !Usgn) 3822 Int = AltLLVMIntrinsic; 3823 3824 switch (BuiltinID) { 3825 default: break; 3826 case NEON::BI__builtin_neon_vabs_v: 3827 case NEON::BI__builtin_neon_vabsq_v: 3828 if (VTy->getElementType()->isFloatingPointTy()) 3829 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 3830 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 3831 case NEON::BI__builtin_neon_vaddhn_v: { 3832 llvm::VectorType *SrcTy = 3833 llvm::VectorType::getExtendedElementVectorType(VTy); 3834 3835 // %sum = add <4 x i32> %lhs, %rhs 3836 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3837 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 3838 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 3839 3840 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 3841 Constant *ShiftAmt = 3842 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 3843 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 3844 3845 // %res = trunc <4 x i32> %high to <4 x i16> 3846 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 3847 } 3848 case NEON::BI__builtin_neon_vcale_v: 3849 case NEON::BI__builtin_neon_vcaleq_v: 3850 case NEON::BI__builtin_neon_vcalt_v: 3851 case NEON::BI__builtin_neon_vcaltq_v: 3852 std::swap(Ops[0], Ops[1]); 3853 LLVM_FALLTHROUGH; 3854 case NEON::BI__builtin_neon_vcage_v: 3855 case NEON::BI__builtin_neon_vcageq_v: 3856 case NEON::BI__builtin_neon_vcagt_v: 3857 case NEON::BI__builtin_neon_vcagtq_v: { 3858 llvm::Type *Ty; 3859 switch (VTy->getScalarSizeInBits()) { 3860 
default: llvm_unreachable("unexpected type"); 3861 case 32: 3862 Ty = FloatTy; 3863 break; 3864 case 64: 3865 Ty = DoubleTy; 3866 break; 3867 case 16: 3868 Ty = HalfTy; 3869 break; 3870 } 3871 llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements()); 3872 llvm::Type *Tys[] = { VTy, VecFlt }; 3873 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3874 return EmitNeonCall(F, Ops, NameHint); 3875 } 3876 case NEON::BI__builtin_neon_vclz_v: 3877 case NEON::BI__builtin_neon_vclzq_v: 3878 // We generate target-independent intrinsic, which needs a second argument 3879 // for whether or not clz of zero is undefined; on ARM it isn't. 3880 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 3881 break; 3882 case NEON::BI__builtin_neon_vcvt_f32_v: 3883 case NEON::BI__builtin_neon_vcvtq_f32_v: 3884 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3885 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); 3886 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 3887 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 3888 case NEON::BI__builtin_neon_vcvt_f16_v: 3889 case NEON::BI__builtin_neon_vcvtq_f16_v: 3890 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3891 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad)); 3892 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 3893 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 3894 case NEON::BI__builtin_neon_vcvt_n_f16_v: 3895 case NEON::BI__builtin_neon_vcvt_n_f32_v: 3896 case NEON::BI__builtin_neon_vcvt_n_f64_v: 3897 case NEON::BI__builtin_neon_vcvtq_n_f16_v: 3898 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 3899 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 3900 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; 3901 Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; 3902 Function *F = CGM.getIntrinsic(Int, Tys); 3903 return EmitNeonCall(F, Ops, "vcvt_n"); 3904 } 3905 case NEON::BI__builtin_neon_vcvt_n_s16_v: 3906 case NEON::BI__builtin_neon_vcvt_n_s32_v: 3907 case NEON::BI__builtin_neon_vcvt_n_u16_v: 3908 case NEON::BI__builtin_neon_vcvt_n_u32_v: 3909 case NEON::BI__builtin_neon_vcvt_n_s64_v: 3910 case NEON::BI__builtin_neon_vcvt_n_u64_v: 3911 case NEON::BI__builtin_neon_vcvtq_n_s16_v: 3912 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 3913 case NEON::BI__builtin_neon_vcvtq_n_u16_v: 3914 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 3915 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 3916 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 3917 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 3918 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3919 return EmitNeonCall(F, Ops, "vcvt_n"); 3920 } 3921 case NEON::BI__builtin_neon_vcvt_s32_v: 3922 case NEON::BI__builtin_neon_vcvt_u32_v: 3923 case NEON::BI__builtin_neon_vcvt_s64_v: 3924 case NEON::BI__builtin_neon_vcvt_u64_v: 3925 case NEON::BI__builtin_neon_vcvt_s16_v: 3926 case NEON::BI__builtin_neon_vcvt_u16_v: 3927 case NEON::BI__builtin_neon_vcvtq_s32_v: 3928 case NEON::BI__builtin_neon_vcvtq_u32_v: 3929 case NEON::BI__builtin_neon_vcvtq_s64_v: 3930 case NEON::BI__builtin_neon_vcvtq_u64_v: 3931 case NEON::BI__builtin_neon_vcvtq_s16_v: 3932 case NEON::BI__builtin_neon_vcvtq_u16_v: { 3933 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 3934 return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 3935 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 3936 } 3937 case NEON::BI__builtin_neon_vcvta_s16_v: 3938 case NEON::BI__builtin_neon_vcvta_s32_v: 3939 case NEON::BI__builtin_neon_vcvta_s64_v: 3940 case NEON::BI__builtin_neon_vcvta_u32_v: 3941 case NEON::BI__builtin_neon_vcvta_u64_v: 3942 case NEON::BI__builtin_neon_vcvtaq_s16_v: 3943 case NEON::BI__builtin_neon_vcvtaq_s32_v: 3944 case NEON::BI__builtin_neon_vcvtaq_s64_v: 3945 case NEON::BI__builtin_neon_vcvtaq_u16_v: 3946 case NEON::BI__builtin_neon_vcvtaq_u32_v: 3947 case NEON::BI__builtin_neon_vcvtaq_u64_v: 3948 case NEON::BI__builtin_neon_vcvtn_s16_v: 3949 case NEON::BI__builtin_neon_vcvtn_s32_v: 3950 case NEON::BI__builtin_neon_vcvtn_s64_v: 3951 case NEON::BI__builtin_neon_vcvtn_u16_v: 3952 case NEON::BI__builtin_neon_vcvtn_u32_v: 3953 case NEON::BI__builtin_neon_vcvtn_u64_v: 3954 case NEON::BI__builtin_neon_vcvtnq_s16_v: 3955 case NEON::BI__builtin_neon_vcvtnq_s32_v: 3956 case NEON::BI__builtin_neon_vcvtnq_s64_v: 3957 case NEON::BI__builtin_neon_vcvtnq_u16_v: 3958 case NEON::BI__builtin_neon_vcvtnq_u32_v: 3959 case NEON::BI__builtin_neon_vcvtnq_u64_v: 3960 case NEON::BI__builtin_neon_vcvtp_s16_v: 3961 case NEON::BI__builtin_neon_vcvtp_s32_v: 3962 case NEON::BI__builtin_neon_vcvtp_s64_v: 3963 case NEON::BI__builtin_neon_vcvtp_u16_v: 3964 case NEON::BI__builtin_neon_vcvtp_u32_v: 3965 case NEON::BI__builtin_neon_vcvtp_u64_v: 3966 case NEON::BI__builtin_neon_vcvtpq_s16_v: 3967 case NEON::BI__builtin_neon_vcvtpq_s32_v: 3968 case NEON::BI__builtin_neon_vcvtpq_s64_v: 3969 case NEON::BI__builtin_neon_vcvtpq_u16_v: 3970 case NEON::BI__builtin_neon_vcvtpq_u32_v: 3971 case NEON::BI__builtin_neon_vcvtpq_u64_v: 3972 case NEON::BI__builtin_neon_vcvtm_s16_v: 3973 case NEON::BI__builtin_neon_vcvtm_s32_v: 3974 case NEON::BI__builtin_neon_vcvtm_s64_v: 3975 case NEON::BI__builtin_neon_vcvtm_u16_v: 3976 case NEON::BI__builtin_neon_vcvtm_u32_v: 3977 case 
NEON::BI__builtin_neon_vcvtm_u64_v: 3978 case NEON::BI__builtin_neon_vcvtmq_s16_v: 3979 case NEON::BI__builtin_neon_vcvtmq_s32_v: 3980 case NEON::BI__builtin_neon_vcvtmq_s64_v: 3981 case NEON::BI__builtin_neon_vcvtmq_u16_v: 3982 case NEON::BI__builtin_neon_vcvtmq_u32_v: 3983 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 3984 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 3985 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 3986 } 3987 case NEON::BI__builtin_neon_vext_v: 3988 case NEON::BI__builtin_neon_vextq_v: { 3989 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 3990 SmallVector<uint32_t, 16> Indices; 3991 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3992 Indices.push_back(i+CV); 3993 3994 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3995 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3996 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); 3997 } 3998 case NEON::BI__builtin_neon_vfma_v: 3999 case NEON::BI__builtin_neon_vfmaq_v: { 4000 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4001 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4002 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4003 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4004 4005 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
4006 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 4007 } 4008 case NEON::BI__builtin_neon_vld1_v: 4009 case NEON::BI__builtin_neon_vld1q_v: { 4010 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4011 Ops.push_back(getAlignmentValue32(PtrOp0)); 4012 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); 4013 } 4014 case NEON::BI__builtin_neon_vld2_v: 4015 case NEON::BI__builtin_neon_vld2q_v: 4016 case NEON::BI__builtin_neon_vld3_v: 4017 case NEON::BI__builtin_neon_vld3q_v: 4018 case NEON::BI__builtin_neon_vld4_v: 4019 case NEON::BI__builtin_neon_vld4q_v: { 4020 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4021 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 4022 Value *Align = getAlignmentValue32(PtrOp1); 4023 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); 4024 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4025 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4026 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4027 } 4028 case NEON::BI__builtin_neon_vld1_dup_v: 4029 case NEON::BI__builtin_neon_vld1q_dup_v: { 4030 Value *V = UndefValue::get(Ty); 4031 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 4032 PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty); 4033 LoadInst *Ld = Builder.CreateLoad(PtrOp0); 4034 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 4035 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 4036 return EmitNeonSplat(Ops[0], CI); 4037 } 4038 case NEON::BI__builtin_neon_vld2_lane_v: 4039 case NEON::BI__builtin_neon_vld2q_lane_v: 4040 case NEON::BI__builtin_neon_vld3_lane_v: 4041 case NEON::BI__builtin_neon_vld3q_lane_v: 4042 case NEON::BI__builtin_neon_vld4_lane_v: 4043 case NEON::BI__builtin_neon_vld4q_lane_v: { 4044 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4045 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 4046 for (unsigned I = 2; I < Ops.size() - 1; ++I) 4047 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 4048 Ops.push_back(getAlignmentValue32(PtrOp1)); 4049 Ops[1] = Builder.CreateCall(F, 
makeArrayRef(Ops).slice(1), NameHint); 4050 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4051 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4052 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4053 } 4054 case NEON::BI__builtin_neon_vmovl_v: { 4055 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 4056 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 4057 if (Usgn) 4058 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 4059 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 4060 } 4061 case NEON::BI__builtin_neon_vmovn_v: { 4062 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 4063 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 4064 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 4065 } 4066 case NEON::BI__builtin_neon_vmull_v: 4067 // FIXME: the integer vmull operations could be emitted in terms of pure 4068 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 4069 // hoisting the exts outside loops. Until global ISel comes along that can 4070 // see through such movement this leads to bad CodeGen. So we need an 4071 // intrinsic for now. 4072 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 4073 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 4074 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 4075 case NEON::BI__builtin_neon_vpadal_v: 4076 case NEON::BI__builtin_neon_vpadalq_v: { 4077 // The source operand type has twice as many elements of half the size. 
4078 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 4079 llvm::Type *EltTy = 4080 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 4081 llvm::Type *NarrowTy = 4082 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 4083 llvm::Type *Tys[2] = { Ty, NarrowTy }; 4084 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 4085 } 4086 case NEON::BI__builtin_neon_vpaddl_v: 4087 case NEON::BI__builtin_neon_vpaddlq_v: { 4088 // The source operand type has twice as many elements of half the size. 4089 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 4090 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 4091 llvm::Type *NarrowTy = 4092 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 4093 llvm::Type *Tys[2] = { Ty, NarrowTy }; 4094 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 4095 } 4096 case NEON::BI__builtin_neon_vqdmlal_v: 4097 case NEON::BI__builtin_neon_vqdmlsl_v: { 4098 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 4099 Ops[1] = 4100 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); 4101 Ops.resize(2); 4102 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); 4103 } 4104 case NEON::BI__builtin_neon_vqshl_n_v: 4105 case NEON::BI__builtin_neon_vqshlq_n_v: 4106 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 4107 1, false); 4108 case NEON::BI__builtin_neon_vqshlu_n_v: 4109 case NEON::BI__builtin_neon_vqshluq_n_v: 4110 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 4111 1, false); 4112 case NEON::BI__builtin_neon_vrecpe_v: 4113 case NEON::BI__builtin_neon_vrecpeq_v: 4114 case NEON::BI__builtin_neon_vrsqrte_v: 4115 case NEON::BI__builtin_neon_vrsqrteq_v: 4116 Int = Ty->isFPOrFPVectorTy() ? 
LLVMIntrinsic : AltLLVMIntrinsic; 4117 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 4118 4119 case NEON::BI__builtin_neon_vrshr_n_v: 4120 case NEON::BI__builtin_neon_vrshrq_n_v: 4121 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 4122 1, true); 4123 case NEON::BI__builtin_neon_vshl_n_v: 4124 case NEON::BI__builtin_neon_vshlq_n_v: 4125 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 4126 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 4127 "vshl_n"); 4128 case NEON::BI__builtin_neon_vshll_n_v: { 4129 llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); 4130 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4131 if (Usgn) 4132 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 4133 else 4134 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 4135 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 4136 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 4137 } 4138 case NEON::BI__builtin_neon_vshrn_n_v: { 4139 llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); 4140 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4141 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 4142 if (Usgn) 4143 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 4144 else 4145 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 4146 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 4147 } 4148 case NEON::BI__builtin_neon_vshr_n_v: 4149 case NEON::BI__builtin_neon_vshrq_n_v: 4150 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); 4151 case NEON::BI__builtin_neon_vst1_v: 4152 case NEON::BI__builtin_neon_vst1q_v: 4153 case NEON::BI__builtin_neon_vst2_v: 4154 case NEON::BI__builtin_neon_vst2q_v: 4155 case NEON::BI__builtin_neon_vst3_v: 4156 case NEON::BI__builtin_neon_vst3q_v: 4157 case NEON::BI__builtin_neon_vst4_v: 4158 case NEON::BI__builtin_neon_vst4q_v: 4159 case NEON::BI__builtin_neon_vst2_lane_v: 4160 case NEON::BI__builtin_neon_vst2q_lane_v: 4161 case NEON::BI__builtin_neon_vst3_lane_v: 4162 case 
NEON::BI__builtin_neon_vst3q_lane_v: 4163 case NEON::BI__builtin_neon_vst4_lane_v: 4164 case NEON::BI__builtin_neon_vst4q_lane_v: { 4165 llvm::Type *Tys[] = {Int8PtrTy, Ty}; 4166 Ops.push_back(getAlignmentValue32(PtrOp0)); 4167 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 4168 } 4169 case NEON::BI__builtin_neon_vsubhn_v: { 4170 llvm::VectorType *SrcTy = 4171 llvm::VectorType::getExtendedElementVectorType(VTy); 4172 4173 // %sum = add <4 x i32> %lhs, %rhs 4174 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4175 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 4176 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 4177 4178 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 4179 Constant *ShiftAmt = 4180 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 4181 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 4182 4183 // %res = trunc <4 x i32> %high to <4 x i16> 4184 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 4185 } 4186 case NEON::BI__builtin_neon_vtrn_v: 4187 case NEON::BI__builtin_neon_vtrnq_v: { 4188 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4189 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4190 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4191 Value *SV = nullptr; 4192 4193 for (unsigned vi = 0; vi != 2; ++vi) { 4194 SmallVector<uint32_t, 16> Indices; 4195 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 4196 Indices.push_back(i+vi); 4197 Indices.push_back(i+e+vi); 4198 } 4199 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4200 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 4201 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4202 } 4203 return SV; 4204 } 4205 case NEON::BI__builtin_neon_vtst_v: 4206 case NEON::BI__builtin_neon_vtstq_v: { 4207 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4208 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4209 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4210 Ops[0] = 
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4211 ConstantAggregateZero::get(Ty)); 4212 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 4213 } 4214 case NEON::BI__builtin_neon_vuzp_v: 4215 case NEON::BI__builtin_neon_vuzpq_v: { 4216 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4217 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4218 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4219 Value *SV = nullptr; 4220 4221 for (unsigned vi = 0; vi != 2; ++vi) { 4222 SmallVector<uint32_t, 16> Indices; 4223 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 4224 Indices.push_back(2*i+vi); 4225 4226 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4227 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 4228 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4229 } 4230 return SV; 4231 } 4232 case NEON::BI__builtin_neon_vzip_v: 4233 case NEON::BI__builtin_neon_vzipq_v: { 4234 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4235 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4236 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4237 Value *SV = nullptr; 4238 4239 for (unsigned vi = 0; vi != 2; ++vi) { 4240 SmallVector<uint32_t, 16> Indices; 4241 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 4242 Indices.push_back((i + vi*e) >> 1); 4243 Indices.push_back(((i + vi*e) >> 1)+e); 4244 } 4245 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4246 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 4247 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4248 } 4249 return SV; 4250 } 4251 } 4252 4253 assert(Int && "Expected valid intrinsic number"); 4254 4255 // Determine the type(s) of this overloaded AArch64 intrinsic. 
4256 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4257 4258 Value *Result = EmitNeonCall(F, Ops, NameHint); 4259 llvm::Type *ResultType = ConvertType(E->getType()); 4260 // AArch64 intrinsic one-element vector type cast to 4261 // scalar type expected by the builtin 4262 return Builder.CreateBitCast(Result, ResultType, NameHint); 4263 } 4264 4265 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 4266 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 4267 const CmpInst::Predicate Ip, const Twine &Name) { 4268 llvm::Type *OTy = Op->getType(); 4269 4270 // FIXME: this is utterly horrific. We should not be looking at previous 4271 // codegen context to find out what needs doing. Unfortunately TableGen 4272 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 4273 // (etc). 4274 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 4275 OTy = BI->getOperand(0)->getType(); 4276 4277 Op = Builder.CreateBitCast(Op, OTy); 4278 if (OTy->getScalarType()->isFloatingPointTy()) { 4279 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 4280 } else { 4281 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 4282 } 4283 return Builder.CreateSExt(Op, Ty, Name); 4284 } 4285 4286 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 4287 Value *ExtOp, Value *IndexOp, 4288 llvm::Type *ResTy, unsigned IntID, 4289 const char *Name) { 4290 SmallVector<Value *, 2> TblOps; 4291 if (ExtOp) 4292 TblOps.push_back(ExtOp); 4293 4294 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 4295 SmallVector<uint32_t, 16> Indices; 4296 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 4297 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 4298 Indices.push_back(2*i); 4299 Indices.push_back(2*i+1); 4300 } 4301 4302 int PairPos = 0, End = Ops.size() - 1; 4303 while (PairPos < End) { 4304 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4305 
Ops[PairPos+1], Indices, 4306 Name)); 4307 PairPos += 2; 4308 } 4309 4310 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 4311 // of the 128-bit lookup table with zero. 4312 if (PairPos == End) { 4313 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 4314 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4315 ZeroTbl, Indices, Name)); 4316 } 4317 4318 Function *TblF; 4319 TblOps.push_back(IndexOp); 4320 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 4321 4322 return CGF.EmitNeonCall(TblF, TblOps, Name); 4323 } 4324 4325 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 4326 unsigned Value; 4327 switch (BuiltinID) { 4328 default: 4329 return nullptr; 4330 case ARM::BI__builtin_arm_nop: 4331 Value = 0; 4332 break; 4333 case ARM::BI__builtin_arm_yield: 4334 case ARM::BI__yield: 4335 Value = 1; 4336 break; 4337 case ARM::BI__builtin_arm_wfe: 4338 case ARM::BI__wfe: 4339 Value = 2; 4340 break; 4341 case ARM::BI__builtin_arm_wfi: 4342 case ARM::BI__wfi: 4343 Value = 3; 4344 break; 4345 case ARM::BI__builtin_arm_sev: 4346 case ARM::BI__sev: 4347 Value = 4; 4348 break; 4349 case ARM::BI__builtin_arm_sevl: 4350 case ARM::BI__sevl: 4351 Value = 5; 4352 break; 4353 } 4354 4355 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 4356 llvm::ConstantInt::get(Int32Ty, Value)); 4357 } 4358 4359 // Generates the IR for the read/write special register builtin, 4360 // ValueType is the type of the value that is to be written or read, 4361 // RegisterType is the type of the register being written to or read from. 4362 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 4363 const CallExpr *E, 4364 llvm::Type *RegisterType, 4365 llvm::Type *ValueType, 4366 bool IsRead, 4367 StringRef SysReg = "") { 4368 // write and register intrinsics only support 32 and 64 bit operations. 
4369 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4370 && "Unsupported size for register."); 4371 4372 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4373 CodeGen::CodeGenModule &CGM = CGF.CGM; 4374 LLVMContext &Context = CGM.getLLVMContext(); 4375 4376 if (SysReg.empty()) { 4377 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4378 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 4379 } 4380 4381 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4382 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4383 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4384 4385 llvm::Type *Types[] = { RegisterType }; 4386 4387 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4388 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4389 && "Can't fit 64-bit value in 32-bit register"); 4390 4391 if (IsRead) { 4392 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4393 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4394 4395 if (MixedTypes) 4396 // Read into 64 bit register and then truncate result to 32 bit. 4397 return Builder.CreateTrunc(Call, ValueType); 4398 4399 if (ValueType->isPointerTy()) 4400 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4401 return Builder.CreateIntToPtr(Call, ValueType); 4402 4403 return Call; 4404 } 4405 4406 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4407 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4408 if (MixedTypes) { 4409 // Extend 32 bit write value to 64 bit to pass to write. 4410 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4411 return Builder.CreateCall(F, { Metadata, ArgValue }); 4412 } 4413 4414 if (ValueType->isPointerTy()) { 4415 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
4416 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4417 return Builder.CreateCall(F, { Metadata, ArgValue }); 4418 } 4419 4420 return Builder.CreateCall(F, { Metadata, ArgValue }); 4421 } 4422 4423 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4424 /// argument that specifies the vector type. 4425 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4426 switch (BuiltinID) { 4427 default: break; 4428 case NEON::BI__builtin_neon_vget_lane_i8: 4429 case NEON::BI__builtin_neon_vget_lane_i16: 4430 case NEON::BI__builtin_neon_vget_lane_i32: 4431 case NEON::BI__builtin_neon_vget_lane_i64: 4432 case NEON::BI__builtin_neon_vget_lane_f32: 4433 case NEON::BI__builtin_neon_vgetq_lane_i8: 4434 case NEON::BI__builtin_neon_vgetq_lane_i16: 4435 case NEON::BI__builtin_neon_vgetq_lane_i32: 4436 case NEON::BI__builtin_neon_vgetq_lane_i64: 4437 case NEON::BI__builtin_neon_vgetq_lane_f32: 4438 case NEON::BI__builtin_neon_vset_lane_i8: 4439 case NEON::BI__builtin_neon_vset_lane_i16: 4440 case NEON::BI__builtin_neon_vset_lane_i32: 4441 case NEON::BI__builtin_neon_vset_lane_i64: 4442 case NEON::BI__builtin_neon_vset_lane_f32: 4443 case NEON::BI__builtin_neon_vsetq_lane_i8: 4444 case NEON::BI__builtin_neon_vsetq_lane_i16: 4445 case NEON::BI__builtin_neon_vsetq_lane_i32: 4446 case NEON::BI__builtin_neon_vsetq_lane_i64: 4447 case NEON::BI__builtin_neon_vsetq_lane_f32: 4448 case NEON::BI__builtin_neon_vsha1h_u32: 4449 case NEON::BI__builtin_neon_vsha1cq_u32: 4450 case NEON::BI__builtin_neon_vsha1pq_u32: 4451 case NEON::BI__builtin_neon_vsha1mq_u32: 4452 case ARM::BI_MoveToCoprocessor: 4453 case ARM::BI_MoveToCoprocessor2: 4454 return false; 4455 } 4456 return true; 4457 } 4458 4459 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4460 const CallExpr *E) { 4461 if (auto Hint = GetValueForARMHint(BuiltinID)) 4462 return Hint; 4463 4464 if (BuiltinID == ARM::BI__emit) { 4465 bool IsThumb = getTarget().getTriple().getArch() == 
llvm::Triple::thumb; 4466 llvm::FunctionType *FTy = 4467 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 4468 4469 APSInt Value; 4470 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 4471 llvm_unreachable("Sema will ensure that the parameter is constant"); 4472 4473 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 4474 4475 llvm::InlineAsm *Emit = 4476 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 4477 /*SideEffects=*/true) 4478 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 4479 /*SideEffects=*/true); 4480 4481 return Builder.CreateCall(Emit); 4482 } 4483 4484 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 4485 Value *Option = EmitScalarExpr(E->getArg(0)); 4486 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 4487 } 4488 4489 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 4490 Value *Address = EmitScalarExpr(E->getArg(0)); 4491 Value *RW = EmitScalarExpr(E->getArg(1)); 4492 Value *IsData = EmitScalarExpr(E->getArg(2)); 4493 4494 // Locality is not supported on ARM target 4495 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 4496 4497 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4498 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4499 } 4500 4501 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 4502 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4503 return Builder.CreateCall( 4504 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 4505 } 4506 4507 if (BuiltinID == ARM::BI__clear_cache) { 4508 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4509 const FunctionDecl *FD = E->getDirectCallee(); 4510 Value *Ops[2]; 4511 for (unsigned i = 0; i < 2; i++) 4512 Ops[i] = EmitScalarExpr(E->getArg(i)); 4513 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4514 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4515 StringRef Name = FD->getName(); 4516 return 
EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4517 } 4518 4519 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 4520 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 4521 Function *F; 4522 4523 switch (BuiltinID) { 4524 default: llvm_unreachable("unexpected builtin"); 4525 case ARM::BI__builtin_arm_mcrr: 4526 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 4527 break; 4528 case ARM::BI__builtin_arm_mcrr2: 4529 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 4530 break; 4531 } 4532 4533 // MCRR{2} instruction has 5 operands but 4534 // the intrinsic has 4 because Rt and Rt2 4535 // are represented as a single unsigned 64 4536 // bit integer in the intrinsic definition 4537 // but internally it's represented as 2 32 4538 // bit integers. 4539 4540 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4541 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4542 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 4543 Value *CRm = EmitScalarExpr(E->getArg(3)); 4544 4545 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4546 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 4547 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 4548 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 4549 4550 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 4551 } 4552 4553 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 4554 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 4555 Function *F; 4556 4557 switch (BuiltinID) { 4558 default: llvm_unreachable("unexpected builtin"); 4559 case ARM::BI__builtin_arm_mrrc: 4560 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 4561 break; 4562 case ARM::BI__builtin_arm_mrrc2: 4563 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 4564 break; 4565 } 4566 4567 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4568 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4569 Value *CRm = EmitScalarExpr(E->getArg(2)); 4570 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 4571 4572 // Returns an unsigned 64 bit integer, represented 4573 // as two 32 bit integers. 
4574 4575 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 4576 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 4577 Rt = Builder.CreateZExt(Rt, Int64Ty); 4578 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 4579 4580 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 4581 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 4582 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 4583 4584 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 4585 } 4586 4587 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 4588 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 4589 BuiltinID == ARM::BI__builtin_arm_ldaex) && 4590 getContext().getTypeSize(E->getType()) == 64) || 4591 BuiltinID == ARM::BI__ldrexd) { 4592 Function *F; 4593 4594 switch (BuiltinID) { 4595 default: llvm_unreachable("unexpected builtin"); 4596 case ARM::BI__builtin_arm_ldaex: 4597 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 4598 break; 4599 case ARM::BI__builtin_arm_ldrexd: 4600 case ARM::BI__builtin_arm_ldrex: 4601 case ARM::BI__ldrexd: 4602 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 4603 break; 4604 } 4605 4606 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4607 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4608 "ldrexd"); 4609 4610 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4611 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4612 Val0 = Builder.CreateZExt(Val0, Int64Ty); 4613 Val1 = Builder.CreateZExt(Val1, Int64Ty); 4614 4615 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 4616 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4617 Val = Builder.CreateOr(Val, Val1); 4618 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4619 } 4620 4621 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 4622 BuiltinID == ARM::BI__builtin_arm_ldaex) { 4623 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4624 4625 QualType Ty = E->getType(); 4626 llvm::Type *RealResTy = ConvertType(Ty); 4627 llvm::Type *PtrTy = llvm::IntegerType::get( 
4628 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 4629 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 4630 4631 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 4632 ? Intrinsic::arm_ldaex 4633 : Intrinsic::arm_ldrex, 4634 PtrTy); 4635 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 4636 4637 if (RealResTy->isPointerTy()) 4638 return Builder.CreateIntToPtr(Val, RealResTy); 4639 else { 4640 llvm::Type *IntResTy = llvm::IntegerType::get( 4641 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 4642 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4643 return Builder.CreateBitCast(Val, RealResTy); 4644 } 4645 } 4646 4647 if (BuiltinID == ARM::BI__builtin_arm_strexd || 4648 ((BuiltinID == ARM::BI__builtin_arm_stlex || 4649 BuiltinID == ARM::BI__builtin_arm_strex) && 4650 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 4651 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4652 ? Intrinsic::arm_stlexd 4653 : Intrinsic::arm_strexd); 4654 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 4655 4656 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4657 Value *Val = EmitScalarExpr(E->getArg(0)); 4658 Builder.CreateStore(Val, Tmp); 4659 4660 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4661 Val = Builder.CreateLoad(LdPtr); 4662 4663 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4664 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4665 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 4666 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 4667 } 4668 4669 if (BuiltinID == ARM::BI__builtin_arm_strex || 4670 BuiltinID == ARM::BI__builtin_arm_stlex) { 4671 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4672 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4673 4674 QualType Ty = E->getArg(0)->getType(); 4675 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4676 
getContext().getTypeSize(Ty)); 4677 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4678 4679 if (StoreVal->getType()->isPointerTy()) 4680 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 4681 else { 4682 llvm::Type *IntTy = llvm::IntegerType::get( 4683 getLLVMContext(), 4684 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 4685 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 4686 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 4687 } 4688 4689 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4690 ? Intrinsic::arm_stlex 4691 : Intrinsic::arm_strex, 4692 StoreAddr->getType()); 4693 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 4694 } 4695 4696 switch (BuiltinID) { 4697 case ARM::BI__iso_volatile_load8: 4698 case ARM::BI__iso_volatile_load16: 4699 case ARM::BI__iso_volatile_load32: 4700 case ARM::BI__iso_volatile_load64: { 4701 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4702 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4703 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 4704 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4705 LoadSize.getQuantity() * 8); 4706 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4707 llvm::LoadInst *Load = 4708 Builder.CreateAlignedLoad(Ptr, LoadSize); 4709 Load->setVolatile(true); 4710 return Load; 4711 } 4712 case ARM::BI__iso_volatile_store8: 4713 case ARM::BI__iso_volatile_store16: 4714 case ARM::BI__iso_volatile_store32: 4715 case ARM::BI__iso_volatile_store64: { 4716 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4717 Value *Value = EmitScalarExpr(E->getArg(1)); 4718 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4719 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 4720 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4721 StoreSize.getQuantity() * 8); 4722 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4723 llvm::StoreInst *Store = 4724 
Builder.CreateAlignedStore(Value, Ptr, 4725 StoreSize); 4726 Store->setVolatile(true); 4727 return Store; 4728 } 4729 } 4730 4731 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 4732 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 4733 return Builder.CreateCall(F); 4734 } 4735 4736 // CRC32 4737 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4738 switch (BuiltinID) { 4739 case ARM::BI__builtin_arm_crc32b: 4740 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 4741 case ARM::BI__builtin_arm_crc32cb: 4742 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 4743 case ARM::BI__builtin_arm_crc32h: 4744 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 4745 case ARM::BI__builtin_arm_crc32ch: 4746 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 4747 case ARM::BI__builtin_arm_crc32w: 4748 case ARM::BI__builtin_arm_crc32d: 4749 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 4750 case ARM::BI__builtin_arm_crc32cw: 4751 case ARM::BI__builtin_arm_crc32cd: 4752 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 4753 } 4754 4755 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4756 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4757 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4758 4759 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 4760 // intrinsics, hence we need different codegen for these cases. 
4761 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 4762 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 4763 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4764 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 4765 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 4766 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 4767 4768 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4769 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 4770 return Builder.CreateCall(F, {Res, Arg1b}); 4771 } else { 4772 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 4773 4774 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4775 return Builder.CreateCall(F, {Arg0, Arg1}); 4776 } 4777 } 4778 4779 if (BuiltinID == ARM::BI__builtin_arm_rsr || 4780 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4781 BuiltinID == ARM::BI__builtin_arm_rsrp || 4782 BuiltinID == ARM::BI__builtin_arm_wsr || 4783 BuiltinID == ARM::BI__builtin_arm_wsr64 || 4784 BuiltinID == ARM::BI__builtin_arm_wsrp) { 4785 4786 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 4787 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4788 BuiltinID == ARM::BI__builtin_arm_rsrp; 4789 4790 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 4791 BuiltinID == ARM::BI__builtin_arm_wsrp; 4792 4793 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 4794 BuiltinID == ARM::BI__builtin_arm_wsr64; 4795 4796 llvm::Type *ValueType; 4797 llvm::Type *RegisterType; 4798 if (IsPointerBuiltin) { 4799 ValueType = VoidPtrTy; 4800 RegisterType = Int32Ty; 4801 } else if (Is64Bit) { 4802 ValueType = RegisterType = Int64Ty; 4803 } else { 4804 ValueType = RegisterType = Int32Ty; 4805 } 4806 4807 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 4808 } 4809 4810 // Find out if any arguments are required to be integer constant 4811 // expressions. 
4812 unsigned ICEArguments = 0; 4813 ASTContext::GetBuiltinTypeError Error; 4814 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4815 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4816 4817 auto getAlignmentValue32 = [&](Address addr) -> Value* { 4818 return Builder.getInt32(addr.getAlignment().getQuantity()); 4819 }; 4820 4821 Address PtrOp0 = Address::invalid(); 4822 Address PtrOp1 = Address::invalid(); 4823 SmallVector<Value*, 4> Ops; 4824 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 4825 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 4826 for (unsigned i = 0, e = NumArgs; i != e; i++) { 4827 if (i == 0) { 4828 switch (BuiltinID) { 4829 case NEON::BI__builtin_neon_vld1_v: 4830 case NEON::BI__builtin_neon_vld1q_v: 4831 case NEON::BI__builtin_neon_vld1q_lane_v: 4832 case NEON::BI__builtin_neon_vld1_lane_v: 4833 case NEON::BI__builtin_neon_vld1_dup_v: 4834 case NEON::BI__builtin_neon_vld1q_dup_v: 4835 case NEON::BI__builtin_neon_vst1_v: 4836 case NEON::BI__builtin_neon_vst1q_v: 4837 case NEON::BI__builtin_neon_vst1q_lane_v: 4838 case NEON::BI__builtin_neon_vst1_lane_v: 4839 case NEON::BI__builtin_neon_vst2_v: 4840 case NEON::BI__builtin_neon_vst2q_v: 4841 case NEON::BI__builtin_neon_vst2_lane_v: 4842 case NEON::BI__builtin_neon_vst2q_lane_v: 4843 case NEON::BI__builtin_neon_vst3_v: 4844 case NEON::BI__builtin_neon_vst3q_v: 4845 case NEON::BI__builtin_neon_vst3_lane_v: 4846 case NEON::BI__builtin_neon_vst3q_lane_v: 4847 case NEON::BI__builtin_neon_vst4_v: 4848 case NEON::BI__builtin_neon_vst4q_v: 4849 case NEON::BI__builtin_neon_vst4_lane_v: 4850 case NEON::BI__builtin_neon_vst4q_lane_v: 4851 // Get the alignment for the argument in addition to the value; 4852 // we'll use it later. 
4853 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 4854 Ops.push_back(PtrOp0.getPointer()); 4855 continue; 4856 } 4857 } 4858 if (i == 1) { 4859 switch (BuiltinID) { 4860 case NEON::BI__builtin_neon_vld2_v: 4861 case NEON::BI__builtin_neon_vld2q_v: 4862 case NEON::BI__builtin_neon_vld3_v: 4863 case NEON::BI__builtin_neon_vld3q_v: 4864 case NEON::BI__builtin_neon_vld4_v: 4865 case NEON::BI__builtin_neon_vld4q_v: 4866 case NEON::BI__builtin_neon_vld2_lane_v: 4867 case NEON::BI__builtin_neon_vld2q_lane_v: 4868 case NEON::BI__builtin_neon_vld3_lane_v: 4869 case NEON::BI__builtin_neon_vld3q_lane_v: 4870 case NEON::BI__builtin_neon_vld4_lane_v: 4871 case NEON::BI__builtin_neon_vld4q_lane_v: 4872 case NEON::BI__builtin_neon_vld2_dup_v: 4873 case NEON::BI__builtin_neon_vld3_dup_v: 4874 case NEON::BI__builtin_neon_vld4_dup_v: 4875 // Get the alignment for the argument in addition to the value; 4876 // we'll use it later. 4877 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 4878 Ops.push_back(PtrOp1.getPointer()); 4879 continue; 4880 } 4881 } 4882 4883 if ((ICEArguments & (1 << i)) == 0) { 4884 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4885 } else { 4886 // If this is required to be a constant, constant fold it so that we know 4887 // that the generated intrinsic gets a ConstantInt. 
4888 llvm::APSInt Result; 4889 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 4890 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 4891 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 4892 } 4893 } 4894 4895 switch (BuiltinID) { 4896 default: break; 4897 4898 case NEON::BI__builtin_neon_vget_lane_i8: 4899 case NEON::BI__builtin_neon_vget_lane_i16: 4900 case NEON::BI__builtin_neon_vget_lane_i32: 4901 case NEON::BI__builtin_neon_vget_lane_i64: 4902 case NEON::BI__builtin_neon_vget_lane_f32: 4903 case NEON::BI__builtin_neon_vgetq_lane_i8: 4904 case NEON::BI__builtin_neon_vgetq_lane_i16: 4905 case NEON::BI__builtin_neon_vgetq_lane_i32: 4906 case NEON::BI__builtin_neon_vgetq_lane_i64: 4907 case NEON::BI__builtin_neon_vgetq_lane_f32: 4908 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 4909 4910 case NEON::BI__builtin_neon_vset_lane_i8: 4911 case NEON::BI__builtin_neon_vset_lane_i16: 4912 case NEON::BI__builtin_neon_vset_lane_i32: 4913 case NEON::BI__builtin_neon_vset_lane_i64: 4914 case NEON::BI__builtin_neon_vset_lane_f32: 4915 case NEON::BI__builtin_neon_vsetq_lane_i8: 4916 case NEON::BI__builtin_neon_vsetq_lane_i16: 4917 case NEON::BI__builtin_neon_vsetq_lane_i32: 4918 case NEON::BI__builtin_neon_vsetq_lane_i64: 4919 case NEON::BI__builtin_neon_vsetq_lane_f32: 4920 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4921 4922 case NEON::BI__builtin_neon_vsha1h_u32: 4923 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 4924 "vsha1h"); 4925 case NEON::BI__builtin_neon_vsha1cq_u32: 4926 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 4927 "vsha1h"); 4928 case NEON::BI__builtin_neon_vsha1pq_u32: 4929 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 4930 "vsha1h"); 4931 case NEON::BI__builtin_neon_vsha1mq_u32: 4932 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 4933 "vsha1h"); 
4934 4935 // The ARM _MoveToCoprocessor builtins put the input register value as 4936 // the first argument, but the LLVM intrinsic expects it as the third one. 4937 case ARM::BI_MoveToCoprocessor: 4938 case ARM::BI_MoveToCoprocessor2: { 4939 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 4940 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 4941 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 4942 Ops[3], Ops[4], Ops[5]}); 4943 } 4944 case ARM::BI_BitScanForward: 4945 case ARM::BI_BitScanForward64: 4946 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 4947 case ARM::BI_BitScanReverse: 4948 case ARM::BI_BitScanReverse64: 4949 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 4950 4951 case ARM::BI_InterlockedAnd64: 4952 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 4953 case ARM::BI_InterlockedExchange64: 4954 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 4955 case ARM::BI_InterlockedExchangeAdd64: 4956 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 4957 case ARM::BI_InterlockedExchangeSub64: 4958 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 4959 case ARM::BI_InterlockedOr64: 4960 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 4961 case ARM::BI_InterlockedXor64: 4962 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 4963 case ARM::BI_InterlockedDecrement64: 4964 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 4965 case ARM::BI_InterlockedIncrement64: 4966 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 4967 } 4968 4969 // Get the last argument, which specifies the vector type. 
4970 assert(HasExtraArg); 4971 llvm::APSInt Result; 4972 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4973 if (!Arg->isIntegerConstantExpr(Result, getContext())) 4974 return nullptr; 4975 4976 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 4977 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 4978 // Determine the overloaded type of this builtin. 4979 llvm::Type *Ty; 4980 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 4981 Ty = FloatTy; 4982 else 4983 Ty = DoubleTy; 4984 4985 // Determine whether this is an unsigned conversion or not. 4986 bool usgn = Result.getZExtValue() == 1; 4987 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 4988 4989 // Call the appropriate intrinsic. 4990 Function *F = CGM.getIntrinsic(Int, Ty); 4991 return Builder.CreateCall(F, Ops, "vcvtr"); 4992 } 4993 4994 // Determine the type of this overloaded NEON intrinsic. 4995 NeonTypeFlags Type(Result.getZExtValue()); 4996 bool usgn = Type.isUnsigned(); 4997 bool rightShift = false; 4998 4999 llvm::VectorType *VTy = GetNeonType(this, Type); 5000 llvm::Type *Ty = VTy; 5001 if (!Ty) 5002 return nullptr; 5003 5004 // Many NEON builtins have identical semantics and uses in ARM and 5005 // AArch64. Emit these in a single function. 5006 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 5007 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5008 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 5009 if (Builtin) 5010 return EmitCommonNeonBuiltinExpr( 5011 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5012 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); 5013 5014 unsigned Int; 5015 switch (BuiltinID) { 5016 default: return nullptr; 5017 case NEON::BI__builtin_neon_vld1q_lane_v: 5018 // Handle 64-bit integer elements as a special case. Use shuffles of 5019 // one-element vectors to avoid poor code for i64 in the backend. 5020 if (VTy->getElementType()->isIntegerTy(64)) { 5021 // Extract the other lane. 
5022 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5023 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 5024 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 5025 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5026 // Load the value as a one-element vector. 5027 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 5028 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5029 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 5030 Value *Align = getAlignmentValue32(PtrOp0); 5031 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 5032 // Combine them. 5033 uint32_t Indices[] = {1 - Lane, Lane}; 5034 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 5035 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 5036 } 5037 // fall through 5038 case NEON::BI__builtin_neon_vld1_lane_v: { 5039 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5040 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 5041 Value *Ld = Builder.CreateLoad(PtrOp0); 5042 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 5043 } 5044 case NEON::BI__builtin_neon_vld2_dup_v: 5045 case NEON::BI__builtin_neon_vld3_dup_v: 5046 case NEON::BI__builtin_neon_vld4_dup_v: { 5047 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
5048 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 5049 switch (BuiltinID) { 5050 case NEON::BI__builtin_neon_vld2_dup_v: 5051 Int = Intrinsic::arm_neon_vld2; 5052 break; 5053 case NEON::BI__builtin_neon_vld3_dup_v: 5054 Int = Intrinsic::arm_neon_vld3; 5055 break; 5056 case NEON::BI__builtin_neon_vld4_dup_v: 5057 Int = Intrinsic::arm_neon_vld4; 5058 break; 5059 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5060 } 5061 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5062 Function *F = CGM.getIntrinsic(Int, Tys); 5063 llvm::Value *Align = getAlignmentValue32(PtrOp1); 5064 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 5065 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5066 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5067 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5068 } 5069 switch (BuiltinID) { 5070 case NEON::BI__builtin_neon_vld2_dup_v: 5071 Int = Intrinsic::arm_neon_vld2lane; 5072 break; 5073 case NEON::BI__builtin_neon_vld3_dup_v: 5074 Int = Intrinsic::arm_neon_vld3lane; 5075 break; 5076 case NEON::BI__builtin_neon_vld4_dup_v: 5077 Int = Intrinsic::arm_neon_vld4lane; 5078 break; 5079 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5080 } 5081 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5082 Function *F = CGM.getIntrinsic(Int, Tys); 5083 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 5084 5085 SmallVector<Value*, 6> Args; 5086 Args.push_back(Ops[1]); 5087 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 5088 5089 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5090 Args.push_back(CI); 5091 Args.push_back(getAlignmentValue32(PtrOp1)); 5092 5093 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 5094 // splat lane 0 to all elts in each vector of the result. 
5095 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5096 Value *Val = Builder.CreateExtractValue(Ops[1], i); 5097 Value *Elt = Builder.CreateBitCast(Val, Ty); 5098 Elt = EmitNeonSplat(Elt, CI); 5099 Elt = Builder.CreateBitCast(Elt, Val->getType()); 5100 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 5101 } 5102 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5103 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5104 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5105 } 5106 case NEON::BI__builtin_neon_vqrshrn_n_v: 5107 Int = 5108 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 5109 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 5110 1, true); 5111 case NEON::BI__builtin_neon_vqrshrun_n_v: 5112 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 5113 Ops, "vqrshrun_n", 1, true); 5114 case NEON::BI__builtin_neon_vqshrn_n_v: 5115 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 5116 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 5117 1, true); 5118 case NEON::BI__builtin_neon_vqshrun_n_v: 5119 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 5120 Ops, "vqshrun_n", 1, true); 5121 case NEON::BI__builtin_neon_vrecpe_v: 5122 case NEON::BI__builtin_neon_vrecpeq_v: 5123 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 5124 Ops, "vrecpe"); 5125 case NEON::BI__builtin_neon_vrshrn_n_v: 5126 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 5127 Ops, "vrshrn_n", 1, true); 5128 case NEON::BI__builtin_neon_vrsra_n_v: 5129 case NEON::BI__builtin_neon_vrsraq_n_v: 5130 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5131 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5132 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 5133 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 5134 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 5135 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 5136 case NEON::BI__builtin_neon_vsri_n_v: 5137 case NEON::BI__builtin_neon_vsriq_n_v: 5138 rightShift = true; 5139 LLVM_FALLTHROUGH; 5140 case NEON::BI__builtin_neon_vsli_n_v: 5141 case NEON::BI__builtin_neon_vsliq_n_v: 5142 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 5143 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 5144 Ops, "vsli_n"); 5145 case NEON::BI__builtin_neon_vsra_n_v: 5146 case NEON::BI__builtin_neon_vsraq_n_v: 5147 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5148 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5149 return Builder.CreateAdd(Ops[0], Ops[1]); 5150 case NEON::BI__builtin_neon_vst1q_lane_v: 5151 // Handle 64-bit integer elements as a special case. Use a shuffle to get 5152 // a one-element vector and avoid poor code for i64 in the backend. 
5153 if (VTy->getElementType()->isIntegerTy(64)) { 5154 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5155 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 5156 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5157 Ops[2] = getAlignmentValue32(PtrOp0); 5158 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 5159 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 5160 Tys), Ops); 5161 } 5162 // fall through 5163 case NEON::BI__builtin_neon_vst1_lane_v: { 5164 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5165 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5166 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5167 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 5168 return St; 5169 } 5170 case NEON::BI__builtin_neon_vtbl1_v: 5171 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 5172 Ops, "vtbl1"); 5173 case NEON::BI__builtin_neon_vtbl2_v: 5174 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 5175 Ops, "vtbl2"); 5176 case NEON::BI__builtin_neon_vtbl3_v: 5177 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 5178 Ops, "vtbl3"); 5179 case NEON::BI__builtin_neon_vtbl4_v: 5180 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 5181 Ops, "vtbl4"); 5182 case NEON::BI__builtin_neon_vtbx1_v: 5183 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 5184 Ops, "vtbx1"); 5185 case NEON::BI__builtin_neon_vtbx2_v: 5186 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 5187 Ops, "vtbx2"); 5188 case NEON::BI__builtin_neon_vtbx3_v: 5189 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 5190 Ops, "vtbx3"); 5191 case NEON::BI__builtin_neon_vtbx4_v: 5192 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 5193 Ops, "vtbx4"); 5194 } 5195 } 5196 5197 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 5198 const CallExpr *E, 5199 SmallVectorImpl<Value *> &Ops) { 
5200 unsigned int Int = 0; 5201 const char *s = nullptr; 5202 5203 switch (BuiltinID) { 5204 default: 5205 return nullptr; 5206 case NEON::BI__builtin_neon_vtbl1_v: 5207 case NEON::BI__builtin_neon_vqtbl1_v: 5208 case NEON::BI__builtin_neon_vqtbl1q_v: 5209 case NEON::BI__builtin_neon_vtbl2_v: 5210 case NEON::BI__builtin_neon_vqtbl2_v: 5211 case NEON::BI__builtin_neon_vqtbl2q_v: 5212 case NEON::BI__builtin_neon_vtbl3_v: 5213 case NEON::BI__builtin_neon_vqtbl3_v: 5214 case NEON::BI__builtin_neon_vqtbl3q_v: 5215 case NEON::BI__builtin_neon_vtbl4_v: 5216 case NEON::BI__builtin_neon_vqtbl4_v: 5217 case NEON::BI__builtin_neon_vqtbl4q_v: 5218 break; 5219 case NEON::BI__builtin_neon_vtbx1_v: 5220 case NEON::BI__builtin_neon_vqtbx1_v: 5221 case NEON::BI__builtin_neon_vqtbx1q_v: 5222 case NEON::BI__builtin_neon_vtbx2_v: 5223 case NEON::BI__builtin_neon_vqtbx2_v: 5224 case NEON::BI__builtin_neon_vqtbx2q_v: 5225 case NEON::BI__builtin_neon_vtbx3_v: 5226 case NEON::BI__builtin_neon_vqtbx3_v: 5227 case NEON::BI__builtin_neon_vqtbx3q_v: 5228 case NEON::BI__builtin_neon_vtbx4_v: 5229 case NEON::BI__builtin_neon_vqtbx4_v: 5230 case NEON::BI__builtin_neon_vqtbx4q_v: 5231 break; 5232 } 5233 5234 assert(E->getNumArgs() >= 3); 5235 5236 // Get the last argument, which specifies the vector type. 5237 llvm::APSInt Result; 5238 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5239 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5240 return nullptr; 5241 5242 // Determine the type of this overloaded NEON intrinsic. 5243 NeonTypeFlags Type(Result.getZExtValue()); 5244 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 5245 if (!Ty) 5246 return nullptr; 5247 5248 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5249 5250 // AArch64 scalar builtins are not overloaded, they do not have an extra 5251 // argument that specifies the vector type, need to handle each case. 
5252 switch (BuiltinID) { 5253 case NEON::BI__builtin_neon_vtbl1_v: { 5254 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 5255 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 5256 "vtbl1"); 5257 } 5258 case NEON::BI__builtin_neon_vtbl2_v: { 5259 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 5260 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 5261 "vtbl1"); 5262 } 5263 case NEON::BI__builtin_neon_vtbl3_v: { 5264 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 5265 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 5266 "vtbl2"); 5267 } 5268 case NEON::BI__builtin_neon_vtbl4_v: { 5269 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 5270 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 5271 "vtbl2"); 5272 } 5273 case NEON::BI__builtin_neon_vtbx1_v: { 5274 Value *TblRes = 5275 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 5276 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 5277 5278 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 5279 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 5280 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5281 5282 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 5283 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5284 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5285 } 5286 case NEON::BI__builtin_neon_vtbx2_v: { 5287 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 5288 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 5289 "vtbx1"); 5290 } 5291 case NEON::BI__builtin_neon_vtbx3_v: { 5292 Value *TblRes = 5293 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 5294 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 5295 5296 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 5297 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 5298 TwentyFourV); 5299 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5300 5301 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 5302 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5303 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5304 } 5305 case NEON::BI__builtin_neon_vtbx4_v: { 5306 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 5307 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 5308 "vtbx2"); 5309 } 5310 case NEON::BI__builtin_neon_vqtbl1_v: 5311 case NEON::BI__builtin_neon_vqtbl1q_v: 5312 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 5313 case NEON::BI__builtin_neon_vqtbl2_v: 5314 case NEON::BI__builtin_neon_vqtbl2q_v: { 5315 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 5316 case NEON::BI__builtin_neon_vqtbl3_v: 5317 case NEON::BI__builtin_neon_vqtbl3q_v: 5318 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 5319 case NEON::BI__builtin_neon_vqtbl4_v: 5320 case NEON::BI__builtin_neon_vqtbl4q_v: 5321 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 5322 case NEON::BI__builtin_neon_vqtbx1_v: 5323 case NEON::BI__builtin_neon_vqtbx1q_v: 5324 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 5325 case NEON::BI__builtin_neon_vqtbx2_v: 5326 case NEON::BI__builtin_neon_vqtbx2q_v: 5327 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 5328 case NEON::BI__builtin_neon_vqtbx3_v: 5329 case NEON::BI__builtin_neon_vqtbx3q_v: 5330 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 5331 case NEON::BI__builtin_neon_vqtbx4_v: 5332 case NEON::BI__builtin_neon_vqtbx4q_v: 5333 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 5334 } 5335 } 5336 5337 if (!Int) 5338 return nullptr; 5339 5340 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 5341 return CGF.EmitNeonCall(F, Ops, s); 5342 } 5343 5344 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 5345 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 5346 Op = Builder.CreateBitCast(Op, Int16Ty); 5347 Value *V = UndefValue::get(VTy); 5348 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 5349 Op = 
Builder.CreateInsertElement(V, Op, CI); 5350 return Op; 5351 } 5352 5353 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 5354 const CallExpr *E) { 5355 unsigned HintID = static_cast<unsigned>(-1); 5356 switch (BuiltinID) { 5357 default: break; 5358 case AArch64::BI__builtin_arm_nop: 5359 HintID = 0; 5360 break; 5361 case AArch64::BI__builtin_arm_yield: 5362 HintID = 1; 5363 break; 5364 case AArch64::BI__builtin_arm_wfe: 5365 HintID = 2; 5366 break; 5367 case AArch64::BI__builtin_arm_wfi: 5368 HintID = 3; 5369 break; 5370 case AArch64::BI__builtin_arm_sev: 5371 HintID = 4; 5372 break; 5373 case AArch64::BI__builtin_arm_sevl: 5374 HintID = 5; 5375 break; 5376 } 5377 5378 if (HintID != static_cast<unsigned>(-1)) { 5379 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 5380 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 5381 } 5382 5383 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 5384 Value *Address = EmitScalarExpr(E->getArg(0)); 5385 Value *RW = EmitScalarExpr(E->getArg(1)); 5386 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 5387 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 5388 Value *IsData = EmitScalarExpr(E->getArg(4)); 5389 5390 Value *Locality = nullptr; 5391 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 5392 // Temporal fetch, needs to convert cache level to locality. 5393 Locality = llvm::ConstantInt::get(Int32Ty, 5394 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 5395 } else { 5396 // Streaming fetch. 5397 Locality = llvm::ConstantInt::get(Int32Ty, 0); 5398 } 5399 5400 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 5401 // PLDL3STRM or PLDL2STRM. 
5402 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5403 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5404 } 5405 5406 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5407 assert((getContext().getTypeSize(E->getType()) == 32) && 5408 "rbit of unusual size!"); 5409 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5410 return Builder.CreateCall( 5411 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5412 } 5413 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5414 assert((getContext().getTypeSize(E->getType()) == 64) && 5415 "rbit of unusual size!"); 5416 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5417 return Builder.CreateCall( 5418 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5419 } 5420 5421 if (BuiltinID == AArch64::BI__clear_cache) { 5422 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5423 const FunctionDecl *FD = E->getDirectCallee(); 5424 Value *Ops[2]; 5425 for (unsigned i = 0; i < 2; i++) 5426 Ops[i] = EmitScalarExpr(E->getArg(i)); 5427 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5428 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5429 StringRef Name = FD->getName(); 5430 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5431 } 5432 5433 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5434 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5435 getContext().getTypeSize(E->getType()) == 128) { 5436 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5437 ? 
Intrinsic::aarch64_ldaxp 5438 : Intrinsic::aarch64_ldxp); 5439 5440 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5441 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5442 "ldxp"); 5443 5444 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5445 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5446 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5447 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5448 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5449 5450 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5451 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5452 Val = Builder.CreateOr(Val, Val1); 5453 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5454 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5455 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5456 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5457 5458 QualType Ty = E->getType(); 5459 llvm::Type *RealResTy = ConvertType(Ty); 5460 llvm::Type *PtrTy = llvm::IntegerType::get( 5461 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5462 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5463 5464 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5465 ? Intrinsic::aarch64_ldaxr 5466 : Intrinsic::aarch64_ldxr, 5467 PtrTy); 5468 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5469 5470 if (RealResTy->isPointerTy()) 5471 return Builder.CreateIntToPtr(Val, RealResTy); 5472 5473 llvm::Type *IntResTy = llvm::IntegerType::get( 5474 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5475 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5476 return Builder.CreateBitCast(Val, RealResTy); 5477 } 5478 5479 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5480 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5481 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5482 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5483 ? 
Intrinsic::aarch64_stlxp 5484 : Intrinsic::aarch64_stxp); 5485 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 5486 5487 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5488 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5489 5490 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5491 llvm::Value *Val = Builder.CreateLoad(Tmp); 5492 5493 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5494 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5495 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5496 Int8PtrTy); 5497 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5498 } 5499 5500 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5501 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5502 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5503 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5504 5505 QualType Ty = E->getArg(0)->getType(); 5506 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5507 getContext().getTypeSize(Ty)); 5508 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5509 5510 if (StoreVal->getType()->isPointerTy()) 5511 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5512 else { 5513 llvm::Type *IntTy = llvm::IntegerType::get( 5514 getLLVMContext(), 5515 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5516 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5517 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5518 } 5519 5520 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5521 ? 
Intrinsic::aarch64_stlxr 5522 : Intrinsic::aarch64_stxr, 5523 StoreAddr->getType()); 5524 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5525 } 5526 5527 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5528 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5529 return Builder.CreateCall(F); 5530 } 5531 5532 // CRC32 5533 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5534 switch (BuiltinID) { 5535 case AArch64::BI__builtin_arm_crc32b: 5536 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5537 case AArch64::BI__builtin_arm_crc32cb: 5538 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5539 case AArch64::BI__builtin_arm_crc32h: 5540 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5541 case AArch64::BI__builtin_arm_crc32ch: 5542 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5543 case AArch64::BI__builtin_arm_crc32w: 5544 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5545 case AArch64::BI__builtin_arm_crc32cw: 5546 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5547 case AArch64::BI__builtin_arm_crc32d: 5548 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5549 case AArch64::BI__builtin_arm_crc32cd: 5550 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5551 } 5552 5553 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5554 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5555 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5556 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5557 5558 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 5559 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 5560 5561 return Builder.CreateCall(F, {Arg0, Arg1}); 5562 } 5563 5564 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 5565 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5566 BuiltinID == AArch64::BI__builtin_arm_rsrp || 5567 BuiltinID == AArch64::BI__builtin_arm_wsr || 5568 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 5569 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 5570 5571 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 5572 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5573 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5574 5575 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5576 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5577 5578 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5579 BuiltinID != AArch64::BI__builtin_arm_wsr; 5580 5581 llvm::Type *ValueType; 5582 llvm::Type *RegisterType = Int64Ty; 5583 if (IsPointerBuiltin) { 5584 ValueType = VoidPtrTy; 5585 } else if (Is64Bit) { 5586 ValueType = Int64Ty; 5587 } else { 5588 ValueType = Int32Ty; 5589 } 5590 5591 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5592 } 5593 5594 // Find out if any arguments are required to be integer constant 5595 // expressions. 5596 unsigned ICEArguments = 0; 5597 ASTContext::GetBuiltinTypeError Error; 5598 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5599 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5600 5601 llvm::SmallVector<Value*, 4> Ops; 5602 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5603 if ((ICEArguments & (1 << i)) == 0) { 5604 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5605 } else { 5606 // If this is required to be a constant, constant fold it so that we know 5607 // that the generated intrinsic gets a ConstantInt. 
5608 llvm::APSInt Result; 5609 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5610 assert(IsConst && "Constant arg isn't actually constant?"); 5611 (void)IsConst; 5612 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5613 } 5614 } 5615 5616 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5617 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5618 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5619 5620 if (Builtin) { 5621 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5622 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5623 assert(Result && "SISD intrinsic should have been handled"); 5624 return Result; 5625 } 5626 5627 llvm::APSInt Result; 5628 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5629 NeonTypeFlags Type(0); 5630 if (Arg->isIntegerConstantExpr(Result, getContext())) 5631 // Determine the type of this overloaded NEON intrinsic. 5632 Type = NeonTypeFlags(Result.getZExtValue()); 5633 5634 bool usgn = Type.isUnsigned(); 5635 bool quad = Type.isQuad(); 5636 5637 // Handle non-overloaded intrinsics first. 
5638 switch (BuiltinID) { 5639 default: break; 5640 case NEON::BI__builtin_neon_vldrq_p128: { 5641 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 5642 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 5643 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5644 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 5645 CharUnits::fromQuantity(16)); 5646 } 5647 case NEON::BI__builtin_neon_vstrq_p128: { 5648 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5649 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5650 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5651 } 5652 case NEON::BI__builtin_neon_vcvts_u32_f32: 5653 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5654 usgn = true; 5655 // FALL THROUGH 5656 case NEON::BI__builtin_neon_vcvts_s32_f32: 5657 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5658 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5659 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5660 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5661 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5662 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5663 if (usgn) 5664 return Builder.CreateFPToUI(Ops[0], InTy); 5665 return Builder.CreateFPToSI(Ops[0], InTy); 5666 } 5667 case NEON::BI__builtin_neon_vcvts_f32_u32: 5668 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5669 usgn = true; 5670 // FALL THROUGH 5671 case NEON::BI__builtin_neon_vcvts_f32_s32: 5672 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5673 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5674 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5675 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5676 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5677 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5678 if (usgn) 5679 return Builder.CreateUIToFP(Ops[0], FTy); 5680 return Builder.CreateSIToFP(Ops[0], FTy); 5681 } 5682 case NEON::BI__builtin_neon_vpaddd_s64: { 5683 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5684 Value *Vec = EmitScalarExpr(E->getArg(0)); 5685 // The vector is v2f64, so make sure it's bitcast to that. 5686 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5687 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5688 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5689 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5690 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5691 // Pairwise addition of a v2f64 into a scalar f64. 5692 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5693 } 5694 case NEON::BI__builtin_neon_vpaddd_f64: { 5695 llvm::Type *Ty = 5696 llvm::VectorType::get(DoubleTy, 2); 5697 Value *Vec = EmitScalarExpr(E->getArg(0)); 5698 // The vector is v2f64, so make sure it's bitcast to that. 5699 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5700 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5701 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5702 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5703 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5704 // Pairwise addition of a v2f64 into a scalar f64. 5705 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5706 } 5707 case NEON::BI__builtin_neon_vpadds_f32: { 5708 llvm::Type *Ty = 5709 llvm::VectorType::get(FloatTy, 2); 5710 Value *Vec = EmitScalarExpr(E->getArg(0)); 5711 // The vector is v2f32, so make sure it's bitcast to that. 
5712 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5713 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5714 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5715 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5716 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5717 // Pairwise addition of a v2f32 into a scalar f32. 5718 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5719 } 5720 case NEON::BI__builtin_neon_vceqzd_s64: 5721 case NEON::BI__builtin_neon_vceqzd_f64: 5722 case NEON::BI__builtin_neon_vceqzs_f32: 5723 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5724 return EmitAArch64CompareBuiltinExpr( 5725 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5726 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5727 case NEON::BI__builtin_neon_vcgezd_s64: 5728 case NEON::BI__builtin_neon_vcgezd_f64: 5729 case NEON::BI__builtin_neon_vcgezs_f32: 5730 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5731 return EmitAArch64CompareBuiltinExpr( 5732 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5733 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5734 case NEON::BI__builtin_neon_vclezd_s64: 5735 case NEON::BI__builtin_neon_vclezd_f64: 5736 case NEON::BI__builtin_neon_vclezs_f32: 5737 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5738 return EmitAArch64CompareBuiltinExpr( 5739 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5740 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5741 case NEON::BI__builtin_neon_vcgtzd_s64: 5742 case NEON::BI__builtin_neon_vcgtzd_f64: 5743 case NEON::BI__builtin_neon_vcgtzs_f32: 5744 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5745 return EmitAArch64CompareBuiltinExpr( 5746 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5747 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5748 case NEON::BI__builtin_neon_vcltzd_s64: 5749 case NEON::BI__builtin_neon_vcltzd_f64: 5750 case NEON::BI__builtin_neon_vcltzs_f32: 5751 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5752 return 
EmitAArch64CompareBuiltinExpr( 5753 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5754 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5755 5756 case NEON::BI__builtin_neon_vceqzd_u64: { 5757 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5758 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5759 Ops[0] = 5760 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5761 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5762 } 5763 case NEON::BI__builtin_neon_vceqd_f64: 5764 case NEON::BI__builtin_neon_vcled_f64: 5765 case NEON::BI__builtin_neon_vcltd_f64: 5766 case NEON::BI__builtin_neon_vcged_f64: 5767 case NEON::BI__builtin_neon_vcgtd_f64: { 5768 llvm::CmpInst::Predicate P; 5769 switch (BuiltinID) { 5770 default: llvm_unreachable("missing builtin ID in switch!"); 5771 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5772 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5773 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5774 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5775 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5776 } 5777 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5778 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5779 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5780 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5781 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5782 } 5783 case NEON::BI__builtin_neon_vceqs_f32: 5784 case NEON::BI__builtin_neon_vcles_f32: 5785 case NEON::BI__builtin_neon_vclts_f32: 5786 case NEON::BI__builtin_neon_vcges_f32: 5787 case NEON::BI__builtin_neon_vcgts_f32: { 5788 llvm::CmpInst::Predicate P; 5789 switch (BuiltinID) { 5790 default: llvm_unreachable("missing builtin ID in switch!"); 5791 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 5792 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 5793 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5794 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5795 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5796 } 5797 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5798 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5799 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5800 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5801 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5802 } 5803 case NEON::BI__builtin_neon_vceqd_s64: 5804 case NEON::BI__builtin_neon_vceqd_u64: 5805 case NEON::BI__builtin_neon_vcgtd_s64: 5806 case NEON::BI__builtin_neon_vcgtd_u64: 5807 case NEON::BI__builtin_neon_vcltd_s64: 5808 case NEON::BI__builtin_neon_vcltd_u64: 5809 case NEON::BI__builtin_neon_vcged_u64: 5810 case NEON::BI__builtin_neon_vcged_s64: 5811 case NEON::BI__builtin_neon_vcled_u64: 5812 case NEON::BI__builtin_neon_vcled_s64: { 5813 llvm::CmpInst::Predicate P; 5814 switch (BuiltinID) { 5815 default: llvm_unreachable("missing builtin ID in switch!"); 5816 case NEON::BI__builtin_neon_vceqd_s64: 5817 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5818 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5819 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5820 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5821 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5822 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5823 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5824 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5825 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5826 } 5827 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5828 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5829 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 5830 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5831 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5832 } 5833 case NEON::BI__builtin_neon_vtstd_s64: 5834 case NEON::BI__builtin_neon_vtstd_u64: { 5835 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5836 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5837 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5838 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 5839 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 5840 llvm::Constant::getNullValue(Int64Ty)); 5841 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 5842 } 5843 case NEON::BI__builtin_neon_vset_lane_i8: 5844 case NEON::BI__builtin_neon_vset_lane_i16: 5845 case NEON::BI__builtin_neon_vset_lane_i32: 5846 case NEON::BI__builtin_neon_vset_lane_i64: 5847 case NEON::BI__builtin_neon_vset_lane_f32: 5848 case NEON::BI__builtin_neon_vsetq_lane_i8: 5849 case NEON::BI__builtin_neon_vsetq_lane_i16: 5850 case NEON::BI__builtin_neon_vsetq_lane_i32: 5851 case NEON::BI__builtin_neon_vsetq_lane_i64: 5852 case NEON::BI__builtin_neon_vsetq_lane_f32: 5853 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5854 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5855 case NEON::BI__builtin_neon_vset_lane_f64: 5856 // The vector type needs a cast for the v1f64 variant. 5857 Ops[1] = Builder.CreateBitCast(Ops[1], 5858 llvm::VectorType::get(DoubleTy, 1)); 5859 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5860 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5861 case NEON::BI__builtin_neon_vsetq_lane_f64: 5862 // The vector type needs a cast for the v2f64 variant. 
5863 Ops[1] = Builder.CreateBitCast(Ops[1], 5864 llvm::VectorType::get(DoubleTy, 2)); 5865 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5866 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5867 5868 case NEON::BI__builtin_neon_vget_lane_i8: 5869 case NEON::BI__builtin_neon_vdupb_lane_i8: 5870 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 5871 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5872 "vget_lane"); 5873 case NEON::BI__builtin_neon_vgetq_lane_i8: 5874 case NEON::BI__builtin_neon_vdupb_laneq_i8: 5875 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 5876 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5877 "vgetq_lane"); 5878 case NEON::BI__builtin_neon_vget_lane_i16: 5879 case NEON::BI__builtin_neon_vduph_lane_i16: 5880 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 5881 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5882 "vget_lane"); 5883 case NEON::BI__builtin_neon_vgetq_lane_i16: 5884 case NEON::BI__builtin_neon_vduph_laneq_i16: 5885 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 5886 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5887 "vgetq_lane"); 5888 case NEON::BI__builtin_neon_vget_lane_i32: 5889 case NEON::BI__builtin_neon_vdups_lane_i32: 5890 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 5891 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5892 "vget_lane"); 5893 case NEON::BI__builtin_neon_vdups_lane_f32: 5894 Ops[0] = Builder.CreateBitCast(Ops[0], 5895 llvm::VectorType::get(FloatTy, 2)); 5896 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5897 "vdups_lane"); 5898 case NEON::BI__builtin_neon_vgetq_lane_i32: 5899 case NEON::BI__builtin_neon_vdups_laneq_i32: 5900 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 5901 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5902 "vgetq_lane"); 5903 case NEON::BI__builtin_neon_vget_lane_i64: 5904 case NEON::BI__builtin_neon_vdupd_lane_i64: 5905 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 5906 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5907 "vget_lane"); 5908 case NEON::BI__builtin_neon_vdupd_lane_f64: 5909 Ops[0] = Builder.CreateBitCast(Ops[0], 5910 llvm::VectorType::get(DoubleTy, 1)); 5911 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5912 "vdupd_lane"); 5913 case NEON::BI__builtin_neon_vgetq_lane_i64: 5914 case NEON::BI__builtin_neon_vdupd_laneq_i64: 5915 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 5916 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5917 "vgetq_lane"); 5918 case NEON::BI__builtin_neon_vget_lane_f32: 5919 Ops[0] = Builder.CreateBitCast(Ops[0], 5920 llvm::VectorType::get(FloatTy, 2)); 5921 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5922 "vget_lane"); 5923 case NEON::BI__builtin_neon_vget_lane_f64: 5924 Ops[0] = Builder.CreateBitCast(Ops[0], 5925 llvm::VectorType::get(DoubleTy, 1)); 5926 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5927 "vget_lane"); 5928 case NEON::BI__builtin_neon_vgetq_lane_f32: 5929 case NEON::BI__builtin_neon_vdups_laneq_f32: 5930 Ops[0] = Builder.CreateBitCast(Ops[0], 5931 llvm::VectorType::get(FloatTy, 4)); 5932 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5933 "vgetq_lane"); 5934 case NEON::BI__builtin_neon_vgetq_lane_f64: 5935 case NEON::BI__builtin_neon_vdupd_laneq_f64: 5936 Ops[0] = Builder.CreateBitCast(Ops[0], 5937 llvm::VectorType::get(DoubleTy, 2)); 5938 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5939 "vgetq_lane"); 5940 case NEON::BI__builtin_neon_vaddd_s64: 
5941 case NEON::BI__builtin_neon_vaddd_u64: 5942 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 5943 case NEON::BI__builtin_neon_vsubd_s64: 5944 case NEON::BI__builtin_neon_vsubd_u64: 5945 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 5946 case NEON::BI__builtin_neon_vqdmlalh_s16: 5947 case NEON::BI__builtin_neon_vqdmlslh_s16: { 5948 SmallVector<Value *, 2> ProductOps; 5949 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5950 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 5951 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5952 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5953 ProductOps, "vqdmlXl"); 5954 Constant *CI = ConstantInt::get(SizeTy, 0); 5955 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5956 5957 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 5958 ? Intrinsic::aarch64_neon_sqadd 5959 : Intrinsic::aarch64_neon_sqsub; 5960 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 5961 } 5962 case NEON::BI__builtin_neon_vqshlud_n_s64: { 5963 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5964 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5965 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 5966 Ops, "vqshlu_n"); 5967 } 5968 case NEON::BI__builtin_neon_vqshld_n_u64: 5969 case NEON::BI__builtin_neon_vqshld_n_s64: { 5970 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 5971 ? Intrinsic::aarch64_neon_uqshl 5972 : Intrinsic::aarch64_neon_sqshl; 5973 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5974 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5975 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 5976 } 5977 case NEON::BI__builtin_neon_vrshrd_n_u64: 5978 case NEON::BI__builtin_neon_vrshrd_n_s64: { 5979 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 5980 ? 
Intrinsic::aarch64_neon_urshl 5981 : Intrinsic::aarch64_neon_srshl; 5982 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5983 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 5984 Ops[1] = ConstantInt::get(Int64Ty, -SV); 5985 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 5986 } 5987 case NEON::BI__builtin_neon_vrsrad_n_u64: 5988 case NEON::BI__builtin_neon_vrsrad_n_s64: { 5989 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 5990 ? Intrinsic::aarch64_neon_urshl 5991 : Intrinsic::aarch64_neon_srshl; 5992 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5993 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 5994 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 5995 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 5996 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 5997 } 5998 case NEON::BI__builtin_neon_vshld_n_s64: 5999 case NEON::BI__builtin_neon_vshld_n_u64: { 6000 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6001 return Builder.CreateShl( 6002 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 6003 } 6004 case NEON::BI__builtin_neon_vshrd_n_s64: { 6005 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6006 return Builder.CreateAShr( 6007 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6008 Amt->getZExtValue())), 6009 "shrd_n"); 6010 } 6011 case NEON::BI__builtin_neon_vshrd_n_u64: { 6012 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6013 uint64_t ShiftAmt = Amt->getZExtValue(); 6014 // Right-shifting an unsigned value by its size yields 0. 
6015 if (ShiftAmt == 64) 6016 return ConstantInt::get(Int64Ty, 0); 6017 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 6018 "shrd_n"); 6019 } 6020 case NEON::BI__builtin_neon_vsrad_n_s64: { 6021 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6022 Ops[1] = Builder.CreateAShr( 6023 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6024 Amt->getZExtValue())), 6025 "shrd_n"); 6026 return Builder.CreateAdd(Ops[0], Ops[1]); 6027 } 6028 case NEON::BI__builtin_neon_vsrad_n_u64: { 6029 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6030 uint64_t ShiftAmt = Amt->getZExtValue(); 6031 // Right-shifting an unsigned value by its size yields 0. 6032 // As Op + 0 = Op, return Ops[0] directly. 6033 if (ShiftAmt == 64) 6034 return Ops[0]; 6035 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 6036 "shrd_n"); 6037 return Builder.CreateAdd(Ops[0], Ops[1]); 6038 } 6039 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 6040 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 6041 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 6042 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 6043 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6044 "lane"); 6045 SmallVector<Value *, 2> ProductOps; 6046 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6047 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 6048 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6049 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6050 ProductOps, "vqdmlXl"); 6051 Constant *CI = ConstantInt::get(SizeTy, 0); 6052 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6053 Ops.pop_back(); 6054 6055 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 6056 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 6057 ? 
Intrinsic::aarch64_neon_sqadd 6058 : Intrinsic::aarch64_neon_sqsub; 6059 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 6060 } 6061 case NEON::BI__builtin_neon_vqdmlals_s32: 6062 case NEON::BI__builtin_neon_vqdmlsls_s32: { 6063 SmallVector<Value *, 2> ProductOps; 6064 ProductOps.push_back(Ops[1]); 6065 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 6066 Ops[1] = 6067 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6068 ProductOps, "vqdmlXl"); 6069 6070 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 6071 ? Intrinsic::aarch64_neon_sqadd 6072 : Intrinsic::aarch64_neon_sqsub; 6073 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 6074 } 6075 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 6076 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 6077 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 6078 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 6079 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6080 "lane"); 6081 SmallVector<Value *, 2> ProductOps; 6082 ProductOps.push_back(Ops[1]); 6083 ProductOps.push_back(Ops[2]); 6084 Ops[1] = 6085 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6086 ProductOps, "vqdmlXl"); 6087 Ops.pop_back(); 6088 6089 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 6090 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 6091 ? Intrinsic::aarch64_neon_sqadd 6092 : Intrinsic::aarch64_neon_sqsub; 6093 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 6094 } 6095 } 6096 6097 llvm::VectorType *VTy = GetNeonType(this, Type); 6098 llvm::Type *Ty = VTy; 6099 if (!Ty) 6100 return nullptr; 6101 6102 // Not all intrinsics handled by the common case work for AArch64 yet, so only 6103 // defer to common code if it's been added to our special map. 
6104 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 6105 AArch64SIMDIntrinsicsProvenSorted); 6106 6107 if (Builtin) 6108 return EmitCommonNeonBuiltinExpr( 6109 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 6110 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 6111 /*never use addresses*/ Address::invalid(), Address::invalid()); 6112 6113 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 6114 return V; 6115 6116 unsigned Int; 6117 switch (BuiltinID) { 6118 default: return nullptr; 6119 case NEON::BI__builtin_neon_vbsl_v: 6120 case NEON::BI__builtin_neon_vbslq_v: { 6121 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 6122 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 6123 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 6124 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 6125 6126 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 6127 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 6128 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 6129 return Builder.CreateBitCast(Ops[0], Ty); 6130 } 6131 case NEON::BI__builtin_neon_vfma_lane_v: 6132 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 6133 // The ARM builtins (and instructions) have the addend as the first 6134 // operand, but the 'fma' intrinsics have it last. Swap it around here. 6135 Value *Addend = Ops[0]; 6136 Value *Multiplicand = Ops[1]; 6137 Value *LaneSource = Ops[2]; 6138 Ops[0] = Multiplicand; 6139 Ops[1] = LaneSource; 6140 Ops[2] = Addend; 6141 6142 // Now adjust things to handle the lane access. 6143 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
6144 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 6145 VTy; 6146 llvm::Constant *cst = cast<Constant>(Ops[3]); 6147 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 6148 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 6149 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 6150 6151 Ops.pop_back(); 6152 Int = Intrinsic::fma; 6153 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 6154 } 6155 case NEON::BI__builtin_neon_vfma_laneq_v: { 6156 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 6157 // v1f64 fma should be mapped to Neon scalar f64 fma 6158 if (VTy && VTy->getElementType() == DoubleTy) { 6159 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6160 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6161 llvm::Type *VTy = GetNeonType(this, 6162 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 6163 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 6164 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6165 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 6166 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6167 return Builder.CreateBitCast(Result, Ty); 6168 } 6169 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6170 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6171 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6172 6173 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 6174 VTy->getNumElements() * 2); 6175 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 6176 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 6177 cast<ConstantInt>(Ops[3])); 6178 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 6179 6180 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6181 } 6182 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 6183 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6184 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6185 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6186 6187 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 6188 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 6189 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6190 } 6191 case NEON::BI__builtin_neon_vfmah_lane_f16: 6192 case NEON::BI__builtin_neon_vfmas_lane_f32: 6193 case NEON::BI__builtin_neon_vfmah_laneq_f16: 6194 case NEON::BI__builtin_neon_vfmas_laneq_f32: 6195 case NEON::BI__builtin_neon_vfmad_lane_f64: 6196 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 6197 Ops.push_back(EmitScalarExpr(E->getArg(3))); 6198 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 6199 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6200 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6201 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6202 } 6203 case NEON::BI__builtin_neon_vmull_v: 6204 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6205 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 6206 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 6207 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 6208 case NEON::BI__builtin_neon_vmax_v: 6209 case NEON::BI__builtin_neon_vmaxq_v: 6210 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6211 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 6212 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 6213 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 6214 case NEON::BI__builtin_neon_vmin_v: 6215 case NEON::BI__builtin_neon_vminq_v: 6216 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6217 Int = usgn ? 
Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 6218 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 6219 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 6220 case NEON::BI__builtin_neon_vabd_v: 6221 case NEON::BI__builtin_neon_vabdq_v: 6222 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6223 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 6224 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 6225 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 6226 case NEON::BI__builtin_neon_vpadal_v: 6227 case NEON::BI__builtin_neon_vpadalq_v: { 6228 unsigned ArgElts = VTy->getNumElements(); 6229 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 6230 unsigned BitWidth = EltTy->getBitWidth(); 6231 llvm::Type *ArgTy = llvm::VectorType::get( 6232 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 6233 llvm::Type* Tys[2] = { VTy, ArgTy }; 6234 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 6235 SmallVector<llvm::Value*, 1> TmpOps; 6236 TmpOps.push_back(Ops[1]); 6237 Function *F = CGM.getIntrinsic(Int, Tys); 6238 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 6239 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 6240 return Builder.CreateAdd(tmp, addend); 6241 } 6242 case NEON::BI__builtin_neon_vpmin_v: 6243 case NEON::BI__builtin_neon_vpminq_v: 6244 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6245 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 6246 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 6247 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 6248 case NEON::BI__builtin_neon_vpmax_v: 6249 case NEON::BI__builtin_neon_vpmaxq_v: 6250 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6251 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 6252 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 6253 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 6254 case NEON::BI__builtin_neon_vminnm_v: 6255 case NEON::BI__builtin_neon_vminnmq_v: 6256 Int = Intrinsic::aarch64_neon_fminnm; 6257 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 6258 case NEON::BI__builtin_neon_vmaxnm_v: 6259 case NEON::BI__builtin_neon_vmaxnmq_v: 6260 Int = Intrinsic::aarch64_neon_fmaxnm; 6261 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 6262 case NEON::BI__builtin_neon_vrecpss_f32: { 6263 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6264 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 6265 Ops, "vrecps"); 6266 } 6267 case NEON::BI__builtin_neon_vrecpsd_f64: { 6268 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6269 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 6270 Ops, "vrecps"); 6271 } 6272 case NEON::BI__builtin_neon_vqshrun_n_v: 6273 Int = Intrinsic::aarch64_neon_sqshrun; 6274 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 6275 case NEON::BI__builtin_neon_vqrshrun_n_v: 6276 Int = Intrinsic::aarch64_neon_sqrshrun; 6277 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 6278 case NEON::BI__builtin_neon_vqshrn_n_v: 6279 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 6280 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 6281 case NEON::BI__builtin_neon_vrshrn_n_v: 6282 Int = Intrinsic::aarch64_neon_rshrn; 6283 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 6284 case NEON::BI__builtin_neon_vqrshrn_n_v: 6285 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 6286 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 6287 case NEON::BI__builtin_neon_vrnda_v: 6288 case NEON::BI__builtin_neon_vrndaq_v: { 6289 Int = Intrinsic::round; 6290 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 6291 } 6292 case NEON::BI__builtin_neon_vrndi_v: 6293 case NEON::BI__builtin_neon_vrndiq_v: { 6294 Int = Intrinsic::nearbyint; 6295 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 6296 } 6297 case NEON::BI__builtin_neon_vrndm_v: 6298 case NEON::BI__builtin_neon_vrndmq_v: { 6299 Int = Intrinsic::floor; 6300 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 6301 } 6302 case NEON::BI__builtin_neon_vrndn_v: 6303 case NEON::BI__builtin_neon_vrndnq_v: { 6304 Int = Intrinsic::aarch64_neon_frintn; 6305 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 6306 } 6307 case NEON::BI__builtin_neon_vrndp_v: 6308 case NEON::BI__builtin_neon_vrndpq_v: { 6309 Int = Intrinsic::ceil; 6310 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 6311 } 6312 case NEON::BI__builtin_neon_vrndx_v: 6313 case NEON::BI__builtin_neon_vrndxq_v: { 6314 Int = Intrinsic::rint; 6315 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 6316 } 6317 case NEON::BI__builtin_neon_vrnd_v: 6318 case NEON::BI__builtin_neon_vrndq_v: { 6319 Int = Intrinsic::trunc; 6320 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 6321 } 6322 case NEON::BI__builtin_neon_vceqz_v: 6323 case NEON::BI__builtin_neon_vceqzq_v: 6324 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 6325 ICmpInst::ICMP_EQ, "vceqz"); 6326 case NEON::BI__builtin_neon_vcgez_v: 6327 case NEON::BI__builtin_neon_vcgezq_v: 6328 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 6329 ICmpInst::ICMP_SGE, "vcgez"); 6330 case NEON::BI__builtin_neon_vclez_v: 6331 case NEON::BI__builtin_neon_vclezq_v: 6332 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 6333 ICmpInst::ICMP_SLE, "vclez"); 6334 case NEON::BI__builtin_neon_vcgtz_v: 6335 case NEON::BI__builtin_neon_vcgtzq_v: 6336 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 6337 ICmpInst::ICMP_SGT, "vcgtz"); 6338 case NEON::BI__builtin_neon_vcltz_v: 6339 case NEON::BI__builtin_neon_vcltzq_v: 6340 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 6341 ICmpInst::ICMP_SLT, "vcltz"); 6342 case NEON::BI__builtin_neon_vcvt_f64_v: 6343 case NEON::BI__builtin_neon_vcvtq_f64_v: 6344 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6345 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 6346 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 6347 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 6348 case NEON::BI__builtin_neon_vcvt_f64_f32: { 6349 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 6350 "unexpected vcvt_f64_f32 builtin"); 6351 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 6352 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6353 6354 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 6355 } 6356 case NEON::BI__builtin_neon_vcvt_f32_f64: { 6357 assert(Type.getEltType() == NeonTypeFlags::Float32 && 6358 "unexpected vcvt_f32_f64 builtin"); 6359 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 6360 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6361 6362 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 6363 } 6364 case NEON::BI__builtin_neon_vcvt_s32_v: 6365 case NEON::BI__builtin_neon_vcvt_u32_v: 6366 case NEON::BI__builtin_neon_vcvt_s64_v: 6367 case NEON::BI__builtin_neon_vcvt_u64_v: 6368 case NEON::BI__builtin_neon_vcvt_s16_v: 6369 case NEON::BI__builtin_neon_vcvt_u16_v: 6370 case NEON::BI__builtin_neon_vcvtq_s32_v: 6371 case NEON::BI__builtin_neon_vcvtq_u32_v: 6372 case NEON::BI__builtin_neon_vcvtq_s64_v: 6373 case 
NEON::BI__builtin_neon_vcvtq_u64_v: 6374 case NEON::BI__builtin_neon_vcvtq_s16_v: 6375 case NEON::BI__builtin_neon_vcvtq_u16_v: { 6376 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 6377 if (usgn) 6378 return Builder.CreateFPToUI(Ops[0], Ty); 6379 return Builder.CreateFPToSI(Ops[0], Ty); 6380 } 6381 case NEON::BI__builtin_neon_vcvta_s16_v: 6382 case NEON::BI__builtin_neon_vcvta_s32_v: 6383 case NEON::BI__builtin_neon_vcvtaq_s16_v: 6384 case NEON::BI__builtin_neon_vcvtaq_s32_v: 6385 case NEON::BI__builtin_neon_vcvta_u32_v: 6386 case NEON::BI__builtin_neon_vcvtaq_u16_v: 6387 case NEON::BI__builtin_neon_vcvtaq_u32_v: 6388 case NEON::BI__builtin_neon_vcvta_s64_v: 6389 case NEON::BI__builtin_neon_vcvtaq_s64_v: 6390 case NEON::BI__builtin_neon_vcvta_u64_v: 6391 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 6392 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 6393 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6394 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 6395 } 6396 case NEON::BI__builtin_neon_vcvtm_s16_v: 6397 case NEON::BI__builtin_neon_vcvtm_s32_v: 6398 case NEON::BI__builtin_neon_vcvtmq_s16_v: 6399 case NEON::BI__builtin_neon_vcvtmq_s32_v: 6400 case NEON::BI__builtin_neon_vcvtm_u16_v: 6401 case NEON::BI__builtin_neon_vcvtm_u32_v: 6402 case NEON::BI__builtin_neon_vcvtmq_u16_v: 6403 case NEON::BI__builtin_neon_vcvtmq_u32_v: 6404 case NEON::BI__builtin_neon_vcvtm_s64_v: 6405 case NEON::BI__builtin_neon_vcvtmq_s64_v: 6406 case NEON::BI__builtin_neon_vcvtm_u64_v: 6407 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 6408 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 6409 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6410 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 6411 } 6412 case NEON::BI__builtin_neon_vcvtn_s16_v: 6413 case NEON::BI__builtin_neon_vcvtn_s32_v: 6414 case NEON::BI__builtin_neon_vcvtnq_s16_v: 6415 case NEON::BI__builtin_neon_vcvtnq_s32_v: 6416 case NEON::BI__builtin_neon_vcvtn_u16_v: 6417 case NEON::BI__builtin_neon_vcvtn_u32_v: 6418 case NEON::BI__builtin_neon_vcvtnq_u16_v: 6419 case NEON::BI__builtin_neon_vcvtnq_u32_v: 6420 case NEON::BI__builtin_neon_vcvtn_s64_v: 6421 case NEON::BI__builtin_neon_vcvtnq_s64_v: 6422 case NEON::BI__builtin_neon_vcvtn_u64_v: 6423 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 6424 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 6425 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6426 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 6427 } 6428 case NEON::BI__builtin_neon_vcvtp_s16_v: 6429 case NEON::BI__builtin_neon_vcvtp_s32_v: 6430 case NEON::BI__builtin_neon_vcvtpq_s16_v: 6431 case NEON::BI__builtin_neon_vcvtpq_s32_v: 6432 case NEON::BI__builtin_neon_vcvtp_u16_v: 6433 case NEON::BI__builtin_neon_vcvtp_u32_v: 6434 case NEON::BI__builtin_neon_vcvtpq_u16_v: 6435 case NEON::BI__builtin_neon_vcvtpq_u32_v: 6436 case NEON::BI__builtin_neon_vcvtp_s64_v: 6437 case NEON::BI__builtin_neon_vcvtpq_s64_v: 6438 case NEON::BI__builtin_neon_vcvtp_u64_v: 6439 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 6440 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 6441 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6442 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 6443 } 6444 case NEON::BI__builtin_neon_vmulx_v: 6445 case NEON::BI__builtin_neon_vmulxq_v: { 6446 Int = Intrinsic::aarch64_neon_fmulx; 6447 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 6448 } 6449 case NEON::BI__builtin_neon_vmul_lane_v: 6450 case NEON::BI__builtin_neon_vmul_laneq_v: { 6451 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 6452 bool Quad = false; 6453 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 6454 Quad = true; 6455 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6456 llvm::Type *VTy = GetNeonType(this, 6457 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 6458 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6459 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 6460 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 6461 return Builder.CreateBitCast(Result, Ty); 6462 } 6463 case NEON::BI__builtin_neon_vnegd_s64: 6464 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 6465 case NEON::BI__builtin_neon_vpmaxnm_v: 6466 case NEON::BI__builtin_neon_vpmaxnmq_v: { 6467 Int = Intrinsic::aarch64_neon_fmaxnmp; 6468 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 6469 } 6470 case NEON::BI__builtin_neon_vpminnm_v: 6471 case NEON::BI__builtin_neon_vpminnmq_v: { 6472 Int = Intrinsic::aarch64_neon_fminnmp; 6473 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 6474 } 6475 case NEON::BI__builtin_neon_vsqrt_v: 6476 case NEON::BI__builtin_neon_vsqrtq_v: { 6477 Int = Intrinsic::sqrt; 6478 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6479 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 6480 } 6481 case NEON::BI__builtin_neon_vrbit_v: 6482 case NEON::BI__builtin_neon_vrbitq_v: { 6483 Int = Intrinsic::aarch64_neon_rbit; 6484 return 
EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 6485 } 6486 case NEON::BI__builtin_neon_vaddv_u8: 6487 // FIXME: These are handled by the AArch64 scalar code. 6488 usgn = true; 6489 // FALLTHROUGH 6490 case NEON::BI__builtin_neon_vaddv_s8: { 6491 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6492 Ty = Int32Ty; 6493 VTy = llvm::VectorType::get(Int8Ty, 8); 6494 llvm::Type *Tys[2] = { Ty, VTy }; 6495 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6496 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6497 return Builder.CreateTrunc(Ops[0], Int8Ty); 6498 } 6499 case NEON::BI__builtin_neon_vaddv_u16: 6500 usgn = true; 6501 // FALLTHROUGH 6502 case NEON::BI__builtin_neon_vaddv_s16: { 6503 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6504 Ty = Int32Ty; 6505 VTy = llvm::VectorType::get(Int16Ty, 4); 6506 llvm::Type *Tys[2] = { Ty, VTy }; 6507 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6508 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6509 return Builder.CreateTrunc(Ops[0], Int16Ty); 6510 } 6511 case NEON::BI__builtin_neon_vaddvq_u8: 6512 usgn = true; 6513 // FALLTHROUGH 6514 case NEON::BI__builtin_neon_vaddvq_s8: { 6515 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6516 Ty = Int32Ty; 6517 VTy = llvm::VectorType::get(Int8Ty, 16); 6518 llvm::Type *Tys[2] = { Ty, VTy }; 6519 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6520 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6521 return Builder.CreateTrunc(Ops[0], Int8Ty); 6522 } 6523 case NEON::BI__builtin_neon_vaddvq_u16: 6524 usgn = true; 6525 // FALLTHROUGH 6526 case NEON::BI__builtin_neon_vaddvq_s16: { 6527 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6528 Ty = Int32Ty; 6529 VTy = llvm::VectorType::get(Int16Ty, 8); 6530 llvm::Type *Tys[2] = { Ty, VTy }; 6531 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6532 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6533 return Builder.CreateTrunc(Ops[0], Int16Ty); 6534 } 6535 case NEON::BI__builtin_neon_vmaxv_u8: { 6536 Int = Intrinsic::aarch64_neon_umaxv; 6537 Ty = Int32Ty; 6538 VTy = llvm::VectorType::get(Int8Ty, 8); 6539 llvm::Type *Tys[2] = { Ty, VTy }; 6540 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6541 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6542 return Builder.CreateTrunc(Ops[0], Int8Ty); 6543 } 6544 case NEON::BI__builtin_neon_vmaxv_u16: { 6545 Int = Intrinsic::aarch64_neon_umaxv; 6546 Ty = Int32Ty; 6547 VTy = llvm::VectorType::get(Int16Ty, 4); 6548 llvm::Type *Tys[2] = { Ty, VTy }; 6549 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6550 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6551 return Builder.CreateTrunc(Ops[0], Int16Ty); 6552 } 6553 case NEON::BI__builtin_neon_vmaxvq_u8: { 6554 Int = Intrinsic::aarch64_neon_umaxv; 6555 Ty = Int32Ty; 6556 VTy = llvm::VectorType::get(Int8Ty, 16); 6557 llvm::Type *Tys[2] = { Ty, VTy }; 6558 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6559 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6560 return Builder.CreateTrunc(Ops[0], Int8Ty); 6561 } 6562 case NEON::BI__builtin_neon_vmaxvq_u16: { 6563 Int = Intrinsic::aarch64_neon_umaxv; 6564 Ty = Int32Ty; 6565 VTy = llvm::VectorType::get(Int16Ty, 8); 6566 llvm::Type *Tys[2] = { Ty, VTy }; 6567 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6568 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6569 return Builder.CreateTrunc(Ops[0], Int16Ty); 6570 } 6571 case NEON::BI__builtin_neon_vmaxv_s8: { 6572 Int = Intrinsic::aarch64_neon_smaxv; 6573 Ty = Int32Ty; 6574 VTy = llvm::VectorType::get(Int8Ty, 8); 6575 llvm::Type *Tys[2] = { Ty, 
VTy }; 6576 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6577 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6578 return Builder.CreateTrunc(Ops[0], Int8Ty); 6579 } 6580 case NEON::BI__builtin_neon_vmaxv_s16: { 6581 Int = Intrinsic::aarch64_neon_smaxv; 6582 Ty = Int32Ty; 6583 VTy = llvm::VectorType::get(Int16Ty, 4); 6584 llvm::Type *Tys[2] = { Ty, VTy }; 6585 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6586 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6587 return Builder.CreateTrunc(Ops[0], Int16Ty); 6588 } 6589 case NEON::BI__builtin_neon_vmaxvq_s8: { 6590 Int = Intrinsic::aarch64_neon_smaxv; 6591 Ty = Int32Ty; 6592 VTy = llvm::VectorType::get(Int8Ty, 16); 6593 llvm::Type *Tys[2] = { Ty, VTy }; 6594 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6595 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6596 return Builder.CreateTrunc(Ops[0], Int8Ty); 6597 } 6598 case NEON::BI__builtin_neon_vmaxvq_s16: { 6599 Int = Intrinsic::aarch64_neon_smaxv; 6600 Ty = Int32Ty; 6601 VTy = llvm::VectorType::get(Int16Ty, 8); 6602 llvm::Type *Tys[2] = { Ty, VTy }; 6603 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6604 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6605 return Builder.CreateTrunc(Ops[0], Int16Ty); 6606 } 6607 case NEON::BI__builtin_neon_vmaxv_f16: { 6608 Int = Intrinsic::aarch64_neon_fmaxv; 6609 Ty = HalfTy; 6610 VTy = llvm::VectorType::get(HalfTy, 4); 6611 llvm::Type *Tys[2] = { Ty, VTy }; 6612 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6613 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6614 return Builder.CreateTrunc(Ops[0], HalfTy); 6615 } 6616 case NEON::BI__builtin_neon_vmaxvq_f16: { 6617 Int = Intrinsic::aarch64_neon_fmaxv; 6618 Ty = HalfTy; 6619 VTy = llvm::VectorType::get(HalfTy, 8); 6620 llvm::Type *Tys[2] = { Ty, VTy }; 6621 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6622 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6623 return 
Builder.CreateTrunc(Ops[0], HalfTy); 6624 } 6625 case NEON::BI__builtin_neon_vminv_u8: { 6626 Int = Intrinsic::aarch64_neon_uminv; 6627 Ty = Int32Ty; 6628 VTy = llvm::VectorType::get(Int8Ty, 8); 6629 llvm::Type *Tys[2] = { Ty, VTy }; 6630 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6631 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6632 return Builder.CreateTrunc(Ops[0], Int8Ty); 6633 } 6634 case NEON::BI__builtin_neon_vminv_u16: { 6635 Int = Intrinsic::aarch64_neon_uminv; 6636 Ty = Int32Ty; 6637 VTy = llvm::VectorType::get(Int16Ty, 4); 6638 llvm::Type *Tys[2] = { Ty, VTy }; 6639 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6640 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6641 return Builder.CreateTrunc(Ops[0], Int16Ty); 6642 } 6643 case NEON::BI__builtin_neon_vminvq_u8: { 6644 Int = Intrinsic::aarch64_neon_uminv; 6645 Ty = Int32Ty; 6646 VTy = llvm::VectorType::get(Int8Ty, 16); 6647 llvm::Type *Tys[2] = { Ty, VTy }; 6648 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6649 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6650 return Builder.CreateTrunc(Ops[0], Int8Ty); 6651 } 6652 case NEON::BI__builtin_neon_vminvq_u16: { 6653 Int = Intrinsic::aarch64_neon_uminv; 6654 Ty = Int32Ty; 6655 VTy = llvm::VectorType::get(Int16Ty, 8); 6656 llvm::Type *Tys[2] = { Ty, VTy }; 6657 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6658 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6659 return Builder.CreateTrunc(Ops[0], Int16Ty); 6660 } 6661 case NEON::BI__builtin_neon_vminv_s8: { 6662 Int = Intrinsic::aarch64_neon_sminv; 6663 Ty = Int32Ty; 6664 VTy = llvm::VectorType::get(Int8Ty, 8); 6665 llvm::Type *Tys[2] = { Ty, VTy }; 6666 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6667 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6668 return Builder.CreateTrunc(Ops[0], Int8Ty); 6669 } 6670 case NEON::BI__builtin_neon_vminv_s16: { 6671 Int = Intrinsic::aarch64_neon_sminv; 6672 Ty = Int32Ty; 6673 VTy 
= llvm::VectorType::get(Int16Ty, 4); 6674 llvm::Type *Tys[2] = { Ty, VTy }; 6675 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6676 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6677 return Builder.CreateTrunc(Ops[0], Int16Ty); 6678 } 6679 case NEON::BI__builtin_neon_vminvq_s8: { 6680 Int = Intrinsic::aarch64_neon_sminv; 6681 Ty = Int32Ty; 6682 VTy = llvm::VectorType::get(Int8Ty, 16); 6683 llvm::Type *Tys[2] = { Ty, VTy }; 6684 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6685 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6686 return Builder.CreateTrunc(Ops[0], Int8Ty); 6687 } 6688 case NEON::BI__builtin_neon_vminvq_s16: { 6689 Int = Intrinsic::aarch64_neon_sminv; 6690 Ty = Int32Ty; 6691 VTy = llvm::VectorType::get(Int16Ty, 8); 6692 llvm::Type *Tys[2] = { Ty, VTy }; 6693 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6694 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6695 return Builder.CreateTrunc(Ops[0], Int16Ty); 6696 } 6697 case NEON::BI__builtin_neon_vminv_f16: { 6698 Int = Intrinsic::aarch64_neon_fminv; 6699 Ty = HalfTy; 6700 VTy = llvm::VectorType::get(HalfTy, 4); 6701 llvm::Type *Tys[2] = { Ty, VTy }; 6702 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6703 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6704 return Builder.CreateTrunc(Ops[0], HalfTy); 6705 } 6706 case NEON::BI__builtin_neon_vminvq_f16: { 6707 Int = Intrinsic::aarch64_neon_fminv; 6708 Ty = HalfTy; 6709 VTy = llvm::VectorType::get(HalfTy, 8); 6710 llvm::Type *Tys[2] = { Ty, VTy }; 6711 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6712 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6713 return Builder.CreateTrunc(Ops[0], HalfTy); 6714 } 6715 case NEON::BI__builtin_neon_vmaxnmv_f16: { 6716 Int = Intrinsic::aarch64_neon_fmaxnmv; 6717 Ty = HalfTy; 6718 VTy = llvm::VectorType::get(HalfTy, 4); 6719 llvm::Type *Tys[2] = { Ty, VTy }; 6720 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6721 Ops[0] = 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 6722 return Builder.CreateTrunc(Ops[0], HalfTy); 6723 } 6724 case NEON::BI__builtin_neon_vmaxnmvq_f16: { 6725 Int = Intrinsic::aarch64_neon_fmaxnmv; 6726 Ty = HalfTy; 6727 VTy = llvm::VectorType::get(HalfTy, 8); 6728 llvm::Type *Tys[2] = { Ty, VTy }; 6729 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6730 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 6731 return Builder.CreateTrunc(Ops[0], HalfTy); 6732 } 6733 case NEON::BI__builtin_neon_vminnmv_f16: { 6734 Int = Intrinsic::aarch64_neon_fminnmv; 6735 Ty = HalfTy; 6736 VTy = llvm::VectorType::get(HalfTy, 4); 6737 llvm::Type *Tys[2] = { Ty, VTy }; 6738 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6739 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 6740 return Builder.CreateTrunc(Ops[0], HalfTy); 6741 } 6742 case NEON::BI__builtin_neon_vminnmvq_f16: { 6743 Int = Intrinsic::aarch64_neon_fminnmv; 6744 Ty = HalfTy; 6745 VTy = llvm::VectorType::get(HalfTy, 8); 6746 llvm::Type *Tys[2] = { Ty, VTy }; 6747 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6748 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 6749 return Builder.CreateTrunc(Ops[0], HalfTy); 6750 } 6751 case NEON::BI__builtin_neon_vmul_n_f64: { 6752 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6753 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6754 return Builder.CreateFMul(Ops[0], RHS); 6755 } 6756 case NEON::BI__builtin_neon_vaddlv_u8: { 6757 Int = Intrinsic::aarch64_neon_uaddlv; 6758 Ty = Int32Ty; 6759 VTy = llvm::VectorType::get(Int8Ty, 8); 6760 llvm::Type *Tys[2] = { Ty, VTy }; 6761 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6762 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6763 return Builder.CreateTrunc(Ops[0], Int16Ty); 6764 } 6765 case NEON::BI__builtin_neon_vaddlv_u16: { 6766 Int = Intrinsic::aarch64_neon_uaddlv; 6767 Ty = Int32Ty; 6768 VTy = llvm::VectorType::get(Int16Ty, 4); 6769 
llvm::Type *Tys[2] = { Ty, VTy }; 6770 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6771 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6772 } 6773 case NEON::BI__builtin_neon_vaddlvq_u8: { 6774 Int = Intrinsic::aarch64_neon_uaddlv; 6775 Ty = Int32Ty; 6776 VTy = llvm::VectorType::get(Int8Ty, 16); 6777 llvm::Type *Tys[2] = { Ty, VTy }; 6778 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6779 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6780 return Builder.CreateTrunc(Ops[0], Int16Ty); 6781 } 6782 case NEON::BI__builtin_neon_vaddlvq_u16: { 6783 Int = Intrinsic::aarch64_neon_uaddlv; 6784 Ty = Int32Ty; 6785 VTy = llvm::VectorType::get(Int16Ty, 8); 6786 llvm::Type *Tys[2] = { Ty, VTy }; 6787 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6788 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6789 } 6790 case NEON::BI__builtin_neon_vaddlv_s8: { 6791 Int = Intrinsic::aarch64_neon_saddlv; 6792 Ty = Int32Ty; 6793 VTy = llvm::VectorType::get(Int8Ty, 8); 6794 llvm::Type *Tys[2] = { Ty, VTy }; 6795 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6796 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6797 return Builder.CreateTrunc(Ops[0], Int16Ty); 6798 } 6799 case NEON::BI__builtin_neon_vaddlv_s16: { 6800 Int = Intrinsic::aarch64_neon_saddlv; 6801 Ty = Int32Ty; 6802 VTy = llvm::VectorType::get(Int16Ty, 4); 6803 llvm::Type *Tys[2] = { Ty, VTy }; 6804 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6805 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6806 } 6807 case NEON::BI__builtin_neon_vaddlvq_s8: { 6808 Int = Intrinsic::aarch64_neon_saddlv; 6809 Ty = Int32Ty; 6810 VTy = llvm::VectorType::get(Int8Ty, 16); 6811 llvm::Type *Tys[2] = { Ty, VTy }; 6812 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6813 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6814 return Builder.CreateTrunc(Ops[0], Int16Ty); 6815 } 6816 case NEON::BI__builtin_neon_vaddlvq_s16: { 6817 Int = 
Intrinsic::aarch64_neon_saddlv; 6818 Ty = Int32Ty; 6819 VTy = llvm::VectorType::get(Int16Ty, 8); 6820 llvm::Type *Tys[2] = { Ty, VTy }; 6821 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6822 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6823 } 6824 case NEON::BI__builtin_neon_vsri_n_v: 6825 case NEON::BI__builtin_neon_vsriq_n_v: { 6826 Int = Intrinsic::aarch64_neon_vsri; 6827 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6828 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6829 } 6830 case NEON::BI__builtin_neon_vsli_n_v: 6831 case NEON::BI__builtin_neon_vsliq_n_v: { 6832 Int = Intrinsic::aarch64_neon_vsli; 6833 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6834 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6835 } 6836 case NEON::BI__builtin_neon_vsra_n_v: 6837 case NEON::BI__builtin_neon_vsraq_n_v: 6838 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6839 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6840 return Builder.CreateAdd(Ops[0], Ops[1]); 6841 case NEON::BI__builtin_neon_vrsra_n_v: 6842 case NEON::BI__builtin_neon_vrsraq_n_v: { 6843 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6844 SmallVector<llvm::Value*,2> TmpOps; 6845 TmpOps.push_back(Ops[1]); 6846 TmpOps.push_back(Ops[2]); 6847 Function* F = CGM.getIntrinsic(Int, Ty); 6848 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6849 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6850 return Builder.CreateAdd(Ops[0], tmp); 6851 } 6852 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6853 // of an Align parameter here. 
6854 case NEON::BI__builtin_neon_vld1_x2_v: 6855 case NEON::BI__builtin_neon_vld1q_x2_v: 6856 case NEON::BI__builtin_neon_vld1_x3_v: 6857 case NEON::BI__builtin_neon_vld1q_x3_v: 6858 case NEON::BI__builtin_neon_vld1_x4_v: 6859 case NEON::BI__builtin_neon_vld1q_x4_v: { 6860 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6861 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6862 llvm::Type *Tys[2] = { VTy, PTy }; 6863 unsigned Int; 6864 switch (BuiltinID) { 6865 case NEON::BI__builtin_neon_vld1_x2_v: 6866 case NEON::BI__builtin_neon_vld1q_x2_v: 6867 Int = Intrinsic::aarch64_neon_ld1x2; 6868 break; 6869 case NEON::BI__builtin_neon_vld1_x3_v: 6870 case NEON::BI__builtin_neon_vld1q_x3_v: 6871 Int = Intrinsic::aarch64_neon_ld1x3; 6872 break; 6873 case NEON::BI__builtin_neon_vld1_x4_v: 6874 case NEON::BI__builtin_neon_vld1q_x4_v: 6875 Int = Intrinsic::aarch64_neon_ld1x4; 6876 break; 6877 } 6878 Function *F = CGM.getIntrinsic(Int, Tys); 6879 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6880 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6881 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6882 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6883 } 6884 case NEON::BI__builtin_neon_vst1_x2_v: 6885 case NEON::BI__builtin_neon_vst1q_x2_v: 6886 case NEON::BI__builtin_neon_vst1_x3_v: 6887 case NEON::BI__builtin_neon_vst1q_x3_v: 6888 case NEON::BI__builtin_neon_vst1_x4_v: 6889 case NEON::BI__builtin_neon_vst1q_x4_v: { 6890 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6891 llvm::Type *Tys[2] = { VTy, PTy }; 6892 unsigned Int; 6893 switch (BuiltinID) { 6894 case NEON::BI__builtin_neon_vst1_x2_v: 6895 case NEON::BI__builtin_neon_vst1q_x2_v: 6896 Int = Intrinsic::aarch64_neon_st1x2; 6897 break; 6898 case NEON::BI__builtin_neon_vst1_x3_v: 6899 case NEON::BI__builtin_neon_vst1q_x3_v: 6900 Int = Intrinsic::aarch64_neon_st1x3; 6901 break; 6902 case NEON::BI__builtin_neon_vst1_x4_v: 6903 case 
NEON::BI__builtin_neon_vst1q_x4_v: 6904 Int = Intrinsic::aarch64_neon_st1x4; 6905 break; 6906 } 6907 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6908 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6909 } 6910 case NEON::BI__builtin_neon_vld1_v: 6911 case NEON::BI__builtin_neon_vld1q_v: { 6912 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6913 auto Alignment = CharUnits::fromQuantity( 6914 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); 6915 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 6916 } 6917 case NEON::BI__builtin_neon_vst1_v: 6918 case NEON::BI__builtin_neon_vst1q_v: 6919 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6920 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6921 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6922 case NEON::BI__builtin_neon_vld1_lane_v: 6923 case NEON::BI__builtin_neon_vld1q_lane_v: { 6924 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6925 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6926 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6927 auto Alignment = CharUnits::fromQuantity( 6928 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 6929 Ops[0] = 6930 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6931 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 6932 } 6933 case NEON::BI__builtin_neon_vld1_dup_v: 6934 case NEON::BI__builtin_neon_vld1q_dup_v: { 6935 Value *V = UndefValue::get(Ty); 6936 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6937 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6938 auto Alignment = CharUnits::fromQuantity( 6939 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 6940 Ops[0] = 6941 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6942 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 6943 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 6944 return EmitNeonSplat(Ops[0], CI); 6945 } 6946 case NEON::BI__builtin_neon_vst1_lane_v: 6947 case NEON::BI__builtin_neon_vst1q_lane_v: 6948 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6949 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 6950 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6951 return Builder.CreateDefaultAlignedStore(Ops[1], 6952 Builder.CreateBitCast(Ops[0], Ty)); 6953 case NEON::BI__builtin_neon_vld2_v: 6954 case NEON::BI__builtin_neon_vld2q_v: { 6955 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6956 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6957 llvm::Type *Tys[2] = { VTy, PTy }; 6958 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 6959 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6960 Ops[0] = Builder.CreateBitCast(Ops[0], 6961 llvm::PointerType::getUnqual(Ops[1]->getType())); 6962 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6963 } 6964 case NEON::BI__builtin_neon_vld3_v: 6965 case NEON::BI__builtin_neon_vld3q_v: { 6966 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6967 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6968 llvm::Type *Tys[2] = { VTy, PTy }; 6969 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 6970 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6971 Ops[0] = Builder.CreateBitCast(Ops[0], 6972 llvm::PointerType::getUnqual(Ops[1]->getType())); 6973 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6974 } 6975 case NEON::BI__builtin_neon_vld4_v: 6976 case NEON::BI__builtin_neon_vld4q_v: { 6977 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6978 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6979 llvm::Type *Tys[2] = { VTy, PTy }; 6980 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 6981 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 6982 Ops[0] = Builder.CreateBitCast(Ops[0], 6983 llvm::PointerType::getUnqual(Ops[1]->getType())); 6984 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6985 } 6986 case NEON::BI__builtin_neon_vld2_dup_v: 6987 case NEON::BI__builtin_neon_vld2q_dup_v: { 6988 llvm::Type *PTy = 6989 llvm::PointerType::getUnqual(VTy->getElementType()); 6990 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6991 llvm::Type *Tys[2] = { VTy, PTy }; 6992 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 6993 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6994 Ops[0] = Builder.CreateBitCast(Ops[0], 6995 llvm::PointerType::getUnqual(Ops[1]->getType())); 6996 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6997 } 6998 case NEON::BI__builtin_neon_vld3_dup_v: 6999 case NEON::BI__builtin_neon_vld3q_dup_v: { 7000 llvm::Type *PTy = 7001 llvm::PointerType::getUnqual(VTy->getElementType()); 7002 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7003 llvm::Type *Tys[2] = { VTy, PTy }; 7004 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 7005 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 7006 Ops[0] = Builder.CreateBitCast(Ops[0], 7007 llvm::PointerType::getUnqual(Ops[1]->getType())); 7008 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7009 } 7010 case NEON::BI__builtin_neon_vld4_dup_v: 7011 case NEON::BI__builtin_neon_vld4q_dup_v: { 7012 llvm::Type *PTy = 7013 llvm::PointerType::getUnqual(VTy->getElementType()); 7014 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7015 llvm::Type *Tys[2] = { VTy, PTy }; 7016 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 7017 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 7018 Ops[0] = Builder.CreateBitCast(Ops[0], 7019 llvm::PointerType::getUnqual(Ops[1]->getType())); 7020 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7021 } 7022 case NEON::BI__builtin_neon_vld2_lane_v: 7023 case NEON::BI__builtin_neon_vld2q_lane_v: { 7024 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 7025 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 7026 Ops.push_back(Ops[1]); 7027 Ops.erase(Ops.begin()+1); 7028 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7029 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7030 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 7031 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 7032 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7033 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7034 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7035 } 7036 case NEON::BI__builtin_neon_vld3_lane_v: 7037 case NEON::BI__builtin_neon_vld3q_lane_v: { 7038 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 7039 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 7040 Ops.push_back(Ops[1]); 7041 Ops.erase(Ops.begin()+1); 7042 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7043 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7044 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7045 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7046 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 7047 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7048 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7049 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7050 } 7051 case NEON::BI__builtin_neon_vld4_lane_v: 7052 case NEON::BI__builtin_neon_vld4q_lane_v: { 7053 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 7054 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 7055 Ops.push_back(Ops[1]); 7056 Ops.erase(Ops.begin()+1); 7057 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7058 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7059 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7060 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 7061 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 7062 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 7063 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7064 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 7065 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7066 } 7067 case NEON::BI__builtin_neon_vst2_v: 7068 case NEON::BI__builtin_neon_vst2q_v: { 7069 Ops.push_back(Ops[0]); 7070 Ops.erase(Ops.begin()); 7071 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 7072 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 7073 Ops, ""); 7074 } 7075 case NEON::BI__builtin_neon_vst2_lane_v: 7076 case NEON::BI__builtin_neon_vst2q_lane_v: { 7077 Ops.push_back(Ops[0]); 7078 Ops.erase(Ops.begin()); 7079 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 7080 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7081 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 7082 Ops, ""); 7083 } 7084 case NEON::BI__builtin_neon_vst3_v: 7085 case NEON::BI__builtin_neon_vst3q_v: { 7086 Ops.push_back(Ops[0]); 7087 Ops.erase(Ops.begin()); 7088 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7089 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 7090 Ops, ""); 7091 } 7092 case NEON::BI__builtin_neon_vst3_lane_v: 7093 case NEON::BI__builtin_neon_vst3q_lane_v: { 7094 Ops.push_back(Ops[0]); 7095 Ops.erase(Ops.begin()); 7096 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 7097 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7098 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 7099 Ops, ""); 7100 } 7101 case NEON::BI__builtin_neon_vst4_v: 7102 case NEON::BI__builtin_neon_vst4q_v: { 7103 Ops.push_back(Ops[0]); 7104 Ops.erase(Ops.begin()); 7105 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7106 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 7107 Ops, ""); 7108 } 7109 case NEON::BI__builtin_neon_vst4_lane_v: 7110 case NEON::BI__builtin_neon_vst4q_lane_v: { 7111 Ops.push_back(Ops[0]); 7112 Ops.erase(Ops.begin()); 7113 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7114 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 7115 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 7116 Ops, ""); 7117 } 7118 case NEON::BI__builtin_neon_vtrn_v: 7119 case NEON::BI__builtin_neon_vtrnq_v: { 7120 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7121 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7122 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7123 Value *SV = nullptr; 7124 7125 for (unsigned vi = 0; vi != 2; ++vi) { 7126 SmallVector<uint32_t, 16> Indices; 7127 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7128 Indices.push_back(i+vi); 7129 Indices.push_back(i+e+vi); 7130 } 7131 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7132 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 7133 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7134 } 7135 return SV; 7136 } 7137 case NEON::BI__builtin_neon_vuzp_v: 7138 case NEON::BI__builtin_neon_vuzpq_v: { 7139 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7140 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7141 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7142 Value *SV = nullptr; 7143 7144 for (unsigned vi = 0; vi != 2; ++vi) { 7145 SmallVector<uint32_t, 16> Indices; 7146 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 7147 Indices.push_back(2*i+vi); 7148 7149 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7150 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 7151 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7152 } 7153 return SV; 7154 } 7155 case NEON::BI__builtin_neon_vzip_v: 7156 case NEON::BI__builtin_neon_vzipq_v: { 7157 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7158 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7159 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7160 Value *SV = nullptr; 7161 7162 for (unsigned vi = 0; vi != 2; ++vi) { 7163 SmallVector<uint32_t, 16> Indices; 7164 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7165 
Indices.push_back((i + vi*e) >> 1); 7166 Indices.push_back(((i + vi*e) >> 1)+e); 7167 } 7168 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7169 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 7170 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7171 } 7172 return SV; 7173 } 7174 case NEON::BI__builtin_neon_vqtbl1q_v: { 7175 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 7176 Ops, "vtbl1"); 7177 } 7178 case NEON::BI__builtin_neon_vqtbl2q_v: { 7179 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 7180 Ops, "vtbl2"); 7181 } 7182 case NEON::BI__builtin_neon_vqtbl3q_v: { 7183 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 7184 Ops, "vtbl3"); 7185 } 7186 case NEON::BI__builtin_neon_vqtbl4q_v: { 7187 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 7188 Ops, "vtbl4"); 7189 } 7190 case NEON::BI__builtin_neon_vqtbx1q_v: { 7191 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 7192 Ops, "vtbx1"); 7193 } 7194 case NEON::BI__builtin_neon_vqtbx2q_v: { 7195 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 7196 Ops, "vtbx2"); 7197 } 7198 case NEON::BI__builtin_neon_vqtbx3q_v: { 7199 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 7200 Ops, "vtbx3"); 7201 } 7202 case NEON::BI__builtin_neon_vqtbx4q_v: { 7203 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 7204 Ops, "vtbx4"); 7205 } 7206 case NEON::BI__builtin_neon_vsqadd_v: 7207 case NEON::BI__builtin_neon_vsqaddq_v: { 7208 Int = Intrinsic::aarch64_neon_usqadd; 7209 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 7210 } 7211 case NEON::BI__builtin_neon_vuqadd_v: 7212 case NEON::BI__builtin_neon_vuqaddq_v: { 7213 Int = Intrinsic::aarch64_neon_suqadd; 7214 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 7215 } 7216 } 7217 } 7218 7219 llvm::Value *CodeGenFunction:: 7220 
BuildVector(ArrayRef<llvm::Value*> Ops) { 7221 assert((Ops.size() & (Ops.size() - 1)) == 0 && 7222 "Not a power-of-two sized vector!"); 7223 bool AllConstants = true; 7224 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 7225 AllConstants &= isa<Constant>(Ops[i]); 7226 7227 // If this is a constant vector, create a ConstantVector. 7228 if (AllConstants) { 7229 SmallVector<llvm::Constant*, 16> CstOps; 7230 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7231 CstOps.push_back(cast<Constant>(Ops[i])); 7232 return llvm::ConstantVector::get(CstOps); 7233 } 7234 7235 // Otherwise, insertelement the values to build the vector. 7236 Value *Result = 7237 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 7238 7239 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7240 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 7241 7242 return Result; 7243 } 7244 7245 // Convert the mask from an integer type to a vector of i1. 7246 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 7247 unsigned NumElts) { 7248 7249 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 7250 cast<IntegerType>(Mask->getType())->getBitWidth()); 7251 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 7252 7253 // If we have less than 8 elements, then the starting mask was an i8 and 7254 // we need to extract down to the right number of elements. 7255 if (NumElts < 8) { 7256 uint32_t Indices[4]; 7257 for (unsigned i = 0; i != NumElts; ++i) 7258 Indices[i] = i; 7259 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 7260 makeArrayRef(Indices, NumElts), 7261 "extract"); 7262 } 7263 return MaskVec; 7264 } 7265 7266 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 7267 SmallVectorImpl<Value *> &Ops, 7268 unsigned Align) { 7269 // Cast the pointer to right type. 
7270 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7271 llvm::PointerType::getUnqual(Ops[1]->getType())); 7272 7273 // If the mask is all ones just emit a regular store. 7274 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7275 if (C->isAllOnesValue()) 7276 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 7277 7278 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7279 Ops[1]->getType()->getVectorNumElements()); 7280 7281 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 7282 } 7283 7284 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 7285 SmallVectorImpl<Value *> &Ops, unsigned Align) { 7286 // Cast the pointer to right type. 7287 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7288 llvm::PointerType::getUnqual(Ops[1]->getType())); 7289 7290 // If the mask is all ones just emit a regular store. 7291 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7292 if (C->isAllOnesValue()) 7293 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7294 7295 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7296 Ops[1]->getType()->getVectorNumElements()); 7297 7298 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 7299 } 7300 7301 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 7302 SmallVectorImpl<Value *> &Ops, 7303 llvm::Type *DstTy, 7304 unsigned SrcSizeInBits, 7305 unsigned Align) { 7306 // Load the subvector. 7307 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7308 7309 // Create broadcast mask. 
7310 unsigned NumDstElts = DstTy->getVectorNumElements(); 7311 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 7312 7313 SmallVector<uint32_t, 8> Mask; 7314 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 7315 for (unsigned j = 0; j != NumSrcElts; ++j) 7316 Mask.push_back(j); 7317 7318 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 7319 } 7320 7321 static Value *EmitX86Select(CodeGenFunction &CGF, 7322 Value *Mask, Value *Op0, Value *Op1) { 7323 7324 // If the mask is all ones just return first argument. 7325 if (const auto *C = dyn_cast<Constant>(Mask)) 7326 if (C->isAllOnesValue()) 7327 return Op0; 7328 7329 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 7330 7331 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 7332 } 7333 7334 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 7335 bool Signed, SmallVectorImpl<Value *> &Ops) { 7336 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7337 Value *Cmp; 7338 7339 if (CC == 3) { 7340 Cmp = Constant::getNullValue( 7341 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7342 } else if (CC == 7) { 7343 Cmp = Constant::getAllOnesValue( 7344 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7345 } else { 7346 ICmpInst::Predicate Pred; 7347 switch (CC) { 7348 default: llvm_unreachable("Unknown condition code"); 7349 case 0: Pred = ICmpInst::ICMP_EQ; break; 7350 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 7351 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 7352 case 4: Pred = ICmpInst::ICMP_NE; break; 7353 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 7354 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 7355 } 7356 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7357 } 7358 7359 const auto *C = dyn_cast<Constant>(Ops.back()); 7360 if (!C || !C->isAllOnesValue()) 7361 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 7362 7363 if (NumElts < 8) { 7364 uint32_t Indices[8]; 7365 for (unsigned i = 0; i != NumElts; ++i) 7366 Indices[i] = i; 7367 for (unsigned i = NumElts; i != 8; ++i) 7368 Indices[i] = i % NumElts + NumElts; 7369 Cmp = CGF.Builder.CreateShuffleVector( 7370 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 7371 } 7372 return CGF.Builder.CreateBitCast(Cmp, 7373 IntegerType::get(CGF.getLLVMContext(), 7374 std::max(NumElts, 8U))); 7375 } 7376 7377 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 7378 ArrayRef<Value *> Ops) { 7379 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7380 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7381 7382 if (Ops.size() == 2) 7383 return Res; 7384 7385 assert(Ops.size() == 4); 7386 return EmitX86Select(CGF, Ops[3], Res, Ops[2]); 7387 } 7388 7389 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 7390 llvm::Type *DstTy) { 7391 unsigned NumberOfElements = DstTy->getVectorNumElements(); 7392 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 7393 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 7394 } 7395 7396 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 7397 const CallExpr *E) { 7398 if (BuiltinID == X86::BI__builtin_ms_va_start || 7399 BuiltinID == X86::BI__builtin_ms_va_end) 7400 return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 7401 BuiltinID == X86::BI__builtin_ms_va_start); 7402 if (BuiltinID == X86::BI__builtin_ms_va_copy) { 7403 // Lower this manually. 
We can't reliably determine whether or not any 7404 // given va_copy() is for a Win64 va_list from the calling convention 7405 // alone, because it's legal to do this from a System V ABI function. 7406 // With opaque pointer types, we won't have enough information in LLVM 7407 // IR to determine this from the argument types, either. Best to do it 7408 // now, while we have enough information. 7409 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 7410 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 7411 7412 llvm::Type *BPP = Int8PtrPtrTy; 7413 7414 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 7415 DestAddr.getAlignment()); 7416 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 7417 SrcAddr.getAlignment()); 7418 7419 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 7420 return Builder.CreateStore(ArgPtr, DestAddr); 7421 } 7422 7423 SmallVector<Value*, 4> Ops; 7424 7425 // Find out if any arguments are required to be integer constant expressions. 7426 unsigned ICEArguments = 0; 7427 ASTContext::GetBuiltinTypeError Error; 7428 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7429 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7430 7431 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 7432 // If this is a normal argument, just emit it as a scalar. 7433 if ((ICEArguments & (1 << i)) == 0) { 7434 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7435 continue; 7436 } 7437 7438 // If this is required to be a constant, constant fold it so that we know 7439 // that the generated intrinsic gets a ConstantInt. 
7440 llvm::APSInt Result; 7441 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 7442 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 7443 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 7444 } 7445 7446 // These exist so that the builtin that takes an immediate can be bounds 7447 // checked by clang to avoid passing bad immediates to the backend. Since 7448 // AVX has a larger immediate than SSE we would need separate builtins to 7449 // do the different bounds checking. Rather than create a clang specific 7450 // SSE only builtin, this implements eight separate builtins to match gcc 7451 // implementation. 7452 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 7453 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 7454 llvm::Function *F = CGM.getIntrinsic(ID); 7455 return Builder.CreateCall(F, Ops); 7456 }; 7457 7458 // For the vector forms of FP comparisons, translate the builtins directly to 7459 // IR. 7460 // TODO: The builtins could be removed if the SSE header files used vector 7461 // extension comparisons directly (vector ordered/unordered may need 7462 // additional support via __builtin_isnan()). 7463 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 7464 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 7465 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 7466 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 7467 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 7468 return Builder.CreateBitCast(Sext, FPVecTy); 7469 }; 7470 7471 switch (BuiltinID) { 7472 default: return nullptr; 7473 case X86::BI__builtin_cpu_supports: { 7474 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 7475 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 7476 7477 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 7478 // based mapping. 
7479 // Processor features and mapping to processor feature value. 7480 enum X86Features { 7481 CMOV = 0, 7482 MMX, 7483 POPCNT, 7484 SSE, 7485 SSE2, 7486 SSE3, 7487 SSSE3, 7488 SSE4_1, 7489 SSE4_2, 7490 AVX, 7491 AVX2, 7492 SSE4_A, 7493 FMA4, 7494 XOP, 7495 FMA, 7496 AVX512F, 7497 BMI, 7498 BMI2, 7499 AES, 7500 PCLMUL, 7501 AVX512VL, 7502 AVX512BW, 7503 AVX512DQ, 7504 AVX512CD, 7505 AVX512ER, 7506 AVX512PF, 7507 AVX512VBMI, 7508 AVX512IFMA, 7509 AVX512VPOPCNTDQ, 7510 MAX 7511 }; 7512 7513 X86Features Feature = 7514 StringSwitch<X86Features>(FeatureStr) 7515 .Case("cmov", X86Features::CMOV) 7516 .Case("mmx", X86Features::MMX) 7517 .Case("popcnt", X86Features::POPCNT) 7518 .Case("sse", X86Features::SSE) 7519 .Case("sse2", X86Features::SSE2) 7520 .Case("sse3", X86Features::SSE3) 7521 .Case("ssse3", X86Features::SSSE3) 7522 .Case("sse4.1", X86Features::SSE4_1) 7523 .Case("sse4.2", X86Features::SSE4_2) 7524 .Case("avx", X86Features::AVX) 7525 .Case("avx2", X86Features::AVX2) 7526 .Case("sse4a", X86Features::SSE4_A) 7527 .Case("fma4", X86Features::FMA4) 7528 .Case("xop", X86Features::XOP) 7529 .Case("fma", X86Features::FMA) 7530 .Case("avx512f", X86Features::AVX512F) 7531 .Case("bmi", X86Features::BMI) 7532 .Case("bmi2", X86Features::BMI2) 7533 .Case("aes", X86Features::AES) 7534 .Case("pclmul", X86Features::PCLMUL) 7535 .Case("avx512vl", X86Features::AVX512VL) 7536 .Case("avx512bw", X86Features::AVX512BW) 7537 .Case("avx512dq", X86Features::AVX512DQ) 7538 .Case("avx512cd", X86Features::AVX512CD) 7539 .Case("avx512er", X86Features::AVX512ER) 7540 .Case("avx512pf", X86Features::AVX512PF) 7541 .Case("avx512vbmi", X86Features::AVX512VBMI) 7542 .Case("avx512ifma", X86Features::AVX512IFMA) 7543 .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) 7544 .Default(X86Features::MAX); 7545 assert(Feature != X86Features::MAX && "Invalid feature!"); 7546 7547 // Matching the struct layout from the compiler-rt/libgcc structure that is 7548 // filled in: 7549 // unsigned int 
__cpu_vendor; 7550 // unsigned int __cpu_type; 7551 // unsigned int __cpu_subtype; 7552 // unsigned int __cpu_features[1]; 7553 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7554 llvm::ArrayType::get(Int32Ty, 1)); 7555 7556 // Grab the global __cpu_model. 7557 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7558 7559 // Grab the first (0th) element from the field __cpu_features off of the 7560 // global in the struct STy. 7561 Value *Idxs[] = { 7562 ConstantInt::get(Int32Ty, 0), 7563 ConstantInt::get(Int32Ty, 3), 7564 ConstantInt::get(Int32Ty, 0) 7565 }; 7566 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 7567 Value *Features = Builder.CreateAlignedLoad(CpuFeatures, 7568 CharUnits::fromQuantity(4)); 7569 7570 // Check the value of the bit corresponding to the feature requested. 7571 Value *Bitset = Builder.CreateAnd( 7572 Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); 7573 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 7574 } 7575 case X86::BI_mm_prefetch: { 7576 Value *Address = Ops[0]; 7577 Value *RW = ConstantInt::get(Int32Ty, 0); 7578 Value *Locality = Ops[1]; 7579 Value *Data = ConstantInt::get(Int32Ty, 1); 7580 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 7581 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 7582 } 7583 case X86::BI_mm_clflush: { 7584 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 7585 Ops[0]); 7586 } 7587 case X86::BI_mm_lfence: { 7588 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 7589 } 7590 case X86::BI_mm_mfence: { 7591 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 7592 } 7593 case X86::BI_mm_sfence: { 7594 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 7595 } 7596 case X86::BI_mm_pause: { 7597 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 7598 } 7599 case X86::BI__rdtsc: { 7600 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 7601 } 7602 case X86::BI__builtin_ia32_undef128: 7603 case X86::BI__builtin_ia32_undef256: 7604 case X86::BI__builtin_ia32_undef512: 7605 // The x86 definition of "undef" is not the same as the LLVM definition 7606 // (PR32176). We leave optimizing away an unnecessary zero constant to the 7607 // IR optimizer and backend. 7608 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 7609 // value, we should use that here instead of a zero. 7610 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7611 case X86::BI__builtin_ia32_vec_init_v8qi: 7612 case X86::BI__builtin_ia32_vec_init_v4hi: 7613 case X86::BI__builtin_ia32_vec_init_v2si: 7614 return Builder.CreateBitCast(BuildVector(Ops), 7615 llvm::Type::getX86_MMXTy(getLLVMContext())); 7616 case X86::BI__builtin_ia32_vec_ext_v2si: 7617 return Builder.CreateExtractElement(Ops[0], 7618 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 7619 case X86::BI_mm_setcsr: 7620 case X86::BI__builtin_ia32_ldmxcsr: { 7621 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 7622 Builder.CreateStore(Ops[0], Tmp); 7623 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 7624 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7625 } 7626 case X86::BI_mm_getcsr: 7627 case X86::BI__builtin_ia32_stmxcsr: { 7628 Address Tmp = CreateMemTemp(E->getType()); 7629 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 7630 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7631 return Builder.CreateLoad(Tmp, "stmxcsr"); 7632 } 7633 case X86::BI__builtin_ia32_xsave: 7634 case X86::BI__builtin_ia32_xsave64: 7635 case X86::BI__builtin_ia32_xrstor: 7636 case X86::BI__builtin_ia32_xrstor64: 7637 case X86::BI__builtin_ia32_xsaveopt: 7638 case X86::BI__builtin_ia32_xsaveopt64: 7639 case X86::BI__builtin_ia32_xrstors: 7640 case X86::BI__builtin_ia32_xrstors64: 7641 case X86::BI__builtin_ia32_xsavec: 7642 case 
X86::BI__builtin_ia32_xsavec64: 7643 case X86::BI__builtin_ia32_xsaves: 7644 case X86::BI__builtin_ia32_xsaves64: { 7645 Intrinsic::ID ID; 7646 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 7647 case X86::BI__builtin_ia32_##NAME: \ 7648 ID = Intrinsic::x86_##NAME; \ 7649 break 7650 switch (BuiltinID) { 7651 default: llvm_unreachable("Unsupported intrinsic!"); 7652 INTRINSIC_X86_XSAVE_ID(xsave); 7653 INTRINSIC_X86_XSAVE_ID(xsave64); 7654 INTRINSIC_X86_XSAVE_ID(xrstor); 7655 INTRINSIC_X86_XSAVE_ID(xrstor64); 7656 INTRINSIC_X86_XSAVE_ID(xsaveopt); 7657 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 7658 INTRINSIC_X86_XSAVE_ID(xrstors); 7659 INTRINSIC_X86_XSAVE_ID(xrstors64); 7660 INTRINSIC_X86_XSAVE_ID(xsavec); 7661 INTRINSIC_X86_XSAVE_ID(xsavec64); 7662 INTRINSIC_X86_XSAVE_ID(xsaves); 7663 INTRINSIC_X86_XSAVE_ID(xsaves64); 7664 } 7665 #undef INTRINSIC_X86_XSAVE_ID 7666 Value *Mhi = Builder.CreateTrunc( 7667 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 7668 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 7669 Ops[1] = Mhi; 7670 Ops.push_back(Mlo); 7671 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7672 } 7673 case X86::BI__builtin_ia32_storedqudi128_mask: 7674 case X86::BI__builtin_ia32_storedqusi128_mask: 7675 case X86::BI__builtin_ia32_storedquhi128_mask: 7676 case X86::BI__builtin_ia32_storedquqi128_mask: 7677 case X86::BI__builtin_ia32_storeupd128_mask: 7678 case X86::BI__builtin_ia32_storeups128_mask: 7679 case X86::BI__builtin_ia32_storedqudi256_mask: 7680 case X86::BI__builtin_ia32_storedqusi256_mask: 7681 case X86::BI__builtin_ia32_storedquhi256_mask: 7682 case X86::BI__builtin_ia32_storedquqi256_mask: 7683 case X86::BI__builtin_ia32_storeupd256_mask: 7684 case X86::BI__builtin_ia32_storeups256_mask: 7685 case X86::BI__builtin_ia32_storedqudi512_mask: 7686 case X86::BI__builtin_ia32_storedqusi512_mask: 7687 case X86::BI__builtin_ia32_storedquhi512_mask: 7688 case X86::BI__builtin_ia32_storedquqi512_mask: 7689 case 
X86::BI__builtin_ia32_storeupd512_mask: 7690 case X86::BI__builtin_ia32_storeups512_mask: 7691 return EmitX86MaskedStore(*this, Ops, 1); 7692 7693 case X86::BI__builtin_ia32_storess128_mask: 7694 case X86::BI__builtin_ia32_storesd128_mask: { 7695 return EmitX86MaskedStore(*this, Ops, 16); 7696 } 7697 case X86::BI__builtin_ia32_vpopcntd_512: 7698 case X86::BI__builtin_ia32_vpopcntq_512: { 7699 llvm::Type *ResultType = ConvertType(E->getType()); 7700 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 7701 return Builder.CreateCall(F, Ops); 7702 } 7703 case X86::BI__builtin_ia32_cvtmask2b128: 7704 case X86::BI__builtin_ia32_cvtmask2b256: 7705 case X86::BI__builtin_ia32_cvtmask2b512: 7706 case X86::BI__builtin_ia32_cvtmask2w128: 7707 case X86::BI__builtin_ia32_cvtmask2w256: 7708 case X86::BI__builtin_ia32_cvtmask2w512: 7709 case X86::BI__builtin_ia32_cvtmask2d128: 7710 case X86::BI__builtin_ia32_cvtmask2d256: 7711 case X86::BI__builtin_ia32_cvtmask2d512: 7712 case X86::BI__builtin_ia32_cvtmask2q128: 7713 case X86::BI__builtin_ia32_cvtmask2q256: 7714 case X86::BI__builtin_ia32_cvtmask2q512: 7715 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 7716 7717 case X86::BI__builtin_ia32_movdqa32store128_mask: 7718 case X86::BI__builtin_ia32_movdqa64store128_mask: 7719 case X86::BI__builtin_ia32_storeaps128_mask: 7720 case X86::BI__builtin_ia32_storeapd128_mask: 7721 case X86::BI__builtin_ia32_movdqa32store256_mask: 7722 case X86::BI__builtin_ia32_movdqa64store256_mask: 7723 case X86::BI__builtin_ia32_storeaps256_mask: 7724 case X86::BI__builtin_ia32_storeapd256_mask: 7725 case X86::BI__builtin_ia32_movdqa32store512_mask: 7726 case X86::BI__builtin_ia32_movdqa64store512_mask: 7727 case X86::BI__builtin_ia32_storeaps512_mask: 7728 case X86::BI__builtin_ia32_storeapd512_mask: { 7729 unsigned Align = 7730 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7731 return EmitX86MaskedStore(*this, Ops, Align); 7732 } 7733 case 
X86::BI__builtin_ia32_loadups128_mask: 7734 case X86::BI__builtin_ia32_loadups256_mask: 7735 case X86::BI__builtin_ia32_loadups512_mask: 7736 case X86::BI__builtin_ia32_loadupd128_mask: 7737 case X86::BI__builtin_ia32_loadupd256_mask: 7738 case X86::BI__builtin_ia32_loadupd512_mask: 7739 case X86::BI__builtin_ia32_loaddquqi128_mask: 7740 case X86::BI__builtin_ia32_loaddquqi256_mask: 7741 case X86::BI__builtin_ia32_loaddquqi512_mask: 7742 case X86::BI__builtin_ia32_loaddquhi128_mask: 7743 case X86::BI__builtin_ia32_loaddquhi256_mask: 7744 case X86::BI__builtin_ia32_loaddquhi512_mask: 7745 case X86::BI__builtin_ia32_loaddqusi128_mask: 7746 case X86::BI__builtin_ia32_loaddqusi256_mask: 7747 case X86::BI__builtin_ia32_loaddqusi512_mask: 7748 case X86::BI__builtin_ia32_loaddqudi128_mask: 7749 case X86::BI__builtin_ia32_loaddqudi256_mask: 7750 case X86::BI__builtin_ia32_loaddqudi512_mask: 7751 return EmitX86MaskedLoad(*this, Ops, 1); 7752 7753 case X86::BI__builtin_ia32_loadss128_mask: 7754 case X86::BI__builtin_ia32_loadsd128_mask: 7755 return EmitX86MaskedLoad(*this, Ops, 16); 7756 7757 case X86::BI__builtin_ia32_loadaps128_mask: 7758 case X86::BI__builtin_ia32_loadaps256_mask: 7759 case X86::BI__builtin_ia32_loadaps512_mask: 7760 case X86::BI__builtin_ia32_loadapd128_mask: 7761 case X86::BI__builtin_ia32_loadapd256_mask: 7762 case X86::BI__builtin_ia32_loadapd512_mask: 7763 case X86::BI__builtin_ia32_movdqa32load128_mask: 7764 case X86::BI__builtin_ia32_movdqa32load256_mask: 7765 case X86::BI__builtin_ia32_movdqa32load512_mask: 7766 case X86::BI__builtin_ia32_movdqa64load128_mask: 7767 case X86::BI__builtin_ia32_movdqa64load256_mask: 7768 case X86::BI__builtin_ia32_movdqa64load512_mask: { 7769 unsigned Align = 7770 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7771 return EmitX86MaskedLoad(*this, Ops, Align); 7772 } 7773 7774 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 7775 case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 7776 
llvm::Type *DstTy = ConvertType(E->getType()); 7777 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 7778 } 7779 7780 case X86::BI__builtin_ia32_storehps: 7781 case X86::BI__builtin_ia32_storelps: { 7782 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 7783 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 7784 7785 // cast val v2i64 7786 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 7787 7788 // extract (0, 1) 7789 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 7790 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 7791 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 7792 7793 // cast pointer to i64 & store 7794 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 7795 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7796 } 7797 case X86::BI__builtin_ia32_palignr128: 7798 case X86::BI__builtin_ia32_palignr256: 7799 case X86::BI__builtin_ia32_palignr512_mask: { 7800 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7801 7802 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7803 assert(NumElts % 16 == 0); 7804 7805 // If palignr is shifting the pair of vectors more than the size of two 7806 // lanes, emit zero. 7807 if (ShiftVal >= 32) 7808 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7809 7810 // If palignr is shifting the pair of input vectors more than one lane, 7811 // but less than two lanes, convert to shifting in zeroes. 7812 if (ShiftVal > 16) { 7813 ShiftVal -= 16; 7814 Ops[1] = Ops[0]; 7815 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 7816 } 7817 7818 uint32_t Indices[64]; 7819 // 256-bit palignr operates on 128-bit lanes so we need to handle that 7820 for (unsigned l = 0; l != NumElts; l += 16) { 7821 for (unsigned i = 0; i != 16; ++i) { 7822 unsigned Idx = ShiftVal + i; 7823 if (Idx >= 16) 7824 Idx += NumElts - 16; // End of lane, switch operand. 
7825 Indices[l + i] = Idx + l; 7826 } 7827 } 7828 7829 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 7830 makeArrayRef(Indices, NumElts), 7831 "palignr"); 7832 7833 // If this isn't a masked builtin, just return the align operation. 7834 if (Ops.size() == 3) 7835 return Align; 7836 7837 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 7838 } 7839 7840 case X86::BI__builtin_ia32_movnti: 7841 case X86::BI__builtin_ia32_movnti64: 7842 case X86::BI__builtin_ia32_movntsd: 7843 case X86::BI__builtin_ia32_movntss: { 7844 llvm::MDNode *Node = llvm::MDNode::get( 7845 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7846 7847 Value *Ptr = Ops[0]; 7848 Value *Src = Ops[1]; 7849 7850 // Extract the 0'th element of the source vector. 7851 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 7852 BuiltinID == X86::BI__builtin_ia32_movntss) 7853 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 7854 7855 // Convert the type of the pointer to a pointer to the stored type. 7856 Value *BC = Builder.CreateBitCast( 7857 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); 7858 7859 // Unaligned nontemporal store of the scalar value. 
7860 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); 7861 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7862 SI->setAlignment(1); 7863 return SI; 7864 } 7865 7866 case X86::BI__builtin_ia32_selectb_128: 7867 case X86::BI__builtin_ia32_selectb_256: 7868 case X86::BI__builtin_ia32_selectb_512: 7869 case X86::BI__builtin_ia32_selectw_128: 7870 case X86::BI__builtin_ia32_selectw_256: 7871 case X86::BI__builtin_ia32_selectw_512: 7872 case X86::BI__builtin_ia32_selectd_128: 7873 case X86::BI__builtin_ia32_selectd_256: 7874 case X86::BI__builtin_ia32_selectd_512: 7875 case X86::BI__builtin_ia32_selectq_128: 7876 case X86::BI__builtin_ia32_selectq_256: 7877 case X86::BI__builtin_ia32_selectq_512: 7878 case X86::BI__builtin_ia32_selectps_128: 7879 case X86::BI__builtin_ia32_selectps_256: 7880 case X86::BI__builtin_ia32_selectps_512: 7881 case X86::BI__builtin_ia32_selectpd_128: 7882 case X86::BI__builtin_ia32_selectpd_256: 7883 case X86::BI__builtin_ia32_selectpd_512: 7884 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 7885 case X86::BI__builtin_ia32_pcmpeqb128_mask: 7886 case X86::BI__builtin_ia32_pcmpeqb256_mask: 7887 case X86::BI__builtin_ia32_pcmpeqb512_mask: 7888 case X86::BI__builtin_ia32_pcmpeqw128_mask: 7889 case X86::BI__builtin_ia32_pcmpeqw256_mask: 7890 case X86::BI__builtin_ia32_pcmpeqw512_mask: 7891 case X86::BI__builtin_ia32_pcmpeqd128_mask: 7892 case X86::BI__builtin_ia32_pcmpeqd256_mask: 7893 case X86::BI__builtin_ia32_pcmpeqd512_mask: 7894 case X86::BI__builtin_ia32_pcmpeqq128_mask: 7895 case X86::BI__builtin_ia32_pcmpeqq256_mask: 7896 case X86::BI__builtin_ia32_pcmpeqq512_mask: 7897 return EmitX86MaskedCompare(*this, 0, false, Ops); 7898 case X86::BI__builtin_ia32_pcmpgtb128_mask: 7899 case X86::BI__builtin_ia32_pcmpgtb256_mask: 7900 case X86::BI__builtin_ia32_pcmpgtb512_mask: 7901 case X86::BI__builtin_ia32_pcmpgtw128_mask: 7902 case X86::BI__builtin_ia32_pcmpgtw256_mask: 7903 case 
X86::BI__builtin_ia32_pcmpgtw512_mask: 7904 case X86::BI__builtin_ia32_pcmpgtd128_mask: 7905 case X86::BI__builtin_ia32_pcmpgtd256_mask: 7906 case X86::BI__builtin_ia32_pcmpgtd512_mask: 7907 case X86::BI__builtin_ia32_pcmpgtq128_mask: 7908 case X86::BI__builtin_ia32_pcmpgtq256_mask: 7909 case X86::BI__builtin_ia32_pcmpgtq512_mask: 7910 return EmitX86MaskedCompare(*this, 6, true, Ops); 7911 case X86::BI__builtin_ia32_cmpb128_mask: 7912 case X86::BI__builtin_ia32_cmpb256_mask: 7913 case X86::BI__builtin_ia32_cmpb512_mask: 7914 case X86::BI__builtin_ia32_cmpw128_mask: 7915 case X86::BI__builtin_ia32_cmpw256_mask: 7916 case X86::BI__builtin_ia32_cmpw512_mask: 7917 case X86::BI__builtin_ia32_cmpd128_mask: 7918 case X86::BI__builtin_ia32_cmpd256_mask: 7919 case X86::BI__builtin_ia32_cmpd512_mask: 7920 case X86::BI__builtin_ia32_cmpq128_mask: 7921 case X86::BI__builtin_ia32_cmpq256_mask: 7922 case X86::BI__builtin_ia32_cmpq512_mask: { 7923 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7924 return EmitX86MaskedCompare(*this, CC, true, Ops); 7925 } 7926 case X86::BI__builtin_ia32_ucmpb128_mask: 7927 case X86::BI__builtin_ia32_ucmpb256_mask: 7928 case X86::BI__builtin_ia32_ucmpb512_mask: 7929 case X86::BI__builtin_ia32_ucmpw128_mask: 7930 case X86::BI__builtin_ia32_ucmpw256_mask: 7931 case X86::BI__builtin_ia32_ucmpw512_mask: 7932 case X86::BI__builtin_ia32_ucmpd128_mask: 7933 case X86::BI__builtin_ia32_ucmpd256_mask: 7934 case X86::BI__builtin_ia32_ucmpd512_mask: 7935 case X86::BI__builtin_ia32_ucmpq128_mask: 7936 case X86::BI__builtin_ia32_ucmpq256_mask: 7937 case X86::BI__builtin_ia32_ucmpq512_mask: { 7938 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7939 return EmitX86MaskedCompare(*this, CC, false, Ops); 7940 } 7941 7942 case X86::BI__builtin_ia32_vplzcntd_128_mask: 7943 case X86::BI__builtin_ia32_vplzcntd_256_mask: 7944 case X86::BI__builtin_ia32_vplzcntd_512_mask: 7945 case X86::BI__builtin_ia32_vplzcntq_128_mask: 7946 
case X86::BI__builtin_ia32_vplzcntq_256_mask: 7947 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 7948 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 7949 return EmitX86Select(*this, Ops[2], 7950 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 7951 Ops[1]); 7952 } 7953 7954 case X86::BI__builtin_ia32_pmaxsb128: 7955 case X86::BI__builtin_ia32_pmaxsw128: 7956 case X86::BI__builtin_ia32_pmaxsd128: 7957 case X86::BI__builtin_ia32_pmaxsq128_mask: 7958 case X86::BI__builtin_ia32_pmaxsb256: 7959 case X86::BI__builtin_ia32_pmaxsw256: 7960 case X86::BI__builtin_ia32_pmaxsd256: 7961 case X86::BI__builtin_ia32_pmaxsq256_mask: 7962 case X86::BI__builtin_ia32_pmaxsb512_mask: 7963 case X86::BI__builtin_ia32_pmaxsw512_mask: 7964 case X86::BI__builtin_ia32_pmaxsd512_mask: 7965 case X86::BI__builtin_ia32_pmaxsq512_mask: 7966 return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); 7967 case X86::BI__builtin_ia32_pmaxub128: 7968 case X86::BI__builtin_ia32_pmaxuw128: 7969 case X86::BI__builtin_ia32_pmaxud128: 7970 case X86::BI__builtin_ia32_pmaxuq128_mask: 7971 case X86::BI__builtin_ia32_pmaxub256: 7972 case X86::BI__builtin_ia32_pmaxuw256: 7973 case X86::BI__builtin_ia32_pmaxud256: 7974 case X86::BI__builtin_ia32_pmaxuq256_mask: 7975 case X86::BI__builtin_ia32_pmaxub512_mask: 7976 case X86::BI__builtin_ia32_pmaxuw512_mask: 7977 case X86::BI__builtin_ia32_pmaxud512_mask: 7978 case X86::BI__builtin_ia32_pmaxuq512_mask: 7979 return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); 7980 case X86::BI__builtin_ia32_pminsb128: 7981 case X86::BI__builtin_ia32_pminsw128: 7982 case X86::BI__builtin_ia32_pminsd128: 7983 case X86::BI__builtin_ia32_pminsq128_mask: 7984 case X86::BI__builtin_ia32_pminsb256: 7985 case X86::BI__builtin_ia32_pminsw256: 7986 case X86::BI__builtin_ia32_pminsd256: 7987 case X86::BI__builtin_ia32_pminsq256_mask: 7988 case X86::BI__builtin_ia32_pminsb512_mask: 7989 case X86::BI__builtin_ia32_pminsw512_mask: 7990 case 
X86::BI__builtin_ia32_pminsd512_mask: 7991 case X86::BI__builtin_ia32_pminsq512_mask: 7992 return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); 7993 case X86::BI__builtin_ia32_pminub128: 7994 case X86::BI__builtin_ia32_pminuw128: 7995 case X86::BI__builtin_ia32_pminud128: 7996 case X86::BI__builtin_ia32_pminuq128_mask: 7997 case X86::BI__builtin_ia32_pminub256: 7998 case X86::BI__builtin_ia32_pminuw256: 7999 case X86::BI__builtin_ia32_pminud256: 8000 case X86::BI__builtin_ia32_pminuq256_mask: 8001 case X86::BI__builtin_ia32_pminub512_mask: 8002 case X86::BI__builtin_ia32_pminuw512_mask: 8003 case X86::BI__builtin_ia32_pminud512_mask: 8004 case X86::BI__builtin_ia32_pminuq512_mask: 8005 return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); 8006 8007 // 3DNow! 8008 case X86::BI__builtin_ia32_pswapdsf: 8009 case X86::BI__builtin_ia32_pswapdsi: { 8010 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 8011 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 8012 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 8013 return Builder.CreateCall(F, Ops, "pswapd"); 8014 } 8015 case X86::BI__builtin_ia32_rdrand16_step: 8016 case X86::BI__builtin_ia32_rdrand32_step: 8017 case X86::BI__builtin_ia32_rdrand64_step: 8018 case X86::BI__builtin_ia32_rdseed16_step: 8019 case X86::BI__builtin_ia32_rdseed32_step: 8020 case X86::BI__builtin_ia32_rdseed64_step: { 8021 Intrinsic::ID ID; 8022 switch (BuiltinID) { 8023 default: llvm_unreachable("Unsupported intrinsic!"); 8024 case X86::BI__builtin_ia32_rdrand16_step: 8025 ID = Intrinsic::x86_rdrand_16; 8026 break; 8027 case X86::BI__builtin_ia32_rdrand32_step: 8028 ID = Intrinsic::x86_rdrand_32; 8029 break; 8030 case X86::BI__builtin_ia32_rdrand64_step: 8031 ID = Intrinsic::x86_rdrand_64; 8032 break; 8033 case X86::BI__builtin_ia32_rdseed16_step: 8034 ID = Intrinsic::x86_rdseed_16; 8035 break; 8036 case X86::BI__builtin_ia32_rdseed32_step: 8037 ID = Intrinsic::x86_rdseed_32; 8038 break; 8039 case 
X86::BI__builtin_ia32_rdseed64_step: 8040 ID = Intrinsic::x86_rdseed_64; 8041 break; 8042 } 8043 8044 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 8045 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 8046 Ops[0]); 8047 return Builder.CreateExtractValue(Call, 1); 8048 } 8049 8050 // SSE packed comparison intrinsics 8051 case X86::BI__builtin_ia32_cmpeqps: 8052 case X86::BI__builtin_ia32_cmpeqpd: 8053 return getVectorFCmpIR(CmpInst::FCMP_OEQ); 8054 case X86::BI__builtin_ia32_cmpltps: 8055 case X86::BI__builtin_ia32_cmpltpd: 8056 return getVectorFCmpIR(CmpInst::FCMP_OLT); 8057 case X86::BI__builtin_ia32_cmpleps: 8058 case X86::BI__builtin_ia32_cmplepd: 8059 return getVectorFCmpIR(CmpInst::FCMP_OLE); 8060 case X86::BI__builtin_ia32_cmpunordps: 8061 case X86::BI__builtin_ia32_cmpunordpd: 8062 return getVectorFCmpIR(CmpInst::FCMP_UNO); 8063 case X86::BI__builtin_ia32_cmpneqps: 8064 case X86::BI__builtin_ia32_cmpneqpd: 8065 return getVectorFCmpIR(CmpInst::FCMP_UNE); 8066 case X86::BI__builtin_ia32_cmpnltps: 8067 case X86::BI__builtin_ia32_cmpnltpd: 8068 return getVectorFCmpIR(CmpInst::FCMP_UGE); 8069 case X86::BI__builtin_ia32_cmpnleps: 8070 case X86::BI__builtin_ia32_cmpnlepd: 8071 return getVectorFCmpIR(CmpInst::FCMP_UGT); 8072 case X86::BI__builtin_ia32_cmpordps: 8073 case X86::BI__builtin_ia32_cmpordpd: 8074 return getVectorFCmpIR(CmpInst::FCMP_ORD); 8075 case X86::BI__builtin_ia32_cmpps: 8076 case X86::BI__builtin_ia32_cmpps256: 8077 case X86::BI__builtin_ia32_cmppd: 8078 case X86::BI__builtin_ia32_cmppd256: { 8079 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8080 // If this one of the SSE immediates, we can use native IR. 
8081 if (CC < 8) { 8082 FCmpInst::Predicate Pred; 8083 switch (CC) { 8084 case 0: Pred = FCmpInst::FCMP_OEQ; break; 8085 case 1: Pred = FCmpInst::FCMP_OLT; break; 8086 case 2: Pred = FCmpInst::FCMP_OLE; break; 8087 case 3: Pred = FCmpInst::FCMP_UNO; break; 8088 case 4: Pred = FCmpInst::FCMP_UNE; break; 8089 case 5: Pred = FCmpInst::FCMP_UGE; break; 8090 case 6: Pred = FCmpInst::FCMP_UGT; break; 8091 case 7: Pred = FCmpInst::FCMP_ORD; break; 8092 } 8093 return getVectorFCmpIR(Pred); 8094 } 8095 8096 // We can't handle 8-31 immediates with native IR, use the intrinsic. 8097 // Except for predicates that create constants. 8098 Intrinsic::ID ID; 8099 switch (BuiltinID) { 8100 default: llvm_unreachable("Unsupported intrinsic!"); 8101 case X86::BI__builtin_ia32_cmpps: 8102 ID = Intrinsic::x86_sse_cmp_ps; 8103 break; 8104 case X86::BI__builtin_ia32_cmpps256: 8105 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8106 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 8107 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8108 Value *Constant = (CC == 0xf || CC == 0x1f) ? 8109 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : 8110 llvm::Constant::getNullValue(Builder.getInt32Ty()); 8111 Value *Vec = Builder.CreateVectorSplat( 8112 Ops[0]->getType()->getVectorNumElements(), Constant); 8113 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8114 } 8115 ID = Intrinsic::x86_avx_cmp_ps_256; 8116 break; 8117 case X86::BI__builtin_ia32_cmppd: 8118 ID = Intrinsic::x86_sse2_cmp_pd; 8119 break; 8120 case X86::BI__builtin_ia32_cmppd256: 8121 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8122 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 8123 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8124 Value *Constant = (CC == 0xf || CC == 0x1f) ? 
8125 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : 8126 llvm::Constant::getNullValue(Builder.getInt64Ty()); 8127 Value *Vec = Builder.CreateVectorSplat( 8128 Ops[0]->getType()->getVectorNumElements(), Constant); 8129 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8130 } 8131 ID = Intrinsic::x86_avx_cmp_pd_256; 8132 break; 8133 } 8134 8135 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 8136 } 8137 8138 // SSE scalar comparison intrinsics 8139 case X86::BI__builtin_ia32_cmpeqss: 8140 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 8141 case X86::BI__builtin_ia32_cmpltss: 8142 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 8143 case X86::BI__builtin_ia32_cmpless: 8144 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 8145 case X86::BI__builtin_ia32_cmpunordss: 8146 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 8147 case X86::BI__builtin_ia32_cmpneqss: 8148 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 8149 case X86::BI__builtin_ia32_cmpnltss: 8150 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 8151 case X86::BI__builtin_ia32_cmpnless: 8152 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 8153 case X86::BI__builtin_ia32_cmpordss: 8154 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 8155 case X86::BI__builtin_ia32_cmpeqsd: 8156 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 8157 case X86::BI__builtin_ia32_cmpltsd: 8158 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 8159 case X86::BI__builtin_ia32_cmplesd: 8160 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 8161 case X86::BI__builtin_ia32_cmpunordsd: 8162 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 8163 case X86::BI__builtin_ia32_cmpneqsd: 8164 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 8165 case X86::BI__builtin_ia32_cmpnltsd: 8166 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 8167 case X86::BI__builtin_ia32_cmpnlesd: 8168 return 
getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 8169 case X86::BI__builtin_ia32_cmpordsd: 8170 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 8171 8172 case X86::BI__emul: 8173 case X86::BI__emulu: { 8174 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 8175 bool isSigned = (BuiltinID == X86::BI__emul); 8176 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 8177 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 8178 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 8179 } 8180 case X86::BI__mulh: 8181 case X86::BI__umulh: 8182 case X86::BI_mul128: 8183 case X86::BI_umul128: { 8184 llvm::Type *ResType = ConvertType(E->getType()); 8185 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 8186 8187 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 8188 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 8189 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 8190 8191 Value *MulResult, *HigherBits; 8192 if (IsSigned) { 8193 MulResult = Builder.CreateNSWMul(LHS, RHS); 8194 HigherBits = Builder.CreateAShr(MulResult, 64); 8195 } else { 8196 MulResult = Builder.CreateNUWMul(LHS, RHS); 8197 HigherBits = Builder.CreateLShr(MulResult, 64); 8198 } 8199 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 8200 8201 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 8202 return HigherBits; 8203 8204 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 8205 Builder.CreateStore(HigherBits, HighBitsAddress); 8206 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 8207 } 8208 8209 case X86::BI__faststorefence: { 8210 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8211 llvm::CrossThread); 8212 } 8213 case X86::BI_ReadWriteBarrier: 8214 case X86::BI_ReadBarrier: 8215 case X86::BI_WriteBarrier: { 8216 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 
8217 llvm::SingleThread); 8218 } 8219 case X86::BI_BitScanForward: 8220 case X86::BI_BitScanForward64: 8221 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 8222 case X86::BI_BitScanReverse: 8223 case X86::BI_BitScanReverse64: 8224 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 8225 8226 case X86::BI_InterlockedAnd64: 8227 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 8228 case X86::BI_InterlockedExchange64: 8229 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 8230 case X86::BI_InterlockedExchangeAdd64: 8231 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 8232 case X86::BI_InterlockedExchangeSub64: 8233 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 8234 case X86::BI_InterlockedOr64: 8235 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 8236 case X86::BI_InterlockedXor64: 8237 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 8238 case X86::BI_InterlockedDecrement64: 8239 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 8240 case X86::BI_InterlockedIncrement64: 8241 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 8242 8243 case X86::BI_AddressOfReturnAddress: { 8244 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); 8245 return Builder.CreateCall(F); 8246 } 8247 case X86::BI__stosb: { 8248 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 8249 // instruction, but it will create a memset that won't be optimized away. 8250 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); 8251 } 8252 case X86::BI__ud2: 8253 // llvm.trap makes a ud2a instruction on x86. 8254 return EmitTrapCall(Intrinsic::trap); 8255 case X86::BI__int2c: { 8256 // This syscall signals a driver assertion failure in x86 NT kernels. 
8257 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 8258 llvm::InlineAsm *IA = 8259 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); 8260 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 8261 getLLVMContext(), llvm::AttributeList::FunctionIndex, 8262 llvm::Attribute::NoReturn); 8263 CallSite CS = Builder.CreateCall(IA); 8264 CS.setAttributes(NoReturnAttr); 8265 return CS.getInstruction(); 8266 } 8267 case X86::BI__readfsbyte: 8268 case X86::BI__readfsword: 8269 case X86::BI__readfsdword: 8270 case X86::BI__readfsqword: { 8271 llvm::Type *IntTy = ConvertType(E->getType()); 8272 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8273 llvm::PointerType::get(IntTy, 257)); 8274 LoadInst *Load = Builder.CreateAlignedLoad( 8275 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8276 Load->setVolatile(true); 8277 return Load; 8278 } 8279 case X86::BI__readgsbyte: 8280 case X86::BI__readgsword: 8281 case X86::BI__readgsdword: 8282 case X86::BI__readgsqword: { 8283 llvm::Type *IntTy = ConvertType(E->getType()); 8284 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8285 llvm::PointerType::get(IntTy, 256)); 8286 LoadInst *Load = Builder.CreateAlignedLoad( 8287 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8288 Load->setVolatile(true); 8289 return Load; 8290 } 8291 } 8292 } 8293 8294 8295 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 8296 const CallExpr *E) { 8297 SmallVector<Value*, 4> Ops; 8298 8299 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 8300 Ops.push_back(EmitScalarExpr(E->getArg(i))); 8301 8302 Intrinsic::ID ID = Intrinsic::not_intrinsic; 8303 8304 switch (BuiltinID) { 8305 default: return nullptr; 8306 8307 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 8308 // call __builtin_readcyclecounter. 
8309 case PPC::BI__builtin_ppc_get_timebase: 8310 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 8311 8312 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 8313 case PPC::BI__builtin_altivec_lvx: 8314 case PPC::BI__builtin_altivec_lvxl: 8315 case PPC::BI__builtin_altivec_lvebx: 8316 case PPC::BI__builtin_altivec_lvehx: 8317 case PPC::BI__builtin_altivec_lvewx: 8318 case PPC::BI__builtin_altivec_lvsl: 8319 case PPC::BI__builtin_altivec_lvsr: 8320 case PPC::BI__builtin_vsx_lxvd2x: 8321 case PPC::BI__builtin_vsx_lxvw4x: 8322 case PPC::BI__builtin_vsx_lxvd2x_be: 8323 case PPC::BI__builtin_vsx_lxvw4x_be: 8324 case PPC::BI__builtin_vsx_lxvl: 8325 case PPC::BI__builtin_vsx_lxvll: 8326 { 8327 if(BuiltinID == PPC::BI__builtin_vsx_lxvl || 8328 BuiltinID == PPC::BI__builtin_vsx_lxvll){ 8329 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); 8330 }else { 8331 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8332 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 8333 Ops.pop_back(); 8334 } 8335 8336 switch (BuiltinID) { 8337 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 8338 case PPC::BI__builtin_altivec_lvx: 8339 ID = Intrinsic::ppc_altivec_lvx; 8340 break; 8341 case PPC::BI__builtin_altivec_lvxl: 8342 ID = Intrinsic::ppc_altivec_lvxl; 8343 break; 8344 case PPC::BI__builtin_altivec_lvebx: 8345 ID = Intrinsic::ppc_altivec_lvebx; 8346 break; 8347 case PPC::BI__builtin_altivec_lvehx: 8348 ID = Intrinsic::ppc_altivec_lvehx; 8349 break; 8350 case PPC::BI__builtin_altivec_lvewx: 8351 ID = Intrinsic::ppc_altivec_lvewx; 8352 break; 8353 case PPC::BI__builtin_altivec_lvsl: 8354 ID = Intrinsic::ppc_altivec_lvsl; 8355 break; 8356 case PPC::BI__builtin_altivec_lvsr: 8357 ID = Intrinsic::ppc_altivec_lvsr; 8358 break; 8359 case PPC::BI__builtin_vsx_lxvd2x: 8360 ID = Intrinsic::ppc_vsx_lxvd2x; 8361 break; 8362 case PPC::BI__builtin_vsx_lxvw4x: 8363 ID = Intrinsic::ppc_vsx_lxvw4x; 8364 break; 8365 case PPC::BI__builtin_vsx_lxvd2x_be: 8366 ID = 
Intrinsic::ppc_vsx_lxvd2x_be; 8367 break; 8368 case PPC::BI__builtin_vsx_lxvw4x_be: 8369 ID = Intrinsic::ppc_vsx_lxvw4x_be; 8370 break; 8371 case PPC::BI__builtin_vsx_lxvl: 8372 ID = Intrinsic::ppc_vsx_lxvl; 8373 break; 8374 case PPC::BI__builtin_vsx_lxvll: 8375 ID = Intrinsic::ppc_vsx_lxvll; 8376 break; 8377 } 8378 llvm::Function *F = CGM.getIntrinsic(ID); 8379 return Builder.CreateCall(F, Ops, ""); 8380 } 8381 8382 // vec_st, vec_xst_be 8383 case PPC::BI__builtin_altivec_stvx: 8384 case PPC::BI__builtin_altivec_stvxl: 8385 case PPC::BI__builtin_altivec_stvebx: 8386 case PPC::BI__builtin_altivec_stvehx: 8387 case PPC::BI__builtin_altivec_stvewx: 8388 case PPC::BI__builtin_vsx_stxvd2x: 8389 case PPC::BI__builtin_vsx_stxvw4x: 8390 case PPC::BI__builtin_vsx_stxvd2x_be: 8391 case PPC::BI__builtin_vsx_stxvw4x_be: 8392 case PPC::BI__builtin_vsx_stxvl: 8393 case PPC::BI__builtin_vsx_stxvll: 8394 { 8395 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 8396 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 8397 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8398 }else { 8399 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 8400 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 8401 Ops.pop_back(); 8402 } 8403 8404 switch (BuiltinID) { 8405 default: llvm_unreachable("Unsupported st intrinsic!"); 8406 case PPC::BI__builtin_altivec_stvx: 8407 ID = Intrinsic::ppc_altivec_stvx; 8408 break; 8409 case PPC::BI__builtin_altivec_stvxl: 8410 ID = Intrinsic::ppc_altivec_stvxl; 8411 break; 8412 case PPC::BI__builtin_altivec_stvebx: 8413 ID = Intrinsic::ppc_altivec_stvebx; 8414 break; 8415 case PPC::BI__builtin_altivec_stvehx: 8416 ID = Intrinsic::ppc_altivec_stvehx; 8417 break; 8418 case PPC::BI__builtin_altivec_stvewx: 8419 ID = Intrinsic::ppc_altivec_stvewx; 8420 break; 8421 case PPC::BI__builtin_vsx_stxvd2x: 8422 ID = Intrinsic::ppc_vsx_stxvd2x; 8423 break; 8424 case PPC::BI__builtin_vsx_stxvw4x: 8425 ID = Intrinsic::ppc_vsx_stxvw4x; 8426 break; 8427 case PPC::BI__builtin_vsx_stxvd2x_be: 
8428 ID = Intrinsic::ppc_vsx_stxvd2x_be; 8429 break; 8430 case PPC::BI__builtin_vsx_stxvw4x_be: 8431 ID = Intrinsic::ppc_vsx_stxvw4x_be; 8432 break; 8433 case PPC::BI__builtin_vsx_stxvl: 8434 ID = Intrinsic::ppc_vsx_stxvl; 8435 break; 8436 case PPC::BI__builtin_vsx_stxvll: 8437 ID = Intrinsic::ppc_vsx_stxvll; 8438 break; 8439 } 8440 llvm::Function *F = CGM.getIntrinsic(ID); 8441 return Builder.CreateCall(F, Ops, ""); 8442 } 8443 // Square root 8444 case PPC::BI__builtin_vsx_xvsqrtsp: 8445 case PPC::BI__builtin_vsx_xvsqrtdp: { 8446 llvm::Type *ResultType = ConvertType(E->getType()); 8447 Value *X = EmitScalarExpr(E->getArg(0)); 8448 ID = Intrinsic::sqrt; 8449 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8450 return Builder.CreateCall(F, X); 8451 } 8452 // Count leading zeros 8453 case PPC::BI__builtin_altivec_vclzb: 8454 case PPC::BI__builtin_altivec_vclzh: 8455 case PPC::BI__builtin_altivec_vclzw: 8456 case PPC::BI__builtin_altivec_vclzd: { 8457 llvm::Type *ResultType = ConvertType(E->getType()); 8458 Value *X = EmitScalarExpr(E->getArg(0)); 8459 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8460 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8461 return Builder.CreateCall(F, {X, Undef}); 8462 } 8463 case PPC::BI__builtin_altivec_vctzb: 8464 case PPC::BI__builtin_altivec_vctzh: 8465 case PPC::BI__builtin_altivec_vctzw: 8466 case PPC::BI__builtin_altivec_vctzd: { 8467 llvm::Type *ResultType = ConvertType(E->getType()); 8468 Value *X = EmitScalarExpr(E->getArg(0)); 8469 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8470 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8471 return Builder.CreateCall(F, {X, Undef}); 8472 } 8473 case PPC::BI__builtin_altivec_vpopcntb: 8474 case PPC::BI__builtin_altivec_vpopcnth: 8475 case PPC::BI__builtin_altivec_vpopcntw: 8476 case PPC::BI__builtin_altivec_vpopcntd: { 8477 llvm::Type *ResultType = ConvertType(E->getType()); 8478 Value *X = EmitScalarExpr(E->getArg(0)); 
8479 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8480 return Builder.CreateCall(F, X); 8481 } 8482 // Copy sign 8483 case PPC::BI__builtin_vsx_xvcpsgnsp: 8484 case PPC::BI__builtin_vsx_xvcpsgndp: { 8485 llvm::Type *ResultType = ConvertType(E->getType()); 8486 Value *X = EmitScalarExpr(E->getArg(0)); 8487 Value *Y = EmitScalarExpr(E->getArg(1)); 8488 ID = Intrinsic::copysign; 8489 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8490 return Builder.CreateCall(F, {X, Y}); 8491 } 8492 // Rounding/truncation 8493 case PPC::BI__builtin_vsx_xvrspip: 8494 case PPC::BI__builtin_vsx_xvrdpip: 8495 case PPC::BI__builtin_vsx_xvrdpim: 8496 case PPC::BI__builtin_vsx_xvrspim: 8497 case PPC::BI__builtin_vsx_xvrdpi: 8498 case PPC::BI__builtin_vsx_xvrspi: 8499 case PPC::BI__builtin_vsx_xvrdpic: 8500 case PPC::BI__builtin_vsx_xvrspic: 8501 case PPC::BI__builtin_vsx_xvrdpiz: 8502 case PPC::BI__builtin_vsx_xvrspiz: { 8503 llvm::Type *ResultType = ConvertType(E->getType()); 8504 Value *X = EmitScalarExpr(E->getArg(0)); 8505 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 8506 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 8507 ID = Intrinsic::floor; 8508 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 8509 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 8510 ID = Intrinsic::round; 8511 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 8512 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 8513 ID = Intrinsic::nearbyint; 8514 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 8515 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 8516 ID = Intrinsic::ceil; 8517 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 8518 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 8519 ID = Intrinsic::trunc; 8520 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8521 return Builder.CreateCall(F, X); 8522 } 8523 8524 // Absolute value 8525 case PPC::BI__builtin_vsx_xvabsdp: 8526 case PPC::BI__builtin_vsx_xvabssp: { 8527 llvm::Type *ResultType = ConvertType(E->getType()); 8528 Value *X = 
EmitScalarExpr(E->getArg(0)); 8529 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8530 return Builder.CreateCall(F, X); 8531 } 8532 8533 // FMA variations 8534 case PPC::BI__builtin_vsx_xvmaddadp: 8535 case PPC::BI__builtin_vsx_xvmaddasp: 8536 case PPC::BI__builtin_vsx_xvnmaddadp: 8537 case PPC::BI__builtin_vsx_xvnmaddasp: 8538 case PPC::BI__builtin_vsx_xvmsubadp: 8539 case PPC::BI__builtin_vsx_xvmsubasp: 8540 case PPC::BI__builtin_vsx_xvnmsubadp: 8541 case PPC::BI__builtin_vsx_xvnmsubasp: { 8542 llvm::Type *ResultType = ConvertType(E->getType()); 8543 Value *X = EmitScalarExpr(E->getArg(0)); 8544 Value *Y = EmitScalarExpr(E->getArg(1)); 8545 Value *Z = EmitScalarExpr(E->getArg(2)); 8546 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8547 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8548 switch (BuiltinID) { 8549 case PPC::BI__builtin_vsx_xvmaddadp: 8550 case PPC::BI__builtin_vsx_xvmaddasp: 8551 return Builder.CreateCall(F, {X, Y, Z}); 8552 case PPC::BI__builtin_vsx_xvnmaddadp: 8553 case PPC::BI__builtin_vsx_xvnmaddasp: 8554 return Builder.CreateFSub(Zero, 8555 Builder.CreateCall(F, {X, Y, Z}), "sub"); 8556 case PPC::BI__builtin_vsx_xvmsubadp: 8557 case PPC::BI__builtin_vsx_xvmsubasp: 8558 return Builder.CreateCall(F, 8559 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8560 case PPC::BI__builtin_vsx_xvnmsubadp: 8561 case PPC::BI__builtin_vsx_xvnmsubasp: 8562 Value *FsubRes = 8563 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8564 return Builder.CreateFSub(Zero, FsubRes, "sub"); 8565 } 8566 llvm_unreachable("Unknown FMA operation"); 8567 return nullptr; // Suppress no-return warning 8568 } 8569 8570 case PPC::BI__builtin_vsx_insertword: { 8571 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 8572 8573 // Third argument is a compile time constant int. It must be clamped to 8574 // to the range [0, 12]. 
8575 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8576 assert(ArgCI && 8577 "Third arg to xxinsertw intrinsic must be constant integer"); 8578 const int64_t MaxIndex = 12; 8579 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8580 8581 // The builtin semantics don't exactly match the xxinsertw instructions 8582 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 8583 // word from the first argument, and inserts it in the second argument. The 8584 // instruction extracts the word from its second input register and inserts 8585 // it into its first input register, so swap the first and second arguments. 8586 std::swap(Ops[0], Ops[1]); 8587 8588 // Need to cast the second argument from a vector of unsigned int to a 8589 // vector of long long. 8590 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8591 8592 if (getTarget().isLittleEndian()) { 8593 // Create a shuffle mask of (1, 0) 8594 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8595 ConstantInt::get(Int32Ty, 0) 8596 }; 8597 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8598 8599 // Reverse the double words in the vector we will extract from. 8600 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8601 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 8602 8603 // Reverse the index. 8604 Index = MaxIndex - Index; 8605 } 8606 8607 // Intrinsic expects the first arg to be a vector of int. 8608 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8609 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 8610 return Builder.CreateCall(F, Ops); 8611 } 8612 8613 case PPC::BI__builtin_vsx_extractuword: { 8614 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 8615 8616 // Intrinsic expects the first argument to be a vector of doublewords. 
8617 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8618 8619 // The second argument is a compile time constant int that needs to 8620 // be clamped to the range [0, 12]. 8621 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 8622 assert(ArgCI && 8623 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 8624 const int64_t MaxIndex = 12; 8625 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8626 8627 if (getTarget().isLittleEndian()) { 8628 // Reverse the index. 8629 Index = MaxIndex - Index; 8630 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8631 8632 // Emit the call, then reverse the double words of the results vector. 8633 Value *Call = Builder.CreateCall(F, Ops); 8634 8635 // Create a shuffle mask of (1, 0) 8636 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8637 ConstantInt::get(Int32Ty, 0) 8638 }; 8639 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8640 8641 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 8642 return ShuffleCall; 8643 } else { 8644 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8645 return Builder.CreateCall(F, Ops); 8646 } 8647 } 8648 8649 case PPC::BI__builtin_vsx_xxpermdi: { 8650 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8651 assert(ArgCI && "Third arg must be constant integer!"); 8652 8653 unsigned Index = ArgCI->getZExtValue(); 8654 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8655 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8656 8657 // Element zero comes from the first input vector and element one comes from 8658 // the second. The element indices within each vector are numbered in big 8659 // endian order so the shuffle mask must be adjusted for this on little 8660 // endian platforms (i.e. index is complemented and source vector reversed). 
8661 unsigned ElemIdx0; 8662 unsigned ElemIdx1; 8663 if (getTarget().isLittleEndian()) { 8664 ElemIdx0 = (~Index & 1) + 2; 8665 ElemIdx1 = (~Index & 2) >> 1; 8666 } else { // BigEndian 8667 ElemIdx0 = (Index & 2) >> 1; 8668 ElemIdx1 = 2 + (Index & 1); 8669 } 8670 8671 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 8672 ConstantInt::get(Int32Ty, ElemIdx1)}; 8673 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8674 8675 Value *ShuffleCall = 8676 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8677 QualType BIRetType = E->getType(); 8678 auto RetTy = ConvertType(BIRetType); 8679 return Builder.CreateBitCast(ShuffleCall, RetTy); 8680 } 8681 8682 case PPC::BI__builtin_vsx_xxsldwi: { 8683 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8684 assert(ArgCI && "Third argument must be a compile time constant"); 8685 unsigned Index = ArgCI->getZExtValue() & 0x3; 8686 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8687 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 8688 8689 // Create a shuffle mask 8690 unsigned ElemIdx0; 8691 unsigned ElemIdx1; 8692 unsigned ElemIdx2; 8693 unsigned ElemIdx3; 8694 if (getTarget().isLittleEndian()) { 8695 // Little endian element N comes from element 8+N-Index of the 8696 // concatenated wide vector (of course, using modulo arithmetic on 8697 // the total number of elements). 
8698 ElemIdx0 = (8 - Index) % 8; 8699 ElemIdx1 = (9 - Index) % 8; 8700 ElemIdx2 = (10 - Index) % 8; 8701 ElemIdx3 = (11 - Index) % 8; 8702 } else { 8703 // Big endian ElemIdx<N> = Index + N 8704 ElemIdx0 = Index; 8705 ElemIdx1 = Index + 1; 8706 ElemIdx2 = Index + 2; 8707 ElemIdx3 = Index + 3; 8708 } 8709 8710 Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), 8711 ConstantInt::get(Int32Ty, ElemIdx1), 8712 ConstantInt::get(Int32Ty, ElemIdx2), 8713 ConstantInt::get(Int32Ty, ElemIdx3)}; 8714 8715 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8716 Value *ShuffleCall = 8717 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8718 QualType BIRetType = E->getType(); 8719 auto RetTy = ConvertType(BIRetType); 8720 return Builder.CreateBitCast(ShuffleCall, RetTy); 8721 } 8722 } 8723 } 8724 8725 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 8726 const CallExpr *E) { 8727 switch (BuiltinID) { 8728 case AMDGPU::BI__builtin_amdgcn_div_scale: 8729 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 8730 // Translate from the intrinsics's struct return to the builtin's out 8731 // argument. 
8732 8733 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 8734 8735 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 8736 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 8737 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 8738 8739 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 8740 X->getType()); 8741 8742 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 8743 8744 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 8745 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 8746 8747 llvm::Type *RealFlagType 8748 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 8749 8750 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 8751 Builder.CreateStore(FlagExt, FlagOutPtr); 8752 return Result; 8753 } 8754 case AMDGPU::BI__builtin_amdgcn_div_fmas: 8755 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 8756 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 8757 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 8758 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 8759 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 8760 8761 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 8762 Src0->getType()); 8763 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 8764 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 8765 } 8766 8767 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 8768 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 8769 case AMDGPU::BI__builtin_amdgcn_mov_dpp: { 8770 llvm::SmallVector<llvm::Value *, 5> Args; 8771 for (unsigned I = 0; I != 5; ++I) 8772 Args.push_back(EmitScalarExpr(E->getArg(I))); 8773 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, 8774 Args[0]->getType()); 8775 return Builder.CreateCall(F, Args); 8776 } 8777 case AMDGPU::BI__builtin_amdgcn_div_fixup: 8778 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 8779 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 8780 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 8781 case 
AMDGPU::BI__builtin_amdgcn_trig_preop: 8782 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 8783 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 8784 case AMDGPU::BI__builtin_amdgcn_rcp: 8785 case AMDGPU::BI__builtin_amdgcn_rcpf: 8786 case AMDGPU::BI__builtin_amdgcn_rcph: 8787 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 8788 case AMDGPU::BI__builtin_amdgcn_rsq: 8789 case AMDGPU::BI__builtin_amdgcn_rsqf: 8790 case AMDGPU::BI__builtin_amdgcn_rsqh: 8791 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 8792 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 8793 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 8794 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 8795 case AMDGPU::BI__builtin_amdgcn_sinf: 8796 case AMDGPU::BI__builtin_amdgcn_sinh: 8797 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 8798 case AMDGPU::BI__builtin_amdgcn_cosf: 8799 case AMDGPU::BI__builtin_amdgcn_cosh: 8800 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 8801 case AMDGPU::BI__builtin_amdgcn_log_clampf: 8802 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 8803 case AMDGPU::BI__builtin_amdgcn_ldexp: 8804 case AMDGPU::BI__builtin_amdgcn_ldexpf: 8805 case AMDGPU::BI__builtin_amdgcn_ldexph: 8806 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 8807 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 8808 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 8809 case AMDGPU::BI__builtin_amdgcn_frexp_manth: 8810 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 8811 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 8812 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 8813 Value *Src0 = EmitScalarExpr(E->getArg(0)); 8814 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 8815 { Builder.getInt32Ty(), Src0->getType() }); 8816 return Builder.CreateCall(F, Src0); 8817 } 8818 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 8819 Value *Src0 = EmitScalarExpr(E->getArg(0)); 8820 Value *F = 
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 8821 { Builder.getInt16Ty(), Src0->getType() }); 8822 return Builder.CreateCall(F, Src0); 8823 } 8824 case AMDGPU::BI__builtin_amdgcn_fract: 8825 case AMDGPU::BI__builtin_amdgcn_fractf: 8826 case AMDGPU::BI__builtin_amdgcn_fracth: 8827 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 8828 case AMDGPU::BI__builtin_amdgcn_lerp: 8829 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 8830 case AMDGPU::BI__builtin_amdgcn_uicmp: 8831 case AMDGPU::BI__builtin_amdgcn_uicmpl: 8832 case AMDGPU::BI__builtin_amdgcn_sicmp: 8833 case AMDGPU::BI__builtin_amdgcn_sicmpl: 8834 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 8835 case AMDGPU::BI__builtin_amdgcn_fcmp: 8836 case AMDGPU::BI__builtin_amdgcn_fcmpf: 8837 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 8838 case AMDGPU::BI__builtin_amdgcn_class: 8839 case AMDGPU::BI__builtin_amdgcn_classf: 8840 case AMDGPU::BI__builtin_amdgcn_classh: 8841 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 8842 case AMDGPU::BI__builtin_amdgcn_fmed3f: 8843 case AMDGPU::BI__builtin_amdgcn_fmed3h: 8844 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); 8845 case AMDGPU::BI__builtin_amdgcn_read_exec: { 8846 CallInst *CI = cast<CallInst>( 8847 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 8848 CI->setConvergent(); 8849 return CI; 8850 } 8851 8852 // amdgcn workitem 8853 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 8854 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 8855 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 8856 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 8857 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 8858 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 8859 8860 // r600 intrinsics 8861 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 8862 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 8863 return 
emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 8864 case AMDGPU::BI__builtin_r600_read_tidig_x: 8865 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 8866 case AMDGPU::BI__builtin_r600_read_tidig_y: 8867 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 8868 case AMDGPU::BI__builtin_r600_read_tidig_z: 8869 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 8870 default: 8871 return nullptr; 8872 } 8873 } 8874 8875 /// Handle a SystemZ function in which the final argument is a pointer 8876 /// to an int that receives the post-instruction CC value. At the LLVM level 8877 /// this is represented as a function that returns a {result, cc} pair. 8878 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 8879 unsigned IntrinsicID, 8880 const CallExpr *E) { 8881 unsigned NumArgs = E->getNumArgs() - 1; 8882 SmallVector<Value *, 8> Args(NumArgs); 8883 for (unsigned I = 0; I < NumArgs; ++I) 8884 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 8885 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 8886 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 8887 Value *Call = CGF.Builder.CreateCall(F, Args); 8888 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 8889 CGF.Builder.CreateStore(CC, CCPtr); 8890 return CGF.Builder.CreateExtractValue(Call, 0); 8891 } 8892 8893 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 8894 const CallExpr *E) { 8895 switch (BuiltinID) { 8896 case SystemZ::BI__builtin_tbegin: { 8897 Value *TDB = EmitScalarExpr(E->getArg(0)); 8898 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 8899 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 8900 return Builder.CreateCall(F, {TDB, Control}); 8901 } 8902 case SystemZ::BI__builtin_tbegin_nofloat: { 8903 Value *TDB = EmitScalarExpr(E->getArg(0)); 8904 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 8905 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 8906 return 
Builder.CreateCall(F, {TDB, Control}); 8907 } 8908 case SystemZ::BI__builtin_tbeginc: { 8909 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 8910 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 8911 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 8912 return Builder.CreateCall(F, {TDB, Control}); 8913 } 8914 case SystemZ::BI__builtin_tabort: { 8915 Value *Data = EmitScalarExpr(E->getArg(0)); 8916 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 8917 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 8918 } 8919 case SystemZ::BI__builtin_non_tx_store: { 8920 Value *Address = EmitScalarExpr(E->getArg(0)); 8921 Value *Data = EmitScalarExpr(E->getArg(1)); 8922 Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); 8923 return Builder.CreateCall(F, {Data, Address}); 8924 } 8925 8926 // Vector builtins. Note that most vector builtins are mapped automatically 8927 // to target-specific LLVM intrinsics. The ones handled specially here can 8928 // be represented via standard LLVM IR, which is preferable to enable common 8929 // LLVM optimizations. 
8930 8931 case SystemZ::BI__builtin_s390_vpopctb: 8932 case SystemZ::BI__builtin_s390_vpopcth: 8933 case SystemZ::BI__builtin_s390_vpopctf: 8934 case SystemZ::BI__builtin_s390_vpopctg: { 8935 llvm::Type *ResultType = ConvertType(E->getType()); 8936 Value *X = EmitScalarExpr(E->getArg(0)); 8937 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8938 return Builder.CreateCall(F, X); 8939 } 8940 8941 case SystemZ::BI__builtin_s390_vclzb: 8942 case SystemZ::BI__builtin_s390_vclzh: 8943 case SystemZ::BI__builtin_s390_vclzf: 8944 case SystemZ::BI__builtin_s390_vclzg: { 8945 llvm::Type *ResultType = ConvertType(E->getType()); 8946 Value *X = EmitScalarExpr(E->getArg(0)); 8947 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8948 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8949 return Builder.CreateCall(F, {X, Undef}); 8950 } 8951 8952 case SystemZ::BI__builtin_s390_vctzb: 8953 case SystemZ::BI__builtin_s390_vctzh: 8954 case SystemZ::BI__builtin_s390_vctzf: 8955 case SystemZ::BI__builtin_s390_vctzg: { 8956 llvm::Type *ResultType = ConvertType(E->getType()); 8957 Value *X = EmitScalarExpr(E->getArg(0)); 8958 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8959 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8960 return Builder.CreateCall(F, {X, Undef}); 8961 } 8962 8963 case SystemZ::BI__builtin_s390_vfsqdb: { 8964 llvm::Type *ResultType = ConvertType(E->getType()); 8965 Value *X = EmitScalarExpr(E->getArg(0)); 8966 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 8967 return Builder.CreateCall(F, X); 8968 } 8969 case SystemZ::BI__builtin_s390_vfmadb: { 8970 llvm::Type *ResultType = ConvertType(E->getType()); 8971 Value *X = EmitScalarExpr(E->getArg(0)); 8972 Value *Y = EmitScalarExpr(E->getArg(1)); 8973 Value *Z = EmitScalarExpr(E->getArg(2)); 8974 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8975 return Builder.CreateCall(F, {X, Y, Z}); 8976 } 8977 case 
SystemZ::BI__builtin_s390_vfmsdb: { 8978 llvm::Type *ResultType = ConvertType(E->getType()); 8979 Value *X = EmitScalarExpr(E->getArg(0)); 8980 Value *Y = EmitScalarExpr(E->getArg(1)); 8981 Value *Z = EmitScalarExpr(E->getArg(2)); 8982 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8983 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8984 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8985 } 8986 case SystemZ::BI__builtin_s390_vflpdb: { 8987 llvm::Type *ResultType = ConvertType(E->getType()); 8988 Value *X = EmitScalarExpr(E->getArg(0)); 8989 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8990 return Builder.CreateCall(F, X); 8991 } 8992 case SystemZ::BI__builtin_s390_vflndb: { 8993 llvm::Type *ResultType = ConvertType(E->getType()); 8994 Value *X = EmitScalarExpr(E->getArg(0)); 8995 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8996 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8997 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 8998 } 8999 case SystemZ::BI__builtin_s390_vfidb: { 9000 llvm::Type *ResultType = ConvertType(E->getType()); 9001 Value *X = EmitScalarExpr(E->getArg(0)); 9002 // Constant-fold the M4 and M5 mask arguments. 9003 llvm::APSInt M4, M5; 9004 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 9005 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 9006 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 9007 (void)IsConstM4; (void)IsConstM5; 9008 // Check whether this instance of vfidb can be represented via a LLVM 9009 // standard intrinsic. We only support some combinations of M4 and M5. 
9010 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9011 switch (M4.getZExtValue()) { 9012 default: break; 9013 case 0: // IEEE-inexact exception allowed 9014 switch (M5.getZExtValue()) { 9015 default: break; 9016 case 0: ID = Intrinsic::rint; break; 9017 } 9018 break; 9019 case 4: // IEEE-inexact exception suppressed 9020 switch (M5.getZExtValue()) { 9021 default: break; 9022 case 0: ID = Intrinsic::nearbyint; break; 9023 case 1: ID = Intrinsic::round; break; 9024 case 5: ID = Intrinsic::trunc; break; 9025 case 6: ID = Intrinsic::ceil; break; 9026 case 7: ID = Intrinsic::floor; break; 9027 } 9028 break; 9029 } 9030 if (ID != Intrinsic::not_intrinsic) { 9031 Function *F = CGM.getIntrinsic(ID, ResultType); 9032 return Builder.CreateCall(F, X); 9033 } 9034 Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); 9035 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9036 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 9037 return Builder.CreateCall(F, {X, M4Value, M5Value}); 9038 } 9039 9040 // Vector intrisincs that output the post-instruction CC value. 
9041 9042 #define INTRINSIC_WITH_CC(NAME) \ 9043 case SystemZ::BI__builtin_##NAME: \ 9044 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 9045 9046 INTRINSIC_WITH_CC(s390_vpkshs); 9047 INTRINSIC_WITH_CC(s390_vpksfs); 9048 INTRINSIC_WITH_CC(s390_vpksgs); 9049 9050 INTRINSIC_WITH_CC(s390_vpklshs); 9051 INTRINSIC_WITH_CC(s390_vpklsfs); 9052 INTRINSIC_WITH_CC(s390_vpklsgs); 9053 9054 INTRINSIC_WITH_CC(s390_vceqbs); 9055 INTRINSIC_WITH_CC(s390_vceqhs); 9056 INTRINSIC_WITH_CC(s390_vceqfs); 9057 INTRINSIC_WITH_CC(s390_vceqgs); 9058 9059 INTRINSIC_WITH_CC(s390_vchbs); 9060 INTRINSIC_WITH_CC(s390_vchhs); 9061 INTRINSIC_WITH_CC(s390_vchfs); 9062 INTRINSIC_WITH_CC(s390_vchgs); 9063 9064 INTRINSIC_WITH_CC(s390_vchlbs); 9065 INTRINSIC_WITH_CC(s390_vchlhs); 9066 INTRINSIC_WITH_CC(s390_vchlfs); 9067 INTRINSIC_WITH_CC(s390_vchlgs); 9068 9069 INTRINSIC_WITH_CC(s390_vfaebs); 9070 INTRINSIC_WITH_CC(s390_vfaehs); 9071 INTRINSIC_WITH_CC(s390_vfaefs); 9072 9073 INTRINSIC_WITH_CC(s390_vfaezbs); 9074 INTRINSIC_WITH_CC(s390_vfaezhs); 9075 INTRINSIC_WITH_CC(s390_vfaezfs); 9076 9077 INTRINSIC_WITH_CC(s390_vfeebs); 9078 INTRINSIC_WITH_CC(s390_vfeehs); 9079 INTRINSIC_WITH_CC(s390_vfeefs); 9080 9081 INTRINSIC_WITH_CC(s390_vfeezbs); 9082 INTRINSIC_WITH_CC(s390_vfeezhs); 9083 INTRINSIC_WITH_CC(s390_vfeezfs); 9084 9085 INTRINSIC_WITH_CC(s390_vfenebs); 9086 INTRINSIC_WITH_CC(s390_vfenehs); 9087 INTRINSIC_WITH_CC(s390_vfenefs); 9088 9089 INTRINSIC_WITH_CC(s390_vfenezbs); 9090 INTRINSIC_WITH_CC(s390_vfenezhs); 9091 INTRINSIC_WITH_CC(s390_vfenezfs); 9092 9093 INTRINSIC_WITH_CC(s390_vistrbs); 9094 INTRINSIC_WITH_CC(s390_vistrhs); 9095 INTRINSIC_WITH_CC(s390_vistrfs); 9096 9097 INTRINSIC_WITH_CC(s390_vstrcbs); 9098 INTRINSIC_WITH_CC(s390_vstrchs); 9099 INTRINSIC_WITH_CC(s390_vstrcfs); 9100 9101 INTRINSIC_WITH_CC(s390_vstrczbs); 9102 INTRINSIC_WITH_CC(s390_vstrczhs); 9103 INTRINSIC_WITH_CC(s390_vstrczfs); 9104 9105 INTRINSIC_WITH_CC(s390_vfcedbs); 9106 INTRINSIC_WITH_CC(s390_vfchdbs); 9107 
INTRINSIC_WITH_CC(s390_vfchedbs); 9108 9109 INTRINSIC_WITH_CC(s390_vftcidb); 9110 9111 #undef INTRINSIC_WITH_CC 9112 9113 default: 9114 return nullptr; 9115 } 9116 } 9117 9118 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 9119 const CallExpr *E) { 9120 auto MakeLdg = [&](unsigned IntrinsicID) { 9121 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9122 clang::CharUnits Align = 9123 getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); 9124 return Builder.CreateCall( 9125 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9126 Ptr->getType()}), 9127 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 9128 }; 9129 auto MakeScopedAtomic = [&](unsigned IntrinsicID) { 9130 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9131 return Builder.CreateCall( 9132 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9133 Ptr->getType()}), 9134 {Ptr, EmitScalarExpr(E->getArg(1))}); 9135 }; 9136 switch (BuiltinID) { 9137 case NVPTX::BI__nvvm_atom_add_gen_i: 9138 case NVPTX::BI__nvvm_atom_add_gen_l: 9139 case NVPTX::BI__nvvm_atom_add_gen_ll: 9140 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 9141 9142 case NVPTX::BI__nvvm_atom_sub_gen_i: 9143 case NVPTX::BI__nvvm_atom_sub_gen_l: 9144 case NVPTX::BI__nvvm_atom_sub_gen_ll: 9145 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 9146 9147 case NVPTX::BI__nvvm_atom_and_gen_i: 9148 case NVPTX::BI__nvvm_atom_and_gen_l: 9149 case NVPTX::BI__nvvm_atom_and_gen_ll: 9150 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 9151 9152 case NVPTX::BI__nvvm_atom_or_gen_i: 9153 case NVPTX::BI__nvvm_atom_or_gen_l: 9154 case NVPTX::BI__nvvm_atom_or_gen_ll: 9155 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 9156 9157 case NVPTX::BI__nvvm_atom_xor_gen_i: 9158 case NVPTX::BI__nvvm_atom_xor_gen_l: 9159 case NVPTX::BI__nvvm_atom_xor_gen_ll: 9160 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 9161 9162 
case NVPTX::BI__nvvm_atom_xchg_gen_i: 9163 case NVPTX::BI__nvvm_atom_xchg_gen_l: 9164 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 9165 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 9166 9167 case NVPTX::BI__nvvm_atom_max_gen_i: 9168 case NVPTX::BI__nvvm_atom_max_gen_l: 9169 case NVPTX::BI__nvvm_atom_max_gen_ll: 9170 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 9171 9172 case NVPTX::BI__nvvm_atom_max_gen_ui: 9173 case NVPTX::BI__nvvm_atom_max_gen_ul: 9174 case NVPTX::BI__nvvm_atom_max_gen_ull: 9175 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 9176 9177 case NVPTX::BI__nvvm_atom_min_gen_i: 9178 case NVPTX::BI__nvvm_atom_min_gen_l: 9179 case NVPTX::BI__nvvm_atom_min_gen_ll: 9180 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 9181 9182 case NVPTX::BI__nvvm_atom_min_gen_ui: 9183 case NVPTX::BI__nvvm_atom_min_gen_ul: 9184 case NVPTX::BI__nvvm_atom_min_gen_ull: 9185 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 9186 9187 case NVPTX::BI__nvvm_atom_cas_gen_i: 9188 case NVPTX::BI__nvvm_atom_cas_gen_l: 9189 case NVPTX::BI__nvvm_atom_cas_gen_ll: 9190 // __nvvm_atom_cas_gen_* should return the old value rather than the 9191 // success flag. 9192 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 9193 9194 case NVPTX::BI__nvvm_atom_add_gen_f: { 9195 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9196 Value *Val = EmitScalarExpr(E->getArg(1)); 9197 // atomicrmw only deals with integer arguments so we need to use 9198 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 
9199 Value *FnALAF32 = 9200 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 9201 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 9202 } 9203 9204 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 9205 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9206 Value *Val = EmitScalarExpr(E->getArg(1)); 9207 Value *FnALI32 = 9208 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 9209 return Builder.CreateCall(FnALI32, {Ptr, Val}); 9210 } 9211 9212 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 9213 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9214 Value *Val = EmitScalarExpr(E->getArg(1)); 9215 Value *FnALD32 = 9216 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 9217 return Builder.CreateCall(FnALD32, {Ptr, Val}); 9218 } 9219 9220 case NVPTX::BI__nvvm_ldg_c: 9221 case NVPTX::BI__nvvm_ldg_c2: 9222 case NVPTX::BI__nvvm_ldg_c4: 9223 case NVPTX::BI__nvvm_ldg_s: 9224 case NVPTX::BI__nvvm_ldg_s2: 9225 case NVPTX::BI__nvvm_ldg_s4: 9226 case NVPTX::BI__nvvm_ldg_i: 9227 case NVPTX::BI__nvvm_ldg_i2: 9228 case NVPTX::BI__nvvm_ldg_i4: 9229 case NVPTX::BI__nvvm_ldg_l: 9230 case NVPTX::BI__nvvm_ldg_ll: 9231 case NVPTX::BI__nvvm_ldg_ll2: 9232 case NVPTX::BI__nvvm_ldg_uc: 9233 case NVPTX::BI__nvvm_ldg_uc2: 9234 case NVPTX::BI__nvvm_ldg_uc4: 9235 case NVPTX::BI__nvvm_ldg_us: 9236 case NVPTX::BI__nvvm_ldg_us2: 9237 case NVPTX::BI__nvvm_ldg_us4: 9238 case NVPTX::BI__nvvm_ldg_ui: 9239 case NVPTX::BI__nvvm_ldg_ui2: 9240 case NVPTX::BI__nvvm_ldg_ui4: 9241 case NVPTX::BI__nvvm_ldg_ul: 9242 case NVPTX::BI__nvvm_ldg_ull: 9243 case NVPTX::BI__nvvm_ldg_ull2: 9244 // PTX Interoperability section 2.2: "For a vector with an even number of 9245 // elements, its alignment is set to number of elements times the alignment 9246 // of its member: n*alignof(t)." 
9247 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 9248 case NVPTX::BI__nvvm_ldg_f: 9249 case NVPTX::BI__nvvm_ldg_f2: 9250 case NVPTX::BI__nvvm_ldg_f4: 9251 case NVPTX::BI__nvvm_ldg_d: 9252 case NVPTX::BI__nvvm_ldg_d2: 9253 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 9254 9255 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 9256 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 9257 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 9258 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 9259 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 9260 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 9261 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 9262 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 9263 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 9264 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 9265 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 9266 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 9267 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 9268 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 9269 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 9270 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 9271 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 9272 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 9273 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 9274 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 9275 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 9276 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 9277 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 9278 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 9279 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 9280 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 9281 case NVPTX::BI__nvvm_atom_cta_max_gen_ll: 9282 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 9283 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 9284 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 9285 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 9286 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 9287 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 9288 case 
NVPTX::BI__nvvm_atom_sys_max_gen_ll: 9289 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 9290 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 9291 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 9292 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 9293 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 9294 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 9295 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 9296 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 9297 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 9298 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 9299 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 9300 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 9301 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 9302 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 9303 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 9304 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 9305 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 9306 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 9307 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 9308 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 9309 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 9310 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 9311 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 9312 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 9313 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 9314 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 9315 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 9316 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 9317 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 9318 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 9319 case NVPTX::BI__nvvm_atom_sys_and_gen_ll: 9320 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 9321 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 9322 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 9323 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 9324 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 9325 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 9326 case 
NVPTX::BI__nvvm_atom_sys_or_gen_l: 9327 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 9328 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 9329 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 9330 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 9331 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 9332 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 9333 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 9334 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 9335 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 9336 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 9337 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 9338 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 9339 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 9340 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9341 return Builder.CreateCall( 9342 CGM.getIntrinsic( 9343 Intrinsic::nvvm_atomic_cas_gen_i_cta, 9344 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9345 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9346 } 9347 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 9348 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 9349 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 9350 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9351 return Builder.CreateCall( 9352 CGM.getIntrinsic( 9353 Intrinsic::nvvm_atomic_cas_gen_i_sys, 9354 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9355 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9356 } 9357 default: 9358 return nullptr; 9359 } 9360 } 9361 9362 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 9363 const CallExpr *E) { 9364 switch (BuiltinID) { 9365 case WebAssembly::BI__builtin_wasm_current_memory: { 9366 llvm::Type *ResultType = ConvertType(E->getType()); 9367 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 9368 return Builder.CreateCall(Callee); 9369 } 9370 case WebAssembly::BI__builtin_wasm_grow_memory: { 9371 Value *X = EmitScalarExpr(E->getArg(0)); 9372 Value *Callee = 
CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 9373 return Builder.CreateCall(Callee, X); 9374 } 9375 9376 default: 9377 return nullptr; 9378 } 9379 } 9380