1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGObjCRuntime.h" 16 #include "CGOpenCLRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "CodeGenModule.h" 19 #include "ConstantEmitter.h" 20 #include "TargetInfo.h" 21 #include "clang/AST/ASTContext.h" 22 #include "clang/AST/Decl.h" 23 #include "clang/Analysis/Analyses/OSLog.h" 24 #include "clang/Basic/TargetBuiltins.h" 25 #include "clang/Basic/TargetInfo.h" 26 #include "clang/CodeGen/CGFunctionInfo.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/IR/CallSite.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/InlineAsm.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/MDBuilder.h" 33 #include <sstream> 34 35 using namespace clang; 36 using namespace CodeGen; 37 using namespace llvm; 38 39 static 40 int64_t clamp(int64_t Value, int64_t Low, int64_t High) { 41 return std::min(High, std::max(Low, Value)); 42 } 43 44 /// getBuiltinLibFunction - Given a builtin id for a function like 45 /// "__builtin_fabsf", return a Function* for "fabsf". 46 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 47 unsigned BuiltinID) { 48 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 49 50 // Get the name, skip over the __builtin_ prefix (if necessary). 51 StringRef Name; 52 GlobalDecl D(FD); 53 54 // If the builtin has been declared explicitly with an assembler label, 55 // use the mangled name. This differs from the plain label on platforms 56 // that prefix labels. 
57 if (FD->hasAttr<AsmLabelAttr>()) 58 Name = getMangledName(D); 59 else 60 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 61 62 llvm::FunctionType *Ty = 63 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 64 65 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 66 } 67 68 /// Emit the conversions required to turn the given value into an 69 /// integer of the given size. 70 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 71 QualType T, llvm::IntegerType *IntType) { 72 V = CGF.EmitToMemory(V, T); 73 74 if (V->getType()->isPointerTy()) 75 return CGF.Builder.CreatePtrToInt(V, IntType); 76 77 assert(V->getType() == IntType); 78 return V; 79 } 80 81 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 82 QualType T, llvm::Type *ResultType) { 83 V = CGF.EmitFromMemory(V, T); 84 85 if (ResultType->isPointerTy()) 86 return CGF.Builder.CreateIntToPtr(V, ResultType); 87 88 assert(V->getType() == ResultType); 89 return V; 90 } 91 92 /// Utility to insert an atomic instruction based on Instrinsic::ID 93 /// and the expression node. 
94 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 95 llvm::AtomicRMWInst::BinOp Kind, 96 const CallExpr *E) { 97 QualType T = E->getType(); 98 assert(E->getArg(0)->getType()->isPointerType()); 99 assert(CGF.getContext().hasSameUnqualifiedType(T, 100 E->getArg(0)->getType()->getPointeeType())); 101 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 102 103 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 104 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 105 106 llvm::IntegerType *IntType = 107 llvm::IntegerType::get(CGF.getLLVMContext(), 108 CGF.getContext().getTypeSize(T)); 109 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 110 111 llvm::Value *Args[2]; 112 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 113 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 114 llvm::Type *ValueType = Args[1]->getType(); 115 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 116 117 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 118 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 119 return EmitFromInt(CGF, Result, T, ValueType); 120 } 121 122 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 123 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 124 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 125 126 // Convert the type of the pointer to a pointer to the stored type. 
127 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 128 Value *BC = CGF.Builder.CreateBitCast( 129 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 130 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 131 LV.setNontemporal(true); 132 CGF.EmitStoreOfScalar(Val, LV, false); 133 return nullptr; 134 } 135 136 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 137 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 138 139 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 140 LV.setNontemporal(true); 141 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 142 } 143 144 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 145 llvm::AtomicRMWInst::BinOp Kind, 146 const CallExpr *E) { 147 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 148 } 149 150 /// Utility to insert an atomic instruction based Instrinsic::ID and 151 /// the expression node, where the return value is the result of the 152 /// operation. 
153 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 154 llvm::AtomicRMWInst::BinOp Kind, 155 const CallExpr *E, 156 Instruction::BinaryOps Op, 157 bool Invert = false) { 158 QualType T = E->getType(); 159 assert(E->getArg(0)->getType()->isPointerType()); 160 assert(CGF.getContext().hasSameUnqualifiedType(T, 161 E->getArg(0)->getType()->getPointeeType())); 162 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 163 164 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 165 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 166 167 llvm::IntegerType *IntType = 168 llvm::IntegerType::get(CGF.getLLVMContext(), 169 CGF.getContext().getTypeSize(T)); 170 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 171 172 llvm::Value *Args[2]; 173 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 174 llvm::Type *ValueType = Args[1]->getType(); 175 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 176 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 177 178 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 179 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 180 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 181 if (Invert) 182 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 183 llvm::ConstantInt::get(IntType, -1)); 184 Result = EmitFromInt(CGF, Result, T, ValueType); 185 return RValue::get(Result); 186 } 187 188 /// @brief Utility to insert an atomic cmpxchg instruction. 189 /// 190 /// @param CGF The current codegen function. 191 /// @param E Builtin call expression to convert to cmpxchg. 192 /// arg0 - address to operate on 193 /// arg1 - value to compare with 194 /// arg2 - new value 195 /// @param ReturnBool Specifies whether to return success flag of 196 /// cmpxchg result or the old value. 
197 /// 198 /// @returns result of cmpxchg, according to ReturnBool 199 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 200 bool ReturnBool) { 201 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 202 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 203 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 204 205 llvm::IntegerType *IntType = llvm::IntegerType::get( 206 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 207 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 208 209 Value *Args[3]; 210 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 211 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 212 llvm::Type *ValueType = Args[1]->getType(); 213 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 214 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 215 216 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 217 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 218 llvm::AtomicOrdering::SequentiallyConsistent); 219 if (ReturnBool) 220 // Extract boolean success flag and zext it to int. 221 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 222 CGF.ConvertType(E->getType())); 223 else 224 // Extract old value and emit it using the same type as compare value. 225 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 226 ValueType); 227 } 228 229 // Emit a simple mangled intrinsic that has 1 argument and a return type 230 // matching the argument type. 231 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 232 const CallExpr *E, 233 unsigned IntrinsicID) { 234 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 235 236 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 237 return CGF.Builder.CreateCall(F, Src0); 238 } 239 240 // Emit an intrinsic that has 2 operands of the same type as its result. 
241 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 242 const CallExpr *E, 243 unsigned IntrinsicID) { 244 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 245 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 246 247 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 248 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 249 } 250 251 // Emit an intrinsic that has 3 operands of the same type as its result. 252 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 253 const CallExpr *E, 254 unsigned IntrinsicID) { 255 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 256 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 257 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 258 259 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 260 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 261 } 262 263 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 264 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 265 const CallExpr *E, 266 unsigned IntrinsicID) { 267 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 268 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 269 270 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 271 return CGF.Builder.CreateCall(F, {Src0, Src1}); 272 } 273 274 /// EmitFAbs - Emit a call to @llvm.fabs(). 275 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 276 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 277 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 278 Call->setDoesNotAccessMemory(); 279 return Call; 280 } 281 282 /// Emit the computation of the sign bit for a floating point value. Returns 283 /// the i1 sign bit value. 
284 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 285 LLVMContext &C = CGF.CGM.getLLVMContext(); 286 287 llvm::Type *Ty = V->getType(); 288 int Width = Ty->getPrimitiveSizeInBits(); 289 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 290 V = CGF.Builder.CreateBitCast(V, IntTy); 291 if (Ty->isPPC_FP128Ty()) { 292 // We want the sign bit of the higher-order double. The bitcast we just 293 // did works as if the double-double was stored to memory and then 294 // read as an i128. The "store" will put the higher-order double in the 295 // lower address in both little- and big-Endian modes, but the "load" 296 // will treat those bits as a different part of the i128: the low bits in 297 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 298 // we need to shift the high bits down to the low before truncating. 299 Width >>= 1; 300 if (CGF.getTarget().isBigEndian()) { 301 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 302 V = CGF.Builder.CreateLShr(V, ShiftCst); 303 } 304 // We are truncating value in order to extract the higher-order 305 // double, which we will be using to extract the sign from. 306 IntTy = llvm::IntegerType::get(C, Width); 307 V = CGF.Builder.CreateTrunc(V, IntTy); 308 } 309 Value *Zero = llvm::Constant::getNullValue(IntTy); 310 return CGF.Builder.CreateICmpSLT(V, Zero); 311 } 312 313 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 314 const CallExpr *E, llvm::Constant *calleeValue) { 315 CGCallee callee = CGCallee::forDirect(calleeValue, FD); 316 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 317 } 318 319 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 320 /// depending on IntrinsicID. 321 /// 322 /// \arg CGF The current codegen function. 323 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 324 /// \arg X The first argument to the llvm.*.with.overflow.*. 
325 /// \arg Y The second argument to the llvm.*.with.overflow.*. 326 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 327 /// \returns The result (i.e. sum/product) returned by the intrinsic. 328 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 329 const llvm::Intrinsic::ID IntrinsicID, 330 llvm::Value *X, llvm::Value *Y, 331 llvm::Value *&Carry) { 332 // Make sure we have integers of the same width. 333 assert(X->getType() == Y->getType() && 334 "Arguments must be the same type. (Did you forget to make sure both " 335 "arguments have the same integer width?)"); 336 337 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 338 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 339 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 340 return CGF.Builder.CreateExtractValue(Tmp, 0); 341 } 342 343 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 344 unsigned IntrinsicID, 345 int low, int high) { 346 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 347 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 348 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 349 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 350 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 351 return Call; 352 } 353 354 namespace { 355 struct WidthAndSignedness { 356 unsigned Width; 357 bool Signed; 358 }; 359 } 360 361 static WidthAndSignedness 362 getIntegerWidthAndSignedness(const clang::ASTContext &context, 363 const clang::QualType Type) { 364 assert(Type->isIntegerType() && "Given type is not an integer."); 365 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 366 bool Signed = Type->isSignedIntegerType(); 367 return {Width, Signed}; 368 } 369 370 // Given one or more integer types, this function produces an integer type that 371 // encompasses them: any value in one of the given types could be expressed in 372 // the encompassing type. 
373 static struct WidthAndSignedness 374 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 375 assert(Types.size() > 0 && "Empty list of types."); 376 377 // If any of the given types is signed, we must return a signed type. 378 bool Signed = false; 379 for (const auto &Type : Types) { 380 Signed |= Type.Signed; 381 } 382 383 // The encompassing type must have a width greater than or equal to the width 384 // of the specified types. Aditionally, if the encompassing type is signed, 385 // its width must be strictly greater than the width of any unsigned types 386 // given. 387 unsigned Width = 0; 388 for (const auto &Type : Types) { 389 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 390 if (Width < MinWidth) { 391 Width = MinWidth; 392 } 393 } 394 395 return {Width, Signed}; 396 } 397 398 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 399 llvm::Type *DestType = Int8PtrTy; 400 if (ArgValue->getType() != DestType) 401 ArgValue = 402 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 403 404 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 405 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 406 } 407 408 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 409 /// __builtin_object_size(p, @p To) is correct 410 static bool areBOSTypesCompatible(int From, int To) { 411 // Note: Our __builtin_object_size implementation currently treats Type=0 and 412 // Type=2 identically. Encoding this implementation detail here may make 413 // improving __builtin_object_size difficult in the future, so it's omitted. 414 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 415 } 416 417 static llvm::Value * 418 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 419 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 420 } 421 422 llvm::Value * 423 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 424 llvm::IntegerType *ResType, 425 llvm::Value *EmittedE) { 426 uint64_t ObjectSize; 427 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 428 return emitBuiltinObjectSize(E, Type, ResType, EmittedE); 429 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 430 } 431 432 /// Returns a Value corresponding to the size of the given expression. 433 /// This Value may be either of the following: 434 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 435 /// it) 436 /// - A call to the @llvm.objectsize intrinsic 437 /// 438 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null 439 /// and we wouldn't otherwise try to reference a pass_object_size parameter, 440 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. 441 llvm::Value * 442 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 443 llvm::IntegerType *ResType, 444 llvm::Value *EmittedE) { 445 // We need to reference an argument if the pointer is a parameter with the 446 // pass_object_size attribute. 447 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 448 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 449 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 450 if (Param != nullptr && PS != nullptr && 451 areBOSTypesCompatible(PS->getType(), Type)) { 452 auto Iter = SizeArguments.find(Param); 453 assert(Iter != SizeArguments.end()); 454 455 const ImplicitParamDecl *D = Iter->second; 456 auto DIter = LocalDeclMap.find(D); 457 assert(DIter != LocalDeclMap.end()); 458 459 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 460 getContext().getSizeType(), E->getLocStart()); 461 } 462 } 463 464 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 465 // evaluate E for side-effects. 
In either case, we shouldn't lower to 466 // @llvm.objectsize. 467 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) 468 return getDefaultBuiltinObjectSizeResult(Type, ResType); 469 470 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); 471 assert(Ptr->getType()->isPointerTy() && 472 "Non-pointer passed to __builtin_object_size?"); 473 474 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); 475 476 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. 477 Value *Min = Builder.getInt1((Type & 2) != 0); 478 // For GCC compatability, __builtin_object_size treat NULL as unknown size. 479 Value *NullIsUnknown = Builder.getTrue(); 480 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); 481 } 482 483 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we 484 // handle them here. 485 enum class CodeGenFunction::MSVCIntrin { 486 _BitScanForward, 487 _BitScanReverse, 488 _InterlockedAnd, 489 _InterlockedDecrement, 490 _InterlockedExchange, 491 _InterlockedExchangeAdd, 492 _InterlockedExchangeSub, 493 _InterlockedIncrement, 494 _InterlockedOr, 495 _InterlockedXor, 496 _interlockedbittestandset, 497 __fastfail, 498 }; 499 500 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 501 const CallExpr *E) { 502 switch (BuiltinID) { 503 case MSVCIntrin::_BitScanForward: 504 case MSVCIntrin::_BitScanReverse: { 505 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 506 507 llvm::Type *ArgType = ArgValue->getType(); 508 llvm::Type *IndexType = 509 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 510 llvm::Type *ResultType = ConvertType(E->getType()); 511 512 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 513 Value *ResZero = llvm::Constant::getNullValue(ResultType); 514 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 515 516 BasicBlock *Begin = Builder.GetInsertBlock(); 517 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 518 
Builder.SetInsertPoint(End); 519 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 520 521 Builder.SetInsertPoint(Begin); 522 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 523 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 524 Builder.CreateCondBr(IsZero, End, NotZero); 525 Result->addIncoming(ResZero, Begin); 526 527 Builder.SetInsertPoint(NotZero); 528 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 529 530 if (BuiltinID == MSVCIntrin::_BitScanForward) { 531 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 532 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 533 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 534 Builder.CreateStore(ZeroCount, IndexAddress, false); 535 } else { 536 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 537 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 538 539 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 540 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 541 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 542 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 543 Builder.CreateStore(Index, IndexAddress, false); 544 } 545 Builder.CreateBr(End); 546 Result->addIncoming(ResOne, NotZero); 547 548 Builder.SetInsertPoint(End); 549 return Result; 550 } 551 case MSVCIntrin::_InterlockedAnd: 552 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 553 case MSVCIntrin::_InterlockedExchange: 554 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 555 case MSVCIntrin::_InterlockedExchangeAdd: 556 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 557 case MSVCIntrin::_InterlockedExchangeSub: 558 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 559 case MSVCIntrin::_InterlockedOr: 560 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 561 case MSVCIntrin::_InterlockedXor: 562 
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 563 564 case MSVCIntrin::_interlockedbittestandset: { 565 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 566 llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); 567 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 568 AtomicRMWInst::Or, Addr, 569 Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), 570 llvm::AtomicOrdering::SequentiallyConsistent); 571 // Shift the relevant bit to the least significant position, truncate to 572 // the result type, and test the low bit. 573 llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); 574 llvm::Value *Truncated = 575 Builder.CreateTrunc(Shifted, ConvertType(E->getType())); 576 return Builder.CreateAnd(Truncated, 577 ConstantInt::get(Truncated->getType(), 1)); 578 } 579 580 case MSVCIntrin::_InterlockedDecrement: { 581 llvm::Type *IntTy = ConvertType(E->getType()); 582 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 583 AtomicRMWInst::Sub, 584 EmitScalarExpr(E->getArg(0)), 585 ConstantInt::get(IntTy, 1), 586 llvm::AtomicOrdering::SequentiallyConsistent); 587 return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); 588 } 589 case MSVCIntrin::_InterlockedIncrement: { 590 llvm::Type *IntTy = ConvertType(E->getType()); 591 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 592 AtomicRMWInst::Add, 593 EmitScalarExpr(E->getArg(0)), 594 ConstantInt::get(IntTy, 1), 595 llvm::AtomicOrdering::SequentiallyConsistent); 596 return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); 597 } 598 599 case MSVCIntrin::__fastfail: { 600 // Request immediate process termination from the kernel. 
The instruction 601 // sequences to do this are documented on MSDN: 602 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 603 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 604 StringRef Asm, Constraints; 605 switch (ISA) { 606 default: 607 ErrorUnsupported(E, "__fastfail call for this architecture"); 608 break; 609 case llvm::Triple::x86: 610 case llvm::Triple::x86_64: 611 Asm = "int $$0x29"; 612 Constraints = "{cx}"; 613 break; 614 case llvm::Triple::thumb: 615 Asm = "udf #251"; 616 Constraints = "{r0}"; 617 break; 618 } 619 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 620 llvm::InlineAsm *IA = 621 llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); 622 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 623 getLLVMContext(), llvm::AttributeList::FunctionIndex, 624 llvm::Attribute::NoReturn); 625 CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 626 CS.setAttributes(NoReturnAttr); 627 return CS.getInstruction(); 628 } 629 } 630 llvm_unreachable("Incorrect MSVC intrinsic!"); 631 } 632 633 namespace { 634 // ARC cleanup for __builtin_os_log_format 635 struct CallObjCArcUse final : EHScopeStack::Cleanup { 636 CallObjCArcUse(llvm::Value *object) : object(object) {} 637 llvm::Value *object; 638 639 void Emit(CodeGenFunction &CGF, Flags flags) override { 640 CGF.EmitARCIntrinsicUse(object); 641 } 642 }; 643 } 644 645 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, 646 BuiltinCheckKind Kind) { 647 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) 648 && "Unsupported builtin check kind"); 649 650 Value *ArgValue = EmitScalarExpr(E); 651 if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) 652 return ArgValue; 653 654 SanitizerScope SanScope(this); 655 Value *Cond = Builder.CreateICmpNE( 656 ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); 657 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), 658 
SanitizerHandler::InvalidBuiltin, 659 {EmitCheckSourceLocation(E->getExprLoc()), 660 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, 661 None); 662 return ArgValue; 663 } 664 665 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 666 unsigned BuiltinID, const CallExpr *E, 667 ReturnValueSlot ReturnValue) { 668 // See if we can constant fold this builtin. If so, don't emit it at all. 669 Expr::EvalResult Result; 670 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 671 !Result.hasSideEffects()) { 672 if (Result.Val.isInt()) 673 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 674 Result.Val.getInt())); 675 if (Result.Val.isFloat()) 676 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 677 Result.Val.getFloat())); 678 } 679 680 switch (BuiltinID) { 681 default: break; // Handle intrinsics and libm functions below. 682 case Builtin::BI__builtin___CFStringMakeConstantString: 683 case Builtin::BI__builtin___NSStringMakeConstantString: 684 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); 685 case Builtin::BI__builtin_stdarg_start: 686 case Builtin::BI__builtin_va_start: 687 case Builtin::BI__va_start: 688 case Builtin::BI__builtin_va_end: 689 return RValue::get( 690 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 691 ? 
EmitScalarExpr(E->getArg(0)) 692 : EmitVAListRef(E->getArg(0)).getPointer(), 693 BuiltinID != Builtin::BI__builtin_va_end)); 694 case Builtin::BI__builtin_va_copy: { 695 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 696 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 697 698 llvm::Type *Type = Int8PtrTy; 699 700 DstPtr = Builder.CreateBitCast(DstPtr, Type); 701 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 702 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 703 {DstPtr, SrcPtr})); 704 } 705 case Builtin::BI__builtin_abs: 706 case Builtin::BI__builtin_labs: 707 case Builtin::BI__builtin_llabs: { 708 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 709 710 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 711 Value *CmpResult = 712 Builder.CreateICmpSGE(ArgValue, 713 llvm::Constant::getNullValue(ArgValue->getType()), 714 "abscond"); 715 Value *Result = 716 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 717 718 return RValue::get(Result); 719 } 720 case Builtin::BI__builtin_fabs: 721 case Builtin::BI__builtin_fabsf: 722 case Builtin::BI__builtin_fabsl: { 723 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 724 } 725 case Builtin::BI__builtin_fmod: 726 case Builtin::BI__builtin_fmodf: 727 case Builtin::BI__builtin_fmodl: { 728 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 729 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 730 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 731 return RValue::get(Result); 732 } 733 case Builtin::BI__builtin_copysign: 734 case Builtin::BI__builtin_copysignf: 735 case Builtin::BI__builtin_copysignl: { 736 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 737 } 738 case Builtin::BI__builtin_ceil: 739 case Builtin::BI__builtin_ceilf: 740 case Builtin::BI__builtin_ceill: { 741 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 742 } 743 case Builtin::BI__builtin_floor: 744 case Builtin::BI__builtin_floorf: 745 case 
Builtin::BI__builtin_floorl: { 746 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 747 } 748 case Builtin::BI__builtin_trunc: 749 case Builtin::BI__builtin_truncf: 750 case Builtin::BI__builtin_truncl: { 751 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 752 } 753 case Builtin::BI__builtin_rint: 754 case Builtin::BI__builtin_rintf: 755 case Builtin::BI__builtin_rintl: { 756 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 757 } 758 case Builtin::BI__builtin_nearbyint: 759 case Builtin::BI__builtin_nearbyintf: 760 case Builtin::BI__builtin_nearbyintl: { 761 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 762 } 763 case Builtin::BI__builtin_round: 764 case Builtin::BI__builtin_roundf: 765 case Builtin::BI__builtin_roundl: { 766 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 767 } 768 case Builtin::BI__builtin_fmin: 769 case Builtin::BI__builtin_fminf: 770 case Builtin::BI__builtin_fminl: { 771 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 772 } 773 case Builtin::BI__builtin_fmax: 774 case Builtin::BI__builtin_fmaxf: 775 case Builtin::BI__builtin_fmaxl: { 776 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 777 } 778 case Builtin::BI__builtin_conj: 779 case Builtin::BI__builtin_conjf: 780 case Builtin::BI__builtin_conjl: { 781 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 782 Value *Real = ComplexVal.first; 783 Value *Imag = ComplexVal.second; 784 Value *Zero = 785 Imag->getType()->isFPOrFPVectorTy() 786 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 787 : llvm::Constant::getNullValue(Imag->getType()); 788 789 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 790 return RValue::getComplex(std::make_pair(Real, Imag)); 791 } 792 case Builtin::BI__builtin_creal: 793 case Builtin::BI__builtin_crealf: 794 case Builtin::BI__builtin_creall: 795 case Builtin::BIcreal: 796 case Builtin::BIcrealf: 797 case Builtin::BIcreall: { 798 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 799 return RValue::get(ComplexVal.first); 800 } 801 802 case Builtin::BI__builtin_cimag: 803 case Builtin::BI__builtin_cimagf: 804 case Builtin::BI__builtin_cimagl: 805 case Builtin::BIcimag: 806 case Builtin::BIcimagf: 807 case Builtin::BIcimagl: { 808 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 809 return RValue::get(ComplexVal.second); 810 } 811 812 case Builtin::BI__builtin_ctzs: 813 case Builtin::BI__builtin_ctz: 814 case Builtin::BI__builtin_ctzl: 815 case Builtin::BI__builtin_ctzll: { 816 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); 817 818 llvm::Type *ArgType = ArgValue->getType(); 819 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 820 821 llvm::Type *ResultType = ConvertType(E->getType()); 822 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 823 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 824 if (Result->getType() != ResultType) 825 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 826 "cast"); 827 return RValue::get(Result); 828 } 829 case Builtin::BI__builtin_clzs: 830 case Builtin::BI__builtin_clz: 831 case Builtin::BI__builtin_clzl: 832 case Builtin::BI__builtin_clzll: { 833 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); 834 835 llvm::Type *ArgType = ArgValue->getType(); 836 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 837 838 llvm::Type *ResultType = ConvertType(E->getType()); 839 Value *ZeroUndef = 
Builder.getInt1(getTarget().isCLZForZeroUndef()); 840 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 841 if (Result->getType() != ResultType) 842 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 843 "cast"); 844 return RValue::get(Result); 845 } 846 case Builtin::BI__builtin_ffs: 847 case Builtin::BI__builtin_ffsl: 848 case Builtin::BI__builtin_ffsll: { 849 // ffs(x) -> x ? cttz(x) + 1 : 0 850 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 851 852 llvm::Type *ArgType = ArgValue->getType(); 853 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 854 855 llvm::Type *ResultType = ConvertType(E->getType()); 856 Value *Tmp = 857 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 858 llvm::ConstantInt::get(ArgType, 1)); 859 Value *Zero = llvm::Constant::getNullValue(ArgType); 860 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 861 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 862 if (Result->getType() != ResultType) 863 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 864 "cast"); 865 return RValue::get(Result); 866 } 867 case Builtin::BI__builtin_parity: 868 case Builtin::BI__builtin_parityl: 869 case Builtin::BI__builtin_parityll: { 870 // parity(x) -> ctpop(x) & 1 871 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 872 873 llvm::Type *ArgType = ArgValue->getType(); 874 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 875 876 llvm::Type *ResultType = ConvertType(E->getType()); 877 Value *Tmp = Builder.CreateCall(F, ArgValue); 878 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 879 if (Result->getType() != ResultType) 880 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 881 "cast"); 882 return RValue::get(Result); 883 } 884 case Builtin::BI__popcnt16: 885 case Builtin::BI__popcnt: 886 case Builtin::BI__popcnt64: 887 case Builtin::BI__builtin_popcount: 888 case Builtin::BI__builtin_popcountl: 889 case 
Builtin::BI__builtin_popcountll: { 890 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 891 892 llvm::Type *ArgType = ArgValue->getType(); 893 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 894 895 llvm::Type *ResultType = ConvertType(E->getType()); 896 Value *Result = Builder.CreateCall(F, ArgValue); 897 if (Result->getType() != ResultType) 898 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 899 "cast"); 900 return RValue::get(Result); 901 } 902 case Builtin::BI_rotr8: 903 case Builtin::BI_rotr16: 904 case Builtin::BI_rotr: 905 case Builtin::BI_lrotr: 906 case Builtin::BI_rotr64: { 907 Value *Val = EmitScalarExpr(E->getArg(0)); 908 Value *Shift = EmitScalarExpr(E->getArg(1)); 909 910 llvm::Type *ArgType = Val->getType(); 911 Shift = Builder.CreateIntCast(Shift, ArgType, false); 912 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 913 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 914 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 915 916 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 917 Shift = Builder.CreateAnd(Shift, Mask); 918 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 919 920 Value *RightShifted = Builder.CreateLShr(Val, Shift); 921 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 922 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 923 924 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 925 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 926 return RValue::get(Result); 927 } 928 case Builtin::BI_rotl8: 929 case Builtin::BI_rotl16: 930 case Builtin::BI_rotl: 931 case Builtin::BI_lrotl: 932 case Builtin::BI_rotl64: { 933 Value *Val = EmitScalarExpr(E->getArg(0)); 934 Value *Shift = EmitScalarExpr(E->getArg(1)); 935 936 llvm::Type *ArgType = Val->getType(); 937 Shift = Builder.CreateIntCast(Shift, ArgType, false); 938 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 939 Value *ArgTypeSize = 
llvm::ConstantInt::get(ArgType, ArgWidth); 940 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 941 942 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 943 Shift = Builder.CreateAnd(Shift, Mask); 944 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 945 946 Value *LeftShifted = Builder.CreateShl(Val, Shift); 947 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 948 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 949 950 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 951 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 952 return RValue::get(Result); 953 } 954 case Builtin::BI__builtin_unpredictable: { 955 // Always return the argument of __builtin_unpredictable. LLVM does not 956 // handle this builtin. Metadata for this builtin should be added directly 957 // to instructions such as branches or switches that use it. 958 return RValue::get(EmitScalarExpr(E->getArg(0))); 959 } 960 case Builtin::BI__builtin_expect: { 961 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 962 llvm::Type *ArgType = ArgValue->getType(); 963 964 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 965 // Don't generate llvm.expect on -O0 as the backend won't use it for 966 // anything. 967 // Note, we still IRGen ExpectedValue because it could have side-effects. 968 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 969 return RValue::get(ArgValue); 970 971 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 972 Value *Result = 973 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 974 return RValue::get(Result); 975 } 976 case Builtin::BI__builtin_assume_aligned: { 977 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 978 Value *OffsetValue = 979 (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; 980 981 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 982 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 983 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 984 985 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 986 return RValue::get(PtrValue); 987 } 988 case Builtin::BI__assume: 989 case Builtin::BI__builtin_assume: { 990 if (E->getArg(0)->HasSideEffects(getContext())) 991 return RValue::get(nullptr); 992 993 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 994 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 995 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 996 } 997 case Builtin::BI__builtin_bswap16: 998 case Builtin::BI__builtin_bswap32: 999 case Builtin::BI__builtin_bswap64: { 1000 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 1001 } 1002 case Builtin::BI__builtin_bitreverse8: 1003 case Builtin::BI__builtin_bitreverse16: 1004 case Builtin::BI__builtin_bitreverse32: 1005 case Builtin::BI__builtin_bitreverse64: { 1006 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 1007 } 1008 case Builtin::BI__builtin_object_size: { 1009 unsigned Type = 1010 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 1011 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 1012 1013 // We pass this builtin onto the optimizer so that it can figure out the 1014 // object size in more complex cases. 1015 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 1016 /*EmittedE=*/nullptr)); 1017 } 1018 case Builtin::BI__builtin_prefetch: { 1019 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 1020 // FIXME: Technically these constants should of type 'int', yes? 1021 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 1022 llvm::ConstantInt::get(Int32Ty, 0); 1023 Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : 1024 llvm::ConstantInt::get(Int32Ty, 3); 1025 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 1026 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 1027 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 1028 } 1029 case Builtin::BI__builtin_readcyclecounter: { 1030 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 1031 return RValue::get(Builder.CreateCall(F)); 1032 } 1033 case Builtin::BI__builtin___clear_cache: { 1034 Value *Begin = EmitScalarExpr(E->getArg(0)); 1035 Value *End = EmitScalarExpr(E->getArg(1)); 1036 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 1037 return RValue::get(Builder.CreateCall(F, {Begin, End})); 1038 } 1039 case Builtin::BI__builtin_trap: 1040 return RValue::get(EmitTrapCall(Intrinsic::trap)); 1041 case Builtin::BI__debugbreak: 1042 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 1043 case Builtin::BI__builtin_unreachable: { 1044 if (SanOpts.has(SanitizerKind::Unreachable)) { 1045 SanitizerScope SanScope(this); 1046 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 1047 SanitizerKind::Unreachable), 1048 SanitizerHandler::BuiltinUnreachable, 1049 EmitCheckSourceLocation(E->getExprLoc()), None); 1050 } else 1051 Builder.CreateUnreachable(); 1052 1053 // We do need to preserve an insertion point. 
1054 EmitBlock(createBasicBlock("unreachable.cont")); 1055 1056 return RValue::get(nullptr); 1057 } 1058 1059 case Builtin::BI__builtin_powi: 1060 case Builtin::BI__builtin_powif: 1061 case Builtin::BI__builtin_powil: { 1062 Value *Base = EmitScalarExpr(E->getArg(0)); 1063 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1064 llvm::Type *ArgType = Base->getType(); 1065 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 1066 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1067 } 1068 1069 case Builtin::BI__builtin_isgreater: 1070 case Builtin::BI__builtin_isgreaterequal: 1071 case Builtin::BI__builtin_isless: 1072 case Builtin::BI__builtin_islessequal: 1073 case Builtin::BI__builtin_islessgreater: 1074 case Builtin::BI__builtin_isunordered: { 1075 // Ordered comparisons: we know the arguments to these are matching scalar 1076 // floating point values. 1077 Value *LHS = EmitScalarExpr(E->getArg(0)); 1078 Value *RHS = EmitScalarExpr(E->getArg(1)); 1079 1080 switch (BuiltinID) { 1081 default: llvm_unreachable("Unknown ordered comparison"); 1082 case Builtin::BI__builtin_isgreater: 1083 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 1084 break; 1085 case Builtin::BI__builtin_isgreaterequal: 1086 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 1087 break; 1088 case Builtin::BI__builtin_isless: 1089 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 1090 break; 1091 case Builtin::BI__builtin_islessequal: 1092 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 1093 break; 1094 case Builtin::BI__builtin_islessgreater: 1095 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 1096 break; 1097 case Builtin::BI__builtin_isunordered: 1098 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1099 break; 1100 } 1101 // ZExt bool to int type. 
1102 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1103 } 1104 case Builtin::BI__builtin_isnan: { 1105 Value *V = EmitScalarExpr(E->getArg(0)); 1106 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1107 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1108 } 1109 1110 case Builtin::BIfinite: 1111 case Builtin::BI__finite: 1112 case Builtin::BIfinitef: 1113 case Builtin::BI__finitef: 1114 case Builtin::BIfinitel: 1115 case Builtin::BI__finitel: 1116 case Builtin::BI__builtin_isinf: 1117 case Builtin::BI__builtin_isfinite: { 1118 // isinf(x) --> fabs(x) == infinity 1119 // isfinite(x) --> fabs(x) != infinity 1120 // x != NaN via the ordered compare in either case. 1121 Value *V = EmitScalarExpr(E->getArg(0)); 1122 Value *Fabs = EmitFAbs(*this, V); 1123 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1124 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1125 ? CmpInst::FCMP_OEQ 1126 : CmpInst::FCMP_ONE; 1127 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1128 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1129 } 1130 1131 case Builtin::BI__builtin_isinf_sign: { 1132 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 1133 Value *Arg = EmitScalarExpr(E->getArg(0)); 1134 Value *AbsArg = EmitFAbs(*this, Arg); 1135 Value *IsInf = Builder.CreateFCmpOEQ( 1136 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1137 Value *IsNeg = EmitSignBit(*this, Arg); 1138 1139 llvm::Type *IntTy = ConvertType(E->getType()); 1140 Value *Zero = Constant::getNullValue(IntTy); 1141 Value *One = ConstantInt::get(IntTy, 1); 1142 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1143 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1144 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1145 return RValue::get(Result); 1146 } 1147 1148 case Builtin::BI__builtin_isnormal: { 1149 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1150 Value *V = EmitScalarExpr(E->getArg(0)); 1151 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1152 1153 Value *Abs = EmitFAbs(*this, V); 1154 Value *IsLessThanInf = 1155 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1156 APFloat Smallest = APFloat::getSmallestNormalized( 1157 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1158 Value *IsNormal = 1159 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1160 "isnormal"); 1161 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1162 V = Builder.CreateAnd(V, IsNormal, "and"); 1163 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1164 } 1165 1166 case Builtin::BI__builtin_fpclassify: { 1167 Value *V = EmitScalarExpr(E->getArg(5)); 1168 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1169 1170 // Create Result 1171 BasicBlock *Begin = Builder.GetInsertBlock(); 1172 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1173 Builder.SetInsertPoint(End); 1174 PHINode *Result = 1175 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1176 "fpclassify_result"); 1177 1178 // if (V==0) return FP_ZERO 1179 Builder.SetInsertPoint(Begin); 1180 Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1181 "iszero"); 1182 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1183 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1184 Builder.CreateCondBr(IsZero, End, NotZero); 1185 Result->addIncoming(ZeroLiteral, Begin); 1186 1187 // if (V != V) return FP_NAN 1188 Builder.SetInsertPoint(NotZero); 1189 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1190 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1191 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1192 Builder.CreateCondBr(IsNan, End, NotNan); 1193 Result->addIncoming(NanLiteral, NotZero); 1194 1195 // if (fabs(V) == infinity) return FP_INFINITY 1196 Builder.SetInsertPoint(NotNan); 1197 Value *VAbs = EmitFAbs(*this, V); 1198 Value *IsInf = 1199 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1200 "isinf"); 1201 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1202 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1203 Builder.CreateCondBr(IsInf, End, NotInf); 1204 Result->addIncoming(InfLiteral, NotNan); 1205 1206 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1207 Builder.SetInsertPoint(NotInf); 1208 APFloat Smallest = APFloat::getSmallestNormalized( 1209 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1210 Value *IsNormal = 1211 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1212 "isnormal"); 1213 Value *NormalResult = 1214 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1215 EmitScalarExpr(E->getArg(3))); 1216 Builder.CreateBr(End); 1217 Result->addIncoming(NormalResult, NotInf); 1218 1219 // return Result 1220 Builder.SetInsertPoint(End); 1221 return RValue::get(Result); 1222 } 1223 1224 case Builtin::BIalloca: 1225 case Builtin::BI_alloca: 1226 case Builtin::BI__builtin_alloca: { 1227 Value *Size = EmitScalarExpr(E->getArg(0)); 1228 const TargetInfo &TI = getContext().getTargetInfo(); 
1229 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 1230 unsigned SuitableAlignmentInBytes = 1231 CGM.getContext() 1232 .toCharUnitsFromBits(TI.getSuitableAlign()) 1233 .getQuantity(); 1234 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1235 AI->setAlignment(SuitableAlignmentInBytes); 1236 return RValue::get(AI); 1237 } 1238 1239 case Builtin::BI__builtin_alloca_with_align: { 1240 Value *Size = EmitScalarExpr(E->getArg(0)); 1241 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 1242 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 1243 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 1244 unsigned AlignmentInBytes = 1245 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); 1246 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1247 AI->setAlignment(AlignmentInBytes); 1248 return RValue::get(AI); 1249 } 1250 1251 case Builtin::BIbzero: 1252 case Builtin::BI__builtin_bzero: { 1253 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1254 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1255 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1256 E->getArg(0)->getExprLoc(), FD, 0); 1257 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1258 return RValue::get(Dest.getPointer()); 1259 } 1260 case Builtin::BImemcpy: 1261 case Builtin::BI__builtin_memcpy: { 1262 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1263 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1264 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1265 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1266 E->getArg(0)->getExprLoc(), FD, 0); 1267 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1268 E->getArg(1)->getExprLoc(), FD, 1); 1269 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1270 return RValue::get(Dest.getPointer()); 1271 } 1272 1273 case 
Builtin::BI__builtin_char_memchr: 1274 BuiltinID = Builtin::BI__builtin_memchr; 1275 break; 1276 1277 case Builtin::BI__builtin___memcpy_chk: { 1278 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 1279 llvm::APSInt Size, DstSize; 1280 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1281 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1282 break; 1283 if (Size.ugt(DstSize)) 1284 break; 1285 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1286 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1287 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1288 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1289 return RValue::get(Dest.getPointer()); 1290 } 1291 1292 case Builtin::BI__builtin_objc_memmove_collectable: { 1293 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1294 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1295 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1296 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1297 DestAddr, SrcAddr, SizeVal); 1298 return RValue::get(DestAddr.getPointer()); 1299 } 1300 1301 case Builtin::BI__builtin___memmove_chk: { 1302 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1303 llvm::APSInt Size, DstSize; 1304 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1305 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1306 break; 1307 if (Size.ugt(DstSize)) 1308 break; 1309 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1310 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1311 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1312 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1313 return RValue::get(Dest.getPointer()); 1314 } 1315 1316 case Builtin::BImemmove: 1317 case Builtin::BI__builtin_memmove: { 1318 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1319 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1320 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1321 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1322 E->getArg(0)->getExprLoc(), FD, 0); 1323 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1324 E->getArg(1)->getExprLoc(), FD, 1); 1325 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1326 return RValue::get(Dest.getPointer()); 1327 } 1328 case Builtin::BImemset: 1329 case Builtin::BI__builtin_memset: { 1330 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1331 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1332 Builder.getInt8Ty()); 1333 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1334 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1335 E->getArg(0)->getExprLoc(), FD, 0); 1336 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1337 return RValue::get(Dest.getPointer()); 1338 } 1339 case Builtin::BI__builtin___memset_chk: { 1340 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1341 llvm::APSInt Size, DstSize; 1342 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1343 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1344 break; 1345 if (Size.ugt(DstSize)) 1346 break; 1347 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1348 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1349 Builder.getInt8Ty()); 1350 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1351 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1352 return RValue::get(Dest.getPointer()); 1353 } 1354 case Builtin::BI__builtin_dwarf_cfa: { 1355 // The offset in bytes from the first argument to the CFA. 1356 // 1357 // Why on earth is this in the frontend? Is there any reason at 1358 // all that the backend can't reasonably determine this while 1359 // lowering llvm.eh.dwarf.cfa()? 1360 // 1361 // TODO: If there's a satisfactory reason, add a target hook for 1362 // this instead of hard-coding 0, which is correct for most targets. 1363 int32_t Offset = 0; 1364 1365 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1366 return RValue::get(Builder.CreateCall(F, 1367 llvm::ConstantInt::get(Int32Ty, Offset))); 1368 } 1369 case Builtin::BI__builtin_return_address: { 1370 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1371 getContext().UnsignedIntTy); 1372 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1373 return RValue::get(Builder.CreateCall(F, Depth)); 1374 } 1375 case Builtin::BI_ReturnAddress: { 1376 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1377 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1378 } 1379 case Builtin::BI__builtin_frame_address: { 1380 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1381 getContext().UnsignedIntTy); 1382 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1383 return RValue::get(Builder.CreateCall(F, Depth)); 1384 } 1385 case Builtin::BI__builtin_extract_return_addr: { 1386 Value *Address = 
EmitScalarExpr(E->getArg(0)); 1387 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1388 return RValue::get(Result); 1389 } 1390 case Builtin::BI__builtin_frob_return_addr: { 1391 Value *Address = EmitScalarExpr(E->getArg(0)); 1392 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1393 return RValue::get(Result); 1394 } 1395 case Builtin::BI__builtin_dwarf_sp_column: { 1396 llvm::IntegerType *Ty 1397 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1398 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1399 if (Column == -1) { 1400 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1401 return RValue::get(llvm::UndefValue::get(Ty)); 1402 } 1403 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1404 } 1405 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1406 Value *Address = EmitScalarExpr(E->getArg(0)); 1407 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1408 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1409 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1410 } 1411 case Builtin::BI__builtin_eh_return: { 1412 Value *Int = EmitScalarExpr(E->getArg(0)); 1413 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1414 1415 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1416 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1417 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1418 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1419 ? Intrinsic::eh_return_i32 1420 : Intrinsic::eh_return_i64); 1421 Builder.CreateCall(F, {Int, Ptr}); 1422 Builder.CreateUnreachable(); 1423 1424 // We do need to preserve an insertion point. 
1425 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1426 1427 return RValue::get(nullptr); 1428 } 1429 case Builtin::BI__builtin_unwind_init: { 1430 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1431 return RValue::get(Builder.CreateCall(F)); 1432 } 1433 case Builtin::BI__builtin_extend_pointer: { 1434 // Extends a pointer to the size of an _Unwind_Word, which is 1435 // uint64_t on all platforms. Generally this gets poked into a 1436 // register and eventually used as an address, so if the 1437 // addressing registers are wider than pointers and the platform 1438 // doesn't implicitly ignore high-order bits when doing 1439 // addressing, we need to make sure we zext / sext based on 1440 // the platform's expectations. 1441 // 1442 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1443 1444 // Cast the pointer to intptr_t. 1445 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1446 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1447 1448 // If that's 64 bits, we're done. 1449 if (IntPtrTy->getBitWidth() == 64) 1450 return RValue::get(Result); 1451 1452 // Otherwise, ask the codegen data what to do. 1453 if (getTargetHooks().extendPointerWithSExt()) 1454 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1455 else 1456 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1457 } 1458 case Builtin::BI__builtin_setjmp: { 1459 // Buffer is a void**. 1460 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1461 1462 // Store the frame pointer to the setjmp buffer. 1463 Value *FrameAddr = 1464 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1465 ConstantInt::get(Int32Ty, 0)); 1466 Builder.CreateStore(FrameAddr, Buf); 1467 1468 // Store the stack pointer to the setjmp buffer. 
1469 Value *StackAddr = 1470 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1471 Address StackSaveSlot = 1472 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1473 Builder.CreateStore(StackAddr, StackSaveSlot); 1474 1475 // Call LLVM's EH setjmp, which is lightweight. 1476 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1477 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1478 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1479 } 1480 case Builtin::BI__builtin_longjmp: { 1481 Value *Buf = EmitScalarExpr(E->getArg(0)); 1482 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1483 1484 // Call LLVM's EH longjmp, which is lightweight. 1485 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1486 1487 // longjmp doesn't return; mark this as unreachable. 1488 Builder.CreateUnreachable(); 1489 1490 // We do need to preserve an insertion point. 1491 EmitBlock(createBasicBlock("longjmp.cont")); 1492 1493 return RValue::get(nullptr); 1494 } 1495 case Builtin::BI__sync_fetch_and_add: 1496 case Builtin::BI__sync_fetch_and_sub: 1497 case Builtin::BI__sync_fetch_and_or: 1498 case Builtin::BI__sync_fetch_and_and: 1499 case Builtin::BI__sync_fetch_and_xor: 1500 case Builtin::BI__sync_fetch_and_nand: 1501 case Builtin::BI__sync_add_and_fetch: 1502 case Builtin::BI__sync_sub_and_fetch: 1503 case Builtin::BI__sync_and_and_fetch: 1504 case Builtin::BI__sync_or_and_fetch: 1505 case Builtin::BI__sync_xor_and_fetch: 1506 case Builtin::BI__sync_nand_and_fetch: 1507 case Builtin::BI__sync_val_compare_and_swap: 1508 case Builtin::BI__sync_bool_compare_and_swap: 1509 case Builtin::BI__sync_lock_test_and_set: 1510 case Builtin::BI__sync_lock_release: 1511 case Builtin::BI__sync_swap: 1512 llvm_unreachable("Shouldn't make it through sema"); 1513 case Builtin::BI__sync_fetch_and_add_1: 1514 case Builtin::BI__sync_fetch_and_add_2: 1515 case Builtin::BI__sync_fetch_and_add_4: 1516 case Builtin::BI__sync_fetch_and_add_8: 1517 case 
Builtin::BI__sync_fetch_and_add_16: 1518 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1519 case Builtin::BI__sync_fetch_and_sub_1: 1520 case Builtin::BI__sync_fetch_and_sub_2: 1521 case Builtin::BI__sync_fetch_and_sub_4: 1522 case Builtin::BI__sync_fetch_and_sub_8: 1523 case Builtin::BI__sync_fetch_and_sub_16: 1524 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1525 case Builtin::BI__sync_fetch_and_or_1: 1526 case Builtin::BI__sync_fetch_and_or_2: 1527 case Builtin::BI__sync_fetch_and_or_4: 1528 case Builtin::BI__sync_fetch_and_or_8: 1529 case Builtin::BI__sync_fetch_and_or_16: 1530 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1531 case Builtin::BI__sync_fetch_and_and_1: 1532 case Builtin::BI__sync_fetch_and_and_2: 1533 case Builtin::BI__sync_fetch_and_and_4: 1534 case Builtin::BI__sync_fetch_and_and_8: 1535 case Builtin::BI__sync_fetch_and_and_16: 1536 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1537 case Builtin::BI__sync_fetch_and_xor_1: 1538 case Builtin::BI__sync_fetch_and_xor_2: 1539 case Builtin::BI__sync_fetch_and_xor_4: 1540 case Builtin::BI__sync_fetch_and_xor_8: 1541 case Builtin::BI__sync_fetch_and_xor_16: 1542 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1543 case Builtin::BI__sync_fetch_and_nand_1: 1544 case Builtin::BI__sync_fetch_and_nand_2: 1545 case Builtin::BI__sync_fetch_and_nand_4: 1546 case Builtin::BI__sync_fetch_and_nand_8: 1547 case Builtin::BI__sync_fetch_and_nand_16: 1548 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1549 1550 // Clang extensions: not overloaded yet. 
1551 case Builtin::BI__sync_fetch_and_min: 1552 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1553 case Builtin::BI__sync_fetch_and_max: 1554 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1555 case Builtin::BI__sync_fetch_and_umin: 1556 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1557 case Builtin::BI__sync_fetch_and_umax: 1558 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1559 1560 case Builtin::BI__sync_add_and_fetch_1: 1561 case Builtin::BI__sync_add_and_fetch_2: 1562 case Builtin::BI__sync_add_and_fetch_4: 1563 case Builtin::BI__sync_add_and_fetch_8: 1564 case Builtin::BI__sync_add_and_fetch_16: 1565 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1566 llvm::Instruction::Add); 1567 case Builtin::BI__sync_sub_and_fetch_1: 1568 case Builtin::BI__sync_sub_and_fetch_2: 1569 case Builtin::BI__sync_sub_and_fetch_4: 1570 case Builtin::BI__sync_sub_and_fetch_8: 1571 case Builtin::BI__sync_sub_and_fetch_16: 1572 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1573 llvm::Instruction::Sub); 1574 case Builtin::BI__sync_and_and_fetch_1: 1575 case Builtin::BI__sync_and_and_fetch_2: 1576 case Builtin::BI__sync_and_and_fetch_4: 1577 case Builtin::BI__sync_and_and_fetch_8: 1578 case Builtin::BI__sync_and_and_fetch_16: 1579 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1580 llvm::Instruction::And); 1581 case Builtin::BI__sync_or_and_fetch_1: 1582 case Builtin::BI__sync_or_and_fetch_2: 1583 case Builtin::BI__sync_or_and_fetch_4: 1584 case Builtin::BI__sync_or_and_fetch_8: 1585 case Builtin::BI__sync_or_and_fetch_16: 1586 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1587 llvm::Instruction::Or); 1588 case Builtin::BI__sync_xor_and_fetch_1: 1589 case Builtin::BI__sync_xor_and_fetch_2: 1590 case Builtin::BI__sync_xor_and_fetch_4: 1591 case Builtin::BI__sync_xor_and_fetch_8: 1592 case Builtin::BI__sync_xor_and_fetch_16: 1593 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1594 llvm::Instruction::Xor); 1595 case Builtin::BI__sync_nand_and_fetch_1: 1596 case Builtin::BI__sync_nand_and_fetch_2: 1597 case Builtin::BI__sync_nand_and_fetch_4: 1598 case Builtin::BI__sync_nand_and_fetch_8: 1599 case Builtin::BI__sync_nand_and_fetch_16: 1600 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1601 llvm::Instruction::And, true); 1602 1603 case Builtin::BI__sync_val_compare_and_swap_1: 1604 case Builtin::BI__sync_val_compare_and_swap_2: 1605 case Builtin::BI__sync_val_compare_and_swap_4: 1606 case Builtin::BI__sync_val_compare_and_swap_8: 1607 case Builtin::BI__sync_val_compare_and_swap_16: 1608 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1609 1610 case Builtin::BI__sync_bool_compare_and_swap_1: 1611 case Builtin::BI__sync_bool_compare_and_swap_2: 1612 case Builtin::BI__sync_bool_compare_and_swap_4: 1613 case Builtin::BI__sync_bool_compare_and_swap_8: 1614 case Builtin::BI__sync_bool_compare_and_swap_16: 1615 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1616 1617 case Builtin::BI__sync_swap_1: 1618 case Builtin::BI__sync_swap_2: 1619 case Builtin::BI__sync_swap_4: 1620 case Builtin::BI__sync_swap_8: 1621 case Builtin::BI__sync_swap_16: 1622 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1623 1624 case Builtin::BI__sync_lock_test_and_set_1: 1625 case Builtin::BI__sync_lock_test_and_set_2: 1626 case Builtin::BI__sync_lock_test_and_set_4: 1627 case Builtin::BI__sync_lock_test_and_set_8: 1628 case Builtin::BI__sync_lock_test_and_set_16: 1629 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1630 1631 case Builtin::BI__sync_lock_release_1: 1632 case Builtin::BI__sync_lock_release_2: 1633 case Builtin::BI__sync_lock_release_4: 1634 case Builtin::BI__sync_lock_release_8: 1635 case Builtin::BI__sync_lock_release_16: { 1636 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1637 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 1638 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1639 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1640 StoreSize.getQuantity() * 8); 1641 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1642 llvm::StoreInst *Store = 1643 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1644 StoreSize); 1645 Store->setAtomic(llvm::AtomicOrdering::Release); 1646 return RValue::get(nullptr); 1647 } 1648 1649 case Builtin::BI__sync_synchronize: { 1650 // We assume this is supposed to correspond to a C++0x-style 1651 // sequentially-consistent fence (i.e. this is only usable for 1652 // synchronization, not device I/O or anything like that). This intrinsic 1653 // is really badly designed in the sense that in theory, there isn't 1654 // any way to safely use it... but in practice, it mostly works 1655 // to use it with non-atomic loads and stores to get acquire/release 1656 // semantics. 1657 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1658 return RValue::get(nullptr); 1659 } 1660 1661 case Builtin::BI__builtin_nontemporal_load: 1662 return RValue::get(EmitNontemporalLoad(*this, E)); 1663 case Builtin::BI__builtin_nontemporal_store: 1664 return RValue::get(EmitNontemporalStore(*this, E)); 1665 case Builtin::BI__c11_atomic_is_lock_free: 1666 case Builtin::BI__atomic_is_lock_free: { 1667 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1668 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1669 // _Atomic(T) is always properly-aligned.
1670 const char *LibCallName = "__atomic_is_lock_free"; 1671 CallArgList Args; 1672 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1673 getContext().getSizeType()); 1674 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1675 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1676 getContext().VoidPtrTy); 1677 else 1678 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1679 getContext().VoidPtrTy); 1680 const CGFunctionInfo &FuncInfo = 1681 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1682 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1683 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1684 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 1685 ReturnValueSlot(), Args); 1686 } 1687 1688 case Builtin::BI__atomic_test_and_set: { 1689 // Look at the argument type to determine whether this is a volatile 1690 // operation. The parameter type is always volatile. 1691 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1692 bool Volatile = 1693 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1694 1695 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1696 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1697 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1698 Value *NewVal = Builder.getInt8(1); 1699 Value *Order = EmitScalarExpr(E->getArg(1)); 1700 if (isa<llvm::ConstantInt>(Order)) { 1701 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1702 AtomicRMWInst *Result = nullptr; 1703 switch (ord) { 1704 case 0: // memory_order_relaxed 1705 default: // invalid order 1706 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1707 llvm::AtomicOrdering::Monotonic); 1708 break; 1709 case 1: // memory_order_consume 1710 case 2: // memory_order_acquire 1711 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1712 llvm::AtomicOrdering::Acquire); 1713 break; 1714 case 3: // memory_order_release 1715 
Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1716 llvm::AtomicOrdering::Release); 1717 break; 1718 case 4: // memory_order_acq_rel 1719 1720 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1721 llvm::AtomicOrdering::AcquireRelease); 1722 break; 1723 case 5: // memory_order_seq_cst 1724 Result = Builder.CreateAtomicRMW( 1725 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1726 llvm::AtomicOrdering::SequentiallyConsistent); 1727 break; 1728 } 1729 Result->setVolatile(Volatile); 1730 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1731 } 1732 1733 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1734 1735 llvm::BasicBlock *BBs[5] = { 1736 createBasicBlock("monotonic", CurFn), 1737 createBasicBlock("acquire", CurFn), 1738 createBasicBlock("release", CurFn), 1739 createBasicBlock("acqrel", CurFn), 1740 createBasicBlock("seqcst", CurFn) 1741 }; 1742 llvm::AtomicOrdering Orders[5] = { 1743 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1744 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1745 llvm::AtomicOrdering::SequentiallyConsistent}; 1746 1747 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1748 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1749 1750 Builder.SetInsertPoint(ContBB); 1751 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1752 1753 for (unsigned i = 0; i < 5; ++i) { 1754 Builder.SetInsertPoint(BBs[i]); 1755 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1756 Ptr, NewVal, Orders[i]); 1757 RMW->setVolatile(Volatile); 1758 Result->addIncoming(RMW, BBs[i]); 1759 Builder.CreateBr(ContBB); 1760 } 1761 1762 SI->addCase(Builder.getInt32(0), BBs[0]); 1763 SI->addCase(Builder.getInt32(1), BBs[1]); 1764 SI->addCase(Builder.getInt32(2), BBs[1]); 1765 SI->addCase(Builder.getInt32(3), BBs[2]); 1766 SI->addCase(Builder.getInt32(4), BBs[3]); 1767 SI->addCase(Builder.getInt32(5), 
BBs[4]); 1768 1769 Builder.SetInsertPoint(ContBB); 1770 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1771 } 1772 1773 case Builtin::BI__atomic_clear: { 1774 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1775 bool Volatile = 1776 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1777 1778 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1779 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1780 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1781 Value *NewVal = Builder.getInt8(0); 1782 Value *Order = EmitScalarExpr(E->getArg(1)); 1783 if (isa<llvm::ConstantInt>(Order)) { 1784 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1785 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1786 switch (ord) { 1787 case 0: // memory_order_relaxed 1788 default: // invalid order 1789 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1790 break; 1791 case 3: // memory_order_release 1792 Store->setOrdering(llvm::AtomicOrdering::Release); 1793 break; 1794 case 5: // memory_order_seq_cst 1795 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1796 break; 1797 } 1798 return RValue::get(nullptr); 1799 } 1800 1801 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1802 1803 llvm::BasicBlock *BBs[3] = { 1804 createBasicBlock("monotonic", CurFn), 1805 createBasicBlock("release", CurFn), 1806 createBasicBlock("seqcst", CurFn) 1807 }; 1808 llvm::AtomicOrdering Orders[3] = { 1809 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1810 llvm::AtomicOrdering::SequentiallyConsistent}; 1811 1812 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1813 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1814 1815 for (unsigned i = 0; i < 3; ++i) { 1816 Builder.SetInsertPoint(BBs[i]); 1817 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1818 Store->setOrdering(Orders[i]); 1819 
Builder.CreateBr(ContBB); 1820 } 1821 1822 SI->addCase(Builder.getInt32(0), BBs[0]); 1823 SI->addCase(Builder.getInt32(3), BBs[1]); 1824 SI->addCase(Builder.getInt32(5), BBs[2]); 1825 1826 Builder.SetInsertPoint(ContBB); 1827 return RValue::get(nullptr); 1828 } 1829 1830 case Builtin::BI__atomic_thread_fence: 1831 case Builtin::BI__atomic_signal_fence: 1832 case Builtin::BI__c11_atomic_thread_fence: 1833 case Builtin::BI__c11_atomic_signal_fence: { 1834 llvm::SyncScope::ID SSID; 1835 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1836 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1837 SSID = llvm::SyncScope::SingleThread; 1838 else 1839 SSID = llvm::SyncScope::System; 1840 Value *Order = EmitScalarExpr(E->getArg(0)); 1841 if (isa<llvm::ConstantInt>(Order)) { 1842 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1843 switch (ord) { 1844 case 0: // memory_order_relaxed 1845 default: // invalid order 1846 break; 1847 case 1: // memory_order_consume 1848 case 2: // memory_order_acquire 1849 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 1850 break; 1851 case 3: // memory_order_release 1852 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 1853 break; 1854 case 4: // memory_order_acq_rel 1855 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 1856 break; 1857 case 5: // memory_order_seq_cst 1858 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 1859 break; 1860 } 1861 return RValue::get(nullptr); 1862 } 1863 1864 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1865 AcquireBB = createBasicBlock("acquire", CurFn); 1866 ReleaseBB = createBasicBlock("release", CurFn); 1867 AcqRelBB = createBasicBlock("acqrel", CurFn); 1868 SeqCstBB = createBasicBlock("seqcst", CurFn); 1869 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1870 1871 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1872 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 
1873 1874 Builder.SetInsertPoint(AcquireBB); 1875 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 1876 Builder.CreateBr(ContBB); 1877 SI->addCase(Builder.getInt32(1), AcquireBB); 1878 SI->addCase(Builder.getInt32(2), AcquireBB); 1879 1880 Builder.SetInsertPoint(ReleaseBB); 1881 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 1882 Builder.CreateBr(ContBB); 1883 SI->addCase(Builder.getInt32(3), ReleaseBB); 1884 1885 Builder.SetInsertPoint(AcqRelBB); 1886 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 1887 Builder.CreateBr(ContBB); 1888 SI->addCase(Builder.getInt32(4), AcqRelBB); 1889 1890 Builder.SetInsertPoint(SeqCstBB); 1891 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 1892 Builder.CreateBr(ContBB); 1893 SI->addCase(Builder.getInt32(5), SeqCstBB); 1894 1895 Builder.SetInsertPoint(ContBB); 1896 return RValue::get(nullptr); 1897 } 1898 1899 // Library functions with special handling. 1900 case Builtin::BIsqrt: 1901 case Builtin::BIsqrtf: 1902 case Builtin::BIsqrtl: { 1903 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1904 // in finite- or unsafe-math mode (the intrinsic has different semantics 1905 // for handling negative numbers compared to the library function, so 1906 // -fmath-errno=0 is not enough). 1907 if (!FD->hasAttr<ConstAttr>()) 1908 break; 1909 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1910 CGM.getCodeGenOpts().NoNaNsFPMath)) 1911 break; 1912 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1913 llvm::Type *ArgType = Arg0->getType(); 1914 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1915 return RValue::get(Builder.CreateCall(F, Arg0)); 1916 } 1917 1918 case Builtin::BI__builtin_pow: 1919 case Builtin::BI__builtin_powf: 1920 case Builtin::BI__builtin_powl: 1921 case Builtin::BIpow: 1922 case Builtin::BIpowf: 1923 case Builtin::BIpowl: { 1924 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
1925 if (!FD->hasAttr<ConstAttr>()) 1926 break; 1927 Value *Base = EmitScalarExpr(E->getArg(0)); 1928 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1929 llvm::Type *ArgType = Base->getType(); 1930 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1931 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1932 } 1933 1934 case Builtin::BIfma: 1935 case Builtin::BIfmaf: 1936 case Builtin::BIfmal: 1937 case Builtin::BI__builtin_fma: 1938 case Builtin::BI__builtin_fmaf: 1939 case Builtin::BI__builtin_fmal: { 1940 // Rewrite fma to intrinsic. 1941 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1942 llvm::Type *ArgType = FirstArg->getType(); 1943 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1944 return RValue::get( 1945 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1946 EmitScalarExpr(E->getArg(2))})); 1947 } 1948 1949 case Builtin::BI__builtin_signbit: 1950 case Builtin::BI__builtin_signbitf: 1951 case Builtin::BI__builtin_signbitl: { 1952 return RValue::get( 1953 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1954 ConvertType(E->getType()))); 1955 } 1956 case Builtin::BI__builtin_annotation: { 1957 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1958 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1959 AnnVal->getType()); 1960 1961 // Get the annotation string, go through casts. Sema requires this to be a 1962 // non-wide string literal, potentially casted, so the cast<> is safe. 
1963 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1964 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1965 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1966 } 1967 case Builtin::BI__builtin_addcb: 1968 case Builtin::BI__builtin_addcs: 1969 case Builtin::BI__builtin_addc: 1970 case Builtin::BI__builtin_addcl: 1971 case Builtin::BI__builtin_addcll: 1972 case Builtin::BI__builtin_subcb: 1973 case Builtin::BI__builtin_subcs: 1974 case Builtin::BI__builtin_subc: 1975 case Builtin::BI__builtin_subcl: 1976 case Builtin::BI__builtin_subcll: { 1977 1978 // We translate all of these builtins from expressions of the form: 1979 // int x = ..., y = ..., carryin = ..., carryout, result; 1980 // result = __builtin_addc(x, y, carryin, &carryout); 1981 // 1982 // to LLVM IR of the form: 1983 // 1984 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1985 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1986 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1987 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1988 // i32 %carryin) 1989 // %result = extractvalue {i32, i1} %tmp2, 0 1990 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1991 // %tmp3 = or i1 %carry1, %carry2 1992 // %tmp4 = zext i1 %tmp3 to i32 1993 // store i32 %tmp4, i32* %carryout 1994 1995 // Scalarize our inputs. 1996 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1997 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1998 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1999 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 2000 2001 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
2002 llvm::Intrinsic::ID IntrinsicId; 2003 switch (BuiltinID) { 2004 default: llvm_unreachable("Unknown multiprecision builtin id."); 2005 case Builtin::BI__builtin_addcb: 2006 case Builtin::BI__builtin_addcs: 2007 case Builtin::BI__builtin_addc: 2008 case Builtin::BI__builtin_addcl: 2009 case Builtin::BI__builtin_addcll: 2010 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2011 break; 2012 case Builtin::BI__builtin_subcb: 2013 case Builtin::BI__builtin_subcs: 2014 case Builtin::BI__builtin_subc: 2015 case Builtin::BI__builtin_subcl: 2016 case Builtin::BI__builtin_subcll: 2017 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2018 break; 2019 } 2020 2021 // Construct our resulting LLVM IR expression. 2022 llvm::Value *Carry1; 2023 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 2024 X, Y, Carry1); 2025 llvm::Value *Carry2; 2026 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 2027 Sum1, Carryin, Carry2); 2028 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 2029 X->getType()); 2030 Builder.CreateStore(CarryOut, CarryOutPtr); 2031 return RValue::get(Sum2); 2032 } 2033 2034 case Builtin::BI__builtin_add_overflow: 2035 case Builtin::BI__builtin_sub_overflow: 2036 case Builtin::BI__builtin_mul_overflow: { 2037 const clang::Expr *LeftArg = E->getArg(0); 2038 const clang::Expr *RightArg = E->getArg(1); 2039 const clang::Expr *ResultArg = E->getArg(2); 2040 2041 clang::QualType ResultQTy = 2042 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 2043 2044 WidthAndSignedness LeftInfo = 2045 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 2046 WidthAndSignedness RightInfo = 2047 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 2048 WidthAndSignedness ResultInfo = 2049 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 2050 WidthAndSignedness EncompassingInfo = 2051 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 2052 2053 llvm::Type 
*EncompassingLLVMTy = 2054 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 2055 2056 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 2057 2058 llvm::Intrinsic::ID IntrinsicId; 2059 switch (BuiltinID) { 2060 default: 2061 llvm_unreachable("Unknown overflow builtin id."); 2062 case Builtin::BI__builtin_add_overflow: 2063 IntrinsicId = EncompassingInfo.Signed 2064 ? llvm::Intrinsic::sadd_with_overflow 2065 : llvm::Intrinsic::uadd_with_overflow; 2066 break; 2067 case Builtin::BI__builtin_sub_overflow: 2068 IntrinsicId = EncompassingInfo.Signed 2069 ? llvm::Intrinsic::ssub_with_overflow 2070 : llvm::Intrinsic::usub_with_overflow; 2071 break; 2072 case Builtin::BI__builtin_mul_overflow: 2073 IntrinsicId = EncompassingInfo.Signed 2074 ? llvm::Intrinsic::smul_with_overflow 2075 : llvm::Intrinsic::umul_with_overflow; 2076 break; 2077 } 2078 2079 llvm::Value *Left = EmitScalarExpr(LeftArg); 2080 llvm::Value *Right = EmitScalarExpr(RightArg); 2081 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 2082 2083 // Extend each operand to the encompassing type. 2084 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 2085 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 2086 2087 // Perform the operation on the extended values. 2088 llvm::Value *Overflow, *Result; 2089 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 2090 2091 if (EncompassingInfo.Width > ResultInfo.Width) { 2092 // The encompassing type is wider than the result type, so we need to 2093 // truncate it. 2094 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 2095 2096 // To see if the truncation caused an overflow, we will extend 2097 // the result and then compare it to the original result. 
2098 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 2099 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 2100 llvm::Value *TruncationOverflow = 2101 Builder.CreateICmpNE(Result, ResultTruncExt); 2102 2103 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 2104 Result = ResultTrunc; 2105 } 2106 2107 // Finally, store the result using the pointer. 2108 bool isVolatile = 2109 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2110 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 2111 2112 return RValue::get(Overflow); 2113 } 2114 2115 case Builtin::BI__builtin_uadd_overflow: 2116 case Builtin::BI__builtin_uaddl_overflow: 2117 case Builtin::BI__builtin_uaddll_overflow: 2118 case Builtin::BI__builtin_usub_overflow: 2119 case Builtin::BI__builtin_usubl_overflow: 2120 case Builtin::BI__builtin_usubll_overflow: 2121 case Builtin::BI__builtin_umul_overflow: 2122 case Builtin::BI__builtin_umull_overflow: 2123 case Builtin::BI__builtin_umulll_overflow: 2124 case Builtin::BI__builtin_sadd_overflow: 2125 case Builtin::BI__builtin_saddl_overflow: 2126 case Builtin::BI__builtin_saddll_overflow: 2127 case Builtin::BI__builtin_ssub_overflow: 2128 case Builtin::BI__builtin_ssubl_overflow: 2129 case Builtin::BI__builtin_ssubll_overflow: 2130 case Builtin::BI__builtin_smul_overflow: 2131 case Builtin::BI__builtin_smull_overflow: 2132 case Builtin::BI__builtin_smulll_overflow: { 2133 2134 // We translate all of these builtins directly to the relevant llvm IR node. 2135 2136 // Scalarize our inputs. 
2137 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2138 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2139 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2140 2141 // Decide which of the overflow intrinsics we are lowering to: 2142 llvm::Intrinsic::ID IntrinsicId; 2143 switch (BuiltinID) { 2144 default: llvm_unreachable("Unknown overflow builtin id."); 2145 case Builtin::BI__builtin_uadd_overflow: 2146 case Builtin::BI__builtin_uaddl_overflow: 2147 case Builtin::BI__builtin_uaddll_overflow: 2148 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2149 break; 2150 case Builtin::BI__builtin_usub_overflow: 2151 case Builtin::BI__builtin_usubl_overflow: 2152 case Builtin::BI__builtin_usubll_overflow: 2153 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2154 break; 2155 case Builtin::BI__builtin_umul_overflow: 2156 case Builtin::BI__builtin_umull_overflow: 2157 case Builtin::BI__builtin_umulll_overflow: 2158 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2159 break; 2160 case Builtin::BI__builtin_sadd_overflow: 2161 case Builtin::BI__builtin_saddl_overflow: 2162 case Builtin::BI__builtin_saddll_overflow: 2163 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2164 break; 2165 case Builtin::BI__builtin_ssub_overflow: 2166 case Builtin::BI__builtin_ssubl_overflow: 2167 case Builtin::BI__builtin_ssubll_overflow: 2168 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2169 break; 2170 case Builtin::BI__builtin_smul_overflow: 2171 case Builtin::BI__builtin_smull_overflow: 2172 case Builtin::BI__builtin_smulll_overflow: 2173 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2174 break; 2175 } 2176 2177 2178 llvm::Value *Carry; 2179 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2180 Builder.CreateStore(Sum, SumOutPtr); 2181 2182 return RValue::get(Carry); 2183 } 2184 case Builtin::BI__builtin_addressof: 2185 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2186 case Builtin::BI__builtin_operator_new: 2187 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2188 E->getArg(0), false); 2189 case Builtin::BI__builtin_operator_delete: 2190 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2191 E->getArg(0), true); 2192 case Builtin::BI__noop: 2193 // __noop always evaluates to an integer literal zero. 2194 return RValue::get(ConstantInt::get(IntTy, 0)); 2195 case Builtin::BI__builtin_call_with_static_chain: { 2196 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2197 const Expr *Chain = E->getArg(1); 2198 return EmitCall(Call->getCallee()->getType(), 2199 EmitCallee(Call->getCallee()), Call, ReturnValue, 2200 EmitScalarExpr(Chain)); 2201 } 2202 case Builtin::BI_InterlockedExchange8: 2203 case Builtin::BI_InterlockedExchange16: 2204 case Builtin::BI_InterlockedExchange: 2205 case Builtin::BI_InterlockedExchangePointer: 2206 return RValue::get( 2207 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2208 case Builtin::BI_InterlockedCompareExchangePointer: { 2209 llvm::Type *RTy; 2210 llvm::IntegerType *IntType = 2211 IntegerType::get(getLLVMContext(), 2212 getContext().getTypeSize(E->getType())); 2213 llvm::Type *IntPtrType = IntType->getPointerTo(); 2214 2215 llvm::Value *Destination = 2216 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2217 2218 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2219 RTy = Exchange->getType(); 2220 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2221 2222 llvm::Value *Comparand = 2223 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2224 2225 auto Result = 2226 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2227 AtomicOrdering::SequentiallyConsistent, 2228 AtomicOrdering::SequentiallyConsistent); 2229 Result->setVolatile(true); 2230 2231 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2232 0), 2233 RTy)); 2234 } 2235 case Builtin::BI_InterlockedCompareExchange8: 2236 case 
Builtin::BI_InterlockedCompareExchange16: 2237 case Builtin::BI_InterlockedCompareExchange: 2238 case Builtin::BI_InterlockedCompareExchange64: { 2239 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2240 EmitScalarExpr(E->getArg(0)), 2241 EmitScalarExpr(E->getArg(2)), 2242 EmitScalarExpr(E->getArg(1)), 2243 AtomicOrdering::SequentiallyConsistent, 2244 AtomicOrdering::SequentiallyConsistent); 2245 CXI->setVolatile(true); 2246 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2247 } 2248 case Builtin::BI_InterlockedIncrement16: 2249 case Builtin::BI_InterlockedIncrement: 2250 return RValue::get( 2251 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2252 case Builtin::BI_InterlockedDecrement16: 2253 case Builtin::BI_InterlockedDecrement: 2254 return RValue::get( 2255 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2256 case Builtin::BI_InterlockedAnd8: 2257 case Builtin::BI_InterlockedAnd16: 2258 case Builtin::BI_InterlockedAnd: 2259 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2260 case Builtin::BI_InterlockedExchangeAdd8: 2261 case Builtin::BI_InterlockedExchangeAdd16: 2262 case Builtin::BI_InterlockedExchangeAdd: 2263 return RValue::get( 2264 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2265 case Builtin::BI_InterlockedExchangeSub8: 2266 case Builtin::BI_InterlockedExchangeSub16: 2267 case Builtin::BI_InterlockedExchangeSub: 2268 return RValue::get( 2269 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2270 case Builtin::BI_InterlockedOr8: 2271 case Builtin::BI_InterlockedOr16: 2272 case Builtin::BI_InterlockedOr: 2273 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2274 case Builtin::BI_InterlockedXor8: 2275 case Builtin::BI_InterlockedXor16: 2276 case Builtin::BI_InterlockedXor: 2277 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2278 case Builtin::BI_interlockedbittestandset: 2279 return RValue::get( 2280 
EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); 2281 2282 case Builtin::BI__exception_code: 2283 case Builtin::BI_exception_code: 2284 return RValue::get(EmitSEHExceptionCode()); 2285 case Builtin::BI__exception_info: 2286 case Builtin::BI_exception_info: 2287 return RValue::get(EmitSEHExceptionInfo()); 2288 case Builtin::BI__abnormal_termination: 2289 case Builtin::BI_abnormal_termination: 2290 return RValue::get(EmitSEHAbnormalTermination()); 2291 case Builtin::BI_setjmpex: { 2292 if (getTarget().getTriple().isOSMSVCRT()) { 2293 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2294 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2295 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2296 llvm::Attribute::ReturnsTwice); 2297 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2298 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2299 "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); 2300 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2301 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2302 llvm::Value *FrameAddr = 2303 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2304 ConstantInt::get(Int32Ty, 0)); 2305 llvm::Value *Args[] = {Buf, FrameAddr}; 2306 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2307 CS.setAttributes(ReturnsTwiceAttr); 2308 return RValue::get(CS.getInstruction()); 2309 } 2310 break; 2311 } 2312 case Builtin::BI_setjmp: { 2313 if (getTarget().getTriple().isOSMSVCRT()) { 2314 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2315 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2316 llvm::Attribute::ReturnsTwice); 2317 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2318 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2319 llvm::CallSite CS; 2320 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2321 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2322 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2323 llvm::FunctionType::get(IntTy, 
ArgTypes, /*isVarArg=*/true), 2324 "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); 2325 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2326 llvm::Value *Args[] = {Buf, Count}; 2327 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2328 } else { 2329 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2330 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2331 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2332 "_setjmp", ReturnsTwiceAttr, /*Local=*/true); 2333 llvm::Value *FrameAddr = 2334 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2335 ConstantInt::get(Int32Ty, 0)); 2336 llvm::Value *Args[] = {Buf, FrameAddr}; 2337 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2338 } 2339 CS.setAttributes(ReturnsTwiceAttr); 2340 return RValue::get(CS.getInstruction()); 2341 } 2342 break; 2343 } 2344 2345 case Builtin::BI__GetExceptionInfo: { 2346 if (llvm::GlobalVariable *GV = 2347 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2348 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2349 break; 2350 } 2351 2352 case Builtin::BI__fastfail: 2353 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 2354 2355 case Builtin::BI__builtin_coro_size: { 2356 auto & Context = getContext(); 2357 auto SizeTy = Context.getSizeType(); 2358 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 2359 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2360 return RValue::get(Builder.CreateCall(F)); 2361 } 2362 2363 case Builtin::BI__builtin_coro_id: 2364 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2365 case Builtin::BI__builtin_coro_promise: 2366 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2367 case Builtin::BI__builtin_coro_resume: 2368 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2369 case Builtin::BI__builtin_coro_frame: 2370 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2371 case Builtin::BI__builtin_coro_free: 2372 return EmitCoroutineIntrinsic(E, 
Intrinsic::coro_free); 2373 case Builtin::BI__builtin_coro_destroy: 2374 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2375 case Builtin::BI__builtin_coro_done: 2376 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2377 case Builtin::BI__builtin_coro_alloc: 2378 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2379 case Builtin::BI__builtin_coro_begin: 2380 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2381 case Builtin::BI__builtin_coro_end: 2382 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2383 case Builtin::BI__builtin_coro_suspend: 2384 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2385 case Builtin::BI__builtin_coro_param: 2386 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2387 2388 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2389 case Builtin::BIread_pipe: 2390 case Builtin::BIwrite_pipe: { 2391 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2392 *Arg1 = EmitScalarExpr(E->getArg(1)); 2393 CGOpenCLRuntime OpenCLRT(CGM); 2394 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2395 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2396 2397 // Type of the generic packet parameter. 2398 unsigned GenericAS = 2399 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2400 llvm::Type *I8PTy = llvm::PointerType::get( 2401 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2402 2403 // Testing which overloaded version we should generate the call for. 2404 if (2U == E->getNumArgs()) { 2405 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2406 : "__write_pipe_2"; 2407 // Creating a generic function type to be able to call with any builtin or 2408 // user defined type. 
2409 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2410 llvm::FunctionType *FTy = llvm::FunctionType::get( 2411 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2412 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2413 return RValue::get( 2414 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2415 {Arg0, BCast, PacketSize, PacketAlign})); 2416 } else { 2417 assert(4 == E->getNumArgs() && 2418 "Illegal number of parameters to pipe function"); 2419 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2420 : "__write_pipe_4"; 2421 2422 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2423 Int32Ty, Int32Ty}; 2424 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2425 *Arg3 = EmitScalarExpr(E->getArg(3)); 2426 llvm::FunctionType *FTy = llvm::FunctionType::get( 2427 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2428 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2429 // We know the third argument is an integer type, but we may need to cast 2430 // it to i32. 2431 if (Arg2->getType() != Int32Ty) 2432 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2433 return RValue::get(Builder.CreateCall( 2434 CGM.CreateRuntimeFunction(FTy, Name), 2435 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2436 } 2437 } 2438 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2439 // functions 2440 case Builtin::BIreserve_read_pipe: 2441 case Builtin::BIreserve_write_pipe: 2442 case Builtin::BIwork_group_reserve_read_pipe: 2443 case Builtin::BIwork_group_reserve_write_pipe: 2444 case Builtin::BIsub_group_reserve_read_pipe: 2445 case Builtin::BIsub_group_reserve_write_pipe: { 2446 // Composing the mangled name for the function. 
2447 const char *Name; 2448 if (BuiltinID == Builtin::BIreserve_read_pipe) 2449 Name = "__reserve_read_pipe"; 2450 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2451 Name = "__reserve_write_pipe"; 2452 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2453 Name = "__work_group_reserve_read_pipe"; 2454 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2455 Name = "__work_group_reserve_write_pipe"; 2456 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2457 Name = "__sub_group_reserve_read_pipe"; 2458 else 2459 Name = "__sub_group_reserve_write_pipe"; 2460 2461 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2462 *Arg1 = EmitScalarExpr(E->getArg(1)); 2463 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2464 CGOpenCLRuntime OpenCLRT(CGM); 2465 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2466 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2467 2468 // Building the generic function prototype. 2469 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2470 llvm::FunctionType *FTy = llvm::FunctionType::get( 2471 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2472 // We know the second argument is an integer type, but we may need to cast 2473 // it to i32. 
2474 if (Arg1->getType() != Int32Ty) 2475 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2476 return RValue::get( 2477 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2478 {Arg0, Arg1, PacketSize, PacketAlign})); 2479 } 2480 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2481 // functions 2482 case Builtin::BIcommit_read_pipe: 2483 case Builtin::BIcommit_write_pipe: 2484 case Builtin::BIwork_group_commit_read_pipe: 2485 case Builtin::BIwork_group_commit_write_pipe: 2486 case Builtin::BIsub_group_commit_read_pipe: 2487 case Builtin::BIsub_group_commit_write_pipe: { 2488 const char *Name; 2489 if (BuiltinID == Builtin::BIcommit_read_pipe) 2490 Name = "__commit_read_pipe"; 2491 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2492 Name = "__commit_write_pipe"; 2493 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2494 Name = "__work_group_commit_read_pipe"; 2495 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2496 Name = "__work_group_commit_write_pipe"; 2497 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2498 Name = "__sub_group_commit_read_pipe"; 2499 else 2500 Name = "__sub_group_commit_write_pipe"; 2501 2502 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2503 *Arg1 = EmitScalarExpr(E->getArg(1)); 2504 CGOpenCLRuntime OpenCLRT(CGM); 2505 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2506 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2507 2508 // Building the generic function prototype. 
2509 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2510 llvm::FunctionType *FTy = 2511 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2512 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2513 2514 return RValue::get( 2515 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2516 {Arg0, Arg1, PacketSize, PacketAlign})); 2517 } 2518 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2519 case Builtin::BIget_pipe_num_packets: 2520 case Builtin::BIget_pipe_max_packets: { 2521 const char *Name; 2522 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2523 Name = "__get_pipe_num_packets"; 2524 else 2525 Name = "__get_pipe_max_packets"; 2526 2527 // Building the generic function prototype. 2528 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2529 CGOpenCLRuntime OpenCLRT(CGM); 2530 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2531 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2532 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2533 llvm::FunctionType *FTy = llvm::FunctionType::get( 2534 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2535 2536 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2537 {Arg0, PacketSize, PacketAlign})); 2538 } 2539 2540 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2541 case Builtin::BIto_global: 2542 case Builtin::BIto_local: 2543 case Builtin::BIto_private: { 2544 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2545 auto NewArgT = llvm::PointerType::get(Int8Ty, 2546 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2547 auto NewRetT = llvm::PointerType::get(Int8Ty, 2548 CGM.getContext().getTargetAddressSpace( 2549 E->getType()->getPointeeType().getAddressSpace())); 2550 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2551 llvm::Value *NewArg; 2552 if (Arg0->getType()->getPointerAddressSpace() != 2553 NewArgT->getPointerAddressSpace()) 2554 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2555 else 2556 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2557 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2558 auto NewCall = 2559 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2560 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2561 ConvertType(E->getType()))); 2562 } 2563 2564 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2565 // It contains four different overload formats specified in Table 6.13.17.1. 
2566 case Builtin::BIenqueue_kernel: { 2567 StringRef Name; // Generated function call name 2568 unsigned NumArgs = E->getNumArgs(); 2569 2570 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2571 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2572 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2573 2574 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2575 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2576 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 2577 llvm::Value *Range = NDRangeL.getAddress().getPointer(); 2578 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 2579 2580 if (NumArgs == 4) { 2581 // The most basic form of the call with parameters: 2582 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2583 Name = "__enqueue_kernel_basic"; 2584 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; 2585 llvm::FunctionType *FTy = llvm::FunctionType::get( 2586 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2587 2588 llvm::Value *Block = Builder.CreatePointerCast( 2589 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2590 2591 AttrBuilder B; 2592 B.addAttribute(Attribute::ByVal); 2593 llvm::AttributeList ByValAttrSet = 2594 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 2595 2596 auto RTCall = 2597 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 2598 {Queue, Flags, Range, Block}); 2599 RTCall->setAttributes(ByValAttrSet); 2600 return RValue::get(RTCall); 2601 } 2602 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2603 2604 // Could have events and/or vaargs. 2605 if (E->getArg(3)->getType()->isBlockPointerType()) { 2606 // No events passed, but has variadic arguments. 
2607 Name = "__enqueue_kernel_vaargs"; 2608 llvm::Value *Block = Builder.CreatePointerCast( 2609 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2610 // Create a vector of the arguments, as well as a constant value to 2611 // express to the runtime the number of variadic arguments. 2612 std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, 2613 ConstantInt::get(IntTy, NumArgs - 4)}; 2614 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, 2615 GenericVoidPtrTy, IntTy}; 2616 2617 // Each of the following arguments specifies the size of the corresponding 2618 // argument passed to the enqueued block. 2619 for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) 2620 Args.push_back( 2621 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2622 2623 llvm::FunctionType *FTy = llvm::FunctionType::get( 2624 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2625 return RValue::get( 2626 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2627 llvm::ArrayRef<llvm::Value *>(Args))); 2628 } 2629 // Any calls now have event arguments passed. 2630 if (NumArgs >= 7) { 2631 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2632 llvm::Type *EventPtrTy = EventTy->getPointerTo( 2633 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2634 2635 llvm::Value *NumEvents = 2636 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 2637 llvm::Value *EventList = 2638 E->getArg(4)->getType()->isArrayType() 2639 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2640 : EmitScalarExpr(E->getArg(4)); 2641 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2642 // Convert to generic address space. 
2643 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 2644 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 2645 llvm::Value *Block = Builder.CreatePointerCast( 2646 EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); 2647 2648 std::vector<llvm::Type *> ArgTys = { 2649 QueueTy, Int32Ty, RangeTy, Int32Ty, 2650 EventPtrTy, EventPtrTy, GenericVoidPtrTy}; 2651 2652 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2653 EventList, ClkEvent, Block}; 2654 2655 if (NumArgs == 7) { 2656 // Has events but no variadics. 2657 Name = "__enqueue_kernel_basic_events"; 2658 llvm::FunctionType *FTy = llvm::FunctionType::get( 2659 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2660 return RValue::get( 2661 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2662 llvm::ArrayRef<llvm::Value *>(Args))); 2663 } 2664 // Has event info and variadics 2665 // Pass the number of variadics to the runtime function too. 2666 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2667 ArgTys.push_back(Int32Ty); 2668 Name = "__enqueue_kernel_events_vaargs"; 2669 2670 // Each of the following arguments specifies the size of the corresponding 2671 // argument passed to the enqueued block. 2672 for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) 2673 Args.push_back( 2674 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2675 2676 llvm::FunctionType *FTy = llvm::FunctionType::get( 2677 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2678 return RValue::get( 2679 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2680 llvm::ArrayRef<llvm::Value *>(Args))); 2681 } 2682 LLVM_FALLTHROUGH; 2683 } 2684 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2685 // parameter. 
2686 case Builtin::BIget_kernel_work_group_size: { 2687 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2688 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2689 Value *Arg = EmitScalarExpr(E->getArg(0)); 2690 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2691 return RValue::get(Builder.CreateCall( 2692 CGM.CreateRuntimeFunction( 2693 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2694 "__get_kernel_work_group_size_impl"), 2695 Arg)); 2696 } 2697 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2698 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2699 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2700 Value *Arg = EmitScalarExpr(E->getArg(0)); 2701 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2702 return RValue::get(Builder.CreateCall( 2703 CGM.CreateRuntimeFunction( 2704 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2705 "__get_kernel_preferred_work_group_multiple_impl"), 2706 Arg)); 2707 } 2708 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: 2709 case Builtin::BIget_kernel_sub_group_count_for_ndrange: { 2710 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2711 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2712 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); 2713 llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); 2714 Value *Block = EmitScalarExpr(E->getArg(1)); 2715 Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy); 2716 const char *Name = 2717 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange 2718 ? 
"__get_kernel_max_sub_group_size_for_ndrange_impl" 2719 : "__get_kernel_sub_group_count_for_ndrange_impl"; 2720 return RValue::get(Builder.CreateCall( 2721 CGM.CreateRuntimeFunction( 2722 llvm::FunctionType::get( 2723 IntTy, {NDRange->getType(), GenericVoidPtrTy}, false), 2724 Name), 2725 {NDRange, Block})); 2726 } 2727 case Builtin::BIprintf: 2728 if (getTarget().getTriple().isNVPTX()) 2729 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 2730 break; 2731 case Builtin::BI__builtin_canonicalize: 2732 case Builtin::BI__builtin_canonicalizef: 2733 case Builtin::BI__builtin_canonicalizel: 2734 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2735 2736 case Builtin::BI__builtin_thread_pointer: { 2737 if (!getContext().getTargetInfo().isTLSSupported()) 2738 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2739 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 2740 break; 2741 } 2742 case Builtin::BI__builtin_os_log_format: { 2743 assert(E->getNumArgs() >= 2 && 2744 "__builtin_os_log_format takes at least 2 arguments"); 2745 analyze_os_log::OSLogBufferLayout Layout; 2746 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2747 Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); 2748 // Ignore argument 1, the format string. It is not currently used. 
2749 CharUnits Offset; 2750 Builder.CreateStore( 2751 Builder.getInt8(Layout.getSummaryByte()), 2752 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 2753 Builder.CreateStore( 2754 Builder.getInt8(Layout.getNumArgsByte()), 2755 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 2756 2757 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 2758 for (const auto &Item : Layout.Items) { 2759 Builder.CreateStore( 2760 Builder.getInt8(Item.getDescriptorByte()), 2761 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 2762 Builder.CreateStore( 2763 Builder.getInt8(Item.getSizeByte()), 2764 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 2765 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); 2766 if (const Expr *TheExpr = Item.getExpr()) { 2767 Addr = Builder.CreateElementBitCast( 2768 Addr, ConvertTypeForMem(TheExpr->getType())); 2769 // Check if this is a retainable type. 2770 if (TheExpr->getType()->isObjCRetainableType()) { 2771 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 2772 "Only scalar can be a ObjC retainable type"); 2773 llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); 2774 RValue RV = RValue::get(SV); 2775 LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); 2776 EmitStoreThroughLValue(RV, LV); 2777 // Check if the object is constant, if not, save it in 2778 // RetainableOperands. 2779 if (!isa<Constant>(SV)) 2780 RetainableOperands.push_back(SV); 2781 } else { 2782 EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); 2783 } 2784 } else { 2785 Addr = Builder.CreateElementBitCast(Addr, Int32Ty); 2786 Builder.CreateStore( 2787 Builder.getInt32(Item.getConstValue().getQuantity()), Addr); 2788 } 2789 Offset += Item.size(); 2790 } 2791 2792 // Push a clang.arc.use cleanup for each object in RetainableOperands. The 2793 // cleanup will cause the use to appear after the final log call, keeping 2794 // the object valid while it's held in the log buffer. 
Note that if there's 2795 // a release cleanup on the object, it will already be active; since 2796 // cleanups are emitted in reverse order, the use will occur before the 2797 // object is released. 2798 if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && 2799 CGM.getCodeGenOpts().OptimizationLevel != 0) 2800 for (llvm::Value *object : RetainableOperands) 2801 pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); 2802 2803 return RValue::get(BufAddr.getPointer()); 2804 } 2805 2806 case Builtin::BI__builtin_os_log_format_buffer_size: { 2807 analyze_os_log::OSLogBufferLayout Layout; 2808 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2809 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 2810 Layout.size().getQuantity())); 2811 } 2812 2813 case Builtin::BI__xray_customevent: { 2814 if (!ShouldXRayInstrumentFunction()) 2815 return RValue::getIgnored(); 2816 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { 2817 if (XRayAttr->neverXRayInstrument()) 2818 return RValue::getIgnored(); 2819 } 2820 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 2821 auto FTy = F->getFunctionType(); 2822 auto Arg0 = E->getArg(0); 2823 auto Arg0Val = EmitScalarExpr(Arg0); 2824 auto Arg0Ty = Arg0->getType(); 2825 auto PTy0 = FTy->getParamType(0); 2826 if (PTy0 != Arg0Val->getType()) { 2827 if (Arg0Ty->isArrayType()) 2828 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 2829 else 2830 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 2831 } 2832 auto Arg1 = EmitScalarExpr(E->getArg(1)); 2833 auto PTy1 = FTy->getParamType(1); 2834 if (PTy1 != Arg1->getType()) 2835 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 2836 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 2837 } 2838 2839 case Builtin::BI__builtin_ms_va_start: 2840 case Builtin::BI__builtin_ms_va_end: 2841 return RValue::get( 2842 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 2843 BuiltinID == 
Builtin::BI__builtin_ms_va_start)); 2844 2845 case Builtin::BI__builtin_ms_va_copy: { 2846 // Lower this manually. We can't reliably determine whether or not any 2847 // given va_copy() is for a Win64 va_list from the calling convention 2848 // alone, because it's legal to do this from a System V ABI function. 2849 // With opaque pointer types, we won't have enough information in LLVM 2850 // IR to determine this from the argument types, either. Best to do it 2851 // now, while we have enough information. 2852 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 2853 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 2854 2855 llvm::Type *BPP = Int8PtrPtrTy; 2856 2857 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 2858 DestAddr.getAlignment()); 2859 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 2860 SrcAddr.getAlignment()); 2861 2862 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 2863 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); 2864 } 2865 } 2866 2867 // If this is an alias for a lib function (e.g. __builtin_sin), emit 2868 // the call using the normal call path, but using the unmangled 2869 // version of the function name. 2870 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2871 return emitLibraryCall(*this, FD, E, 2872 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2873 2874 // If this is a predefined lib function (e.g. malloc), emit the call 2875 // using exactly the normal call path. 2876 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2877 return emitLibraryCall(*this, FD, E, 2878 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 2879 2880 // Check that a call to a target specific builtin has the correct target 2881 // features. 2882 // This is down here to avoid non-target specific builtins, however, if 2883 // generic builtins start to require generic target features then we 2884 // can move this up to the beginning of the function. 
2885 checkTargetFeatures(E, FD); 2886 2887 // See if we have a target specific intrinsic. 2888 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2889 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2890 StringRef Prefix = 2891 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 2892 if (!Prefix.empty()) { 2893 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 2894 // NOTE we dont need to perform a compatibility flag check here since the 2895 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 2896 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 2897 if (IntrinsicID == Intrinsic::not_intrinsic) 2898 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 2899 } 2900 2901 if (IntrinsicID != Intrinsic::not_intrinsic) { 2902 SmallVector<Value*, 16> Args; 2903 2904 // Find out if any arguments are required to be integer constant 2905 // expressions. 2906 unsigned ICEArguments = 0; 2907 ASTContext::GetBuiltinTypeError Error; 2908 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2909 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2910 2911 Function *F = CGM.getIntrinsic(IntrinsicID); 2912 llvm::FunctionType *FTy = F->getFunctionType(); 2913 2914 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2915 Value *ArgValue; 2916 // If this is a normal argument, just emit it as a scalar. 2917 if ((ICEArguments & (1 << i)) == 0) { 2918 ArgValue = EmitScalarExpr(E->getArg(i)); 2919 } else { 2920 // If this is required to be a constant, constant fold it so that we 2921 // know that the generated intrinsic gets a ConstantInt. 
2922 llvm::APSInt Result; 2923 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 2924 assert(IsConst && "Constant arg isn't actually constant?"); 2925 (void)IsConst; 2926 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2927 } 2928 2929 // If the intrinsic arg type is different from the builtin arg type 2930 // we need to do a bit cast. 2931 llvm::Type *PTy = FTy->getParamType(i); 2932 if (PTy != ArgValue->getType()) { 2933 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 2934 "Must be able to losslessly bit cast to param"); 2935 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2936 } 2937 2938 Args.push_back(ArgValue); 2939 } 2940 2941 Value *V = Builder.CreateCall(F, Args); 2942 QualType BuiltinRetType = E->getType(); 2943 2944 llvm::Type *RetTy = VoidTy; 2945 if (!BuiltinRetType->isVoidType()) 2946 RetTy = ConvertType(BuiltinRetType); 2947 2948 if (RetTy != V->getType()) { 2949 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 2950 "Must be able to losslessly bit cast result type"); 2951 V = Builder.CreateBitCast(V, RetTy); 2952 } 2953 2954 return RValue::get(V); 2955 } 2956 2957 // See if we have a target specific builtin that needs to be lowered. 2958 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 2959 return RValue::get(V); 2960 2961 ErrorUnsupported(E, "builtin function"); 2962 2963 // Unknown builtin, for now just dump it out and return undef. 
2964 return GetUndefRValue(E->getType()); 2965 } 2966 2967 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 2968 unsigned BuiltinID, const CallExpr *E, 2969 llvm::Triple::ArchType Arch) { 2970 switch (Arch) { 2971 case llvm::Triple::arm: 2972 case llvm::Triple::armeb: 2973 case llvm::Triple::thumb: 2974 case llvm::Triple::thumbeb: 2975 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 2976 case llvm::Triple::aarch64: 2977 case llvm::Triple::aarch64_be: 2978 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 2979 case llvm::Triple::x86: 2980 case llvm::Triple::x86_64: 2981 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 2982 case llvm::Triple::ppc: 2983 case llvm::Triple::ppc64: 2984 case llvm::Triple::ppc64le: 2985 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 2986 case llvm::Triple::r600: 2987 case llvm::Triple::amdgcn: 2988 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 2989 case llvm::Triple::systemz: 2990 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 2991 case llvm::Triple::nvptx: 2992 case llvm::Triple::nvptx64: 2993 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 2994 case llvm::Triple::wasm32: 2995 case llvm::Triple::wasm64: 2996 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 2997 default: 2998 return nullptr; 2999 } 3000 } 3001 3002 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 3003 const CallExpr *E) { 3004 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 3005 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 3006 return EmitTargetArchBuiltinExpr( 3007 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 3008 getContext().getAuxTargetInfo()->getTriple().getArch()); 3009 } 3010 3011 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 3012 getTarget().getTriple().getArch()); 3013 } 3014 3015 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 3016 NeonTypeFlags TypeFlags, 3017 bool V1Ty=false) { 3018 int IsQuad = TypeFlags.isQuad(); 3019 switch (TypeFlags.getEltType()) { 
3020 case NeonTypeFlags::Int8: 3021 case NeonTypeFlags::Poly8: 3022 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 3023 case NeonTypeFlags::Int16: 3024 case NeonTypeFlags::Poly16: 3025 case NeonTypeFlags::Float16: 3026 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 3027 case NeonTypeFlags::Int32: 3028 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 3029 case NeonTypeFlags::Int64: 3030 case NeonTypeFlags::Poly64: 3031 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 3032 case NeonTypeFlags::Poly128: 3033 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 3034 // There is a lot of i128 and f128 API missing. 3035 // so we use v16i8 to represent poly128 and get pattern matched. 3036 return llvm::VectorType::get(CGF->Int8Ty, 16); 3037 case NeonTypeFlags::Float32: 3038 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 3039 case NeonTypeFlags::Float64: 3040 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 3041 } 3042 llvm_unreachable("Unknown vector element type!"); 3043 } 3044 3045 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 3046 NeonTypeFlags IntTypeFlags) { 3047 int IsQuad = IntTypeFlags.isQuad(); 3048 switch (IntTypeFlags.getEltType()) { 3049 case NeonTypeFlags::Int32: 3050 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 3051 case NeonTypeFlags::Int64: 3052 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 3053 default: 3054 llvm_unreachable("Type can't be converted to floating-point!"); 3055 } 3056 } 3057 3058 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 3059 unsigned nElts = V->getType()->getVectorNumElements(); 3060 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 3061 return Builder.CreateShuffleVector(V, V, SV, "lane"); 3062 } 3063 3064 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 3065 const char *name, 3066 unsigned shift, bool rightshift) { 3067 unsigned j = 0; 3068 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3069 ai != ae; ++ai, ++j) 3070 if (shift > 0 && shift == j) 3071 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 3072 else 3073 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 3074 3075 return Builder.CreateCall(F, Ops, name); 3076 } 3077 3078 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 3079 bool neg) { 3080 int SV = cast<ConstantInt>(V)->getSExtValue(); 3081 return ConstantInt::get(Ty, neg ? -SV : SV); 3082 } 3083 3084 // \brief Right-shift a vector by a constant. 
3085 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 3086 llvm::Type *Ty, bool usgn, 3087 const char *name) { 3088 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3089 3090 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 3091 int EltSize = VTy->getScalarSizeInBits(); 3092 3093 Vec = Builder.CreateBitCast(Vec, Ty); 3094 3095 // lshr/ashr are undefined when the shift amount is equal to the vector 3096 // element size. 3097 if (ShiftAmt == EltSize) { 3098 if (usgn) { 3099 // Right-shifting an unsigned value by its size yields 0. 3100 return llvm::ConstantAggregateZero::get(VTy); 3101 } else { 3102 // Right-shifting a signed value by its size is equivalent 3103 // to a shift of size-1. 3104 --ShiftAmt; 3105 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 3106 } 3107 } 3108 3109 Shift = EmitNeonShiftVector(Shift, Ty, false); 3110 if (usgn) 3111 return Builder.CreateLShr(Vec, Shift, name); 3112 else 3113 return Builder.CreateAShr(Vec, Shift, name); 3114 } 3115 3116 enum { 3117 AddRetType = (1 << 0), 3118 Add1ArgType = (1 << 1), 3119 Add2ArgTypes = (1 << 2), 3120 3121 VectorizeRetType = (1 << 3), 3122 VectorizeArgTypes = (1 << 4), 3123 3124 InventFloatType = (1 << 5), 3125 UnsignedAlts = (1 << 6), 3126 3127 Use64BitVectors = (1 << 7), 3128 Use128BitVectors = (1 << 8), 3129 3130 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 3131 VectorRet = AddRetType | VectorizeRetType, 3132 VectorRetGetArgs01 = 3133 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 3134 FpCmpzModifiers = 3135 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 3136 }; 3137 3138 namespace { 3139 struct NeonIntrinsicInfo { 3140 const char *NameHint; 3141 unsigned BuiltinID; 3142 unsigned LLVMIntrinsic; 3143 unsigned AltLLVMIntrinsic; 3144 unsigned TypeModifier; 3145 3146 bool operator<(unsigned RHSBuiltinID) const { 3147 return BuiltinID < RHSBuiltinID; 3148 } 3149 bool operator<(const NeonIntrinsicInfo &TE) const { 3150 return 
BuiltinID < TE.BuiltinID; 3151 } 3152 }; 3153 } // end anonymous namespace 3154 3155 #define NEONMAP0(NameBase) \ 3156 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 3157 3158 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 3159 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3160 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 3161 3162 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 3163 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3164 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 3165 TypeModifier } 3166 3167 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 3168 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3169 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3170 NEONMAP1(vabs_v, arm_neon_vabs, 0), 3171 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 3172 NEONMAP0(vaddhn_v), 3173 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 3174 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 3175 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 3176 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 3177 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 3178 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 3179 NEONMAP1(vcage_v, arm_neon_vacge, 0), 3180 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 3181 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 3182 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 3183 NEONMAP1(vcale_v, arm_neon_vacge, 0), 3184 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 3185 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 3186 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 3187 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 3188 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 3189 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3190 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3191 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 3192 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3193 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 3194 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 3195 NEONMAP0(vcvt_f32_v), 3196 NEONMAP2(vcvt_n_f32_v, 
arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3197 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3198 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3199 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3200 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3201 NEONMAP0(vcvt_s32_v), 3202 NEONMAP0(vcvt_s64_v), 3203 NEONMAP0(vcvt_u32_v), 3204 NEONMAP0(vcvt_u64_v), 3205 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 3206 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 3207 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 3208 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 3209 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 3210 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 3211 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 3212 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 3213 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 3214 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 3215 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 3216 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 3217 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 3218 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 3219 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 3220 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 3221 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 3222 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 3223 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 3224 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 3225 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 3226 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 3227 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 3228 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 3229 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 3230 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 3231 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 3232 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 3233 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 3234 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 3235 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 3236 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 3237 NEONMAP0(vcvtq_f32_v), 3238 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, 
arm_neon_vcvtfxs2fp, 0), 3239 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3240 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3241 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3242 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3243 NEONMAP0(vcvtq_s32_v), 3244 NEONMAP0(vcvtq_s64_v), 3245 NEONMAP0(vcvtq_u32_v), 3246 NEONMAP0(vcvtq_u64_v), 3247 NEONMAP0(vext_v), 3248 NEONMAP0(vextq_v), 3249 NEONMAP0(vfma_v), 3250 NEONMAP0(vfmaq_v), 3251 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3252 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3253 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3254 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3255 NEONMAP0(vld1_dup_v), 3256 NEONMAP1(vld1_v, arm_neon_vld1, 0), 3257 NEONMAP0(vld1q_dup_v), 3258 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 3259 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 3260 NEONMAP1(vld2_v, arm_neon_vld2, 0), 3261 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 3262 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 3263 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 3264 NEONMAP1(vld3_v, arm_neon_vld3, 0), 3265 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 3266 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 3267 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 3268 NEONMAP1(vld4_v, arm_neon_vld4, 0), 3269 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 3270 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 3271 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3272 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 3273 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 3274 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3275 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3276 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 3277 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 3278 NEONMAP2(vminq_v, arm_neon_vminu, 
arm_neon_vmins, Add1ArgType | UnsignedAlts), 3279 NEONMAP0(vmovl_v), 3280 NEONMAP0(vmovn_v), 3281 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 3282 NEONMAP0(vmull_v), 3283 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 3284 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3285 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3286 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 3287 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3288 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3289 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 3290 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 3291 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 3292 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 3293 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 3294 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3295 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3296 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 3297 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 3298 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 3299 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 3300 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 3301 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 3302 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 3303 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 3304 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 3305 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 3306 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 3307 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3308 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3309 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, 
arm_neon_vqshifts, UnsignedAlts), 3310 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3311 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3312 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3313 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 3314 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 3315 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3316 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3317 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 3318 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3319 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3320 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 3321 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 3322 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3323 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3324 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 3325 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 3326 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 3327 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 3328 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 3329 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 3330 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 3331 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 3332 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 3333 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 3334 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 3335 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 3336 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3337 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3338 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3339 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, 
arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// Vector ("_v") NEON builtins mapped to aarch64_neon_* / aarch64_crypto_*
// (and a few generic) LLVM intrinsics. NEONMAP0 rows carry no intrinsic;
// NEONMAP2 rows carry a primary and an alternate intrinsic.
// NOTE(review): presumably searched through findNeonIntrinsicInMap, which
// asserts (in builds without NDEBUG) that rows are sorted by BuiltinID and
// then binary-searches them - keep new rows in order.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
NEONMAP0(vaddhn_v), 3388 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 3389 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 3390 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 3391 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 3392 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 3393 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 3394 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 3395 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 3396 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 3397 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 3398 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 3399 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 3400 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 3401 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 3402 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3403 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3404 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 3405 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3406 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 3407 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 3408 NEONMAP0(vcvt_f32_v), 3409 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3410 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3411 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 3412 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 3413 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 3414 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 3415 NEONMAP0(vcvtq_f32_v), 3416 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3417 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3418 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 3419 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 3420 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 3421 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 3422 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 3423 NEONMAP0(vext_v), 3424 NEONMAP0(vextq_v), 3425 NEONMAP0(vfma_v), 3426 
NEONMAP0(vfmaq_v), 3427 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 3428 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 3429 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 3430 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 3431 NEONMAP0(vmovl_v), 3432 NEONMAP0(vmovn_v), 3433 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 3434 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 3435 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 3436 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 3437 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 3438 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 3439 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 3440 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 3441 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 3442 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 3443 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 3444 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 3445 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 3446 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 3447 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 3448 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 3449 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 3450 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 3451 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 3452 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 3453 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 3454 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 3455 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, 
aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 3456 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 3457 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 3458 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 3459 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 3460 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 3461 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 3462 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 3463 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 3464 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 3465 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 3466 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 3467 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 3468 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 3469 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 3470 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 3471 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 3472 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 3473 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 3474 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 3475 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 3476 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 3477 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 3478 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 3479 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 3480 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 3481 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 3482 
NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};

// NEON builtins with explicit element-type suffixes (e.g. vabdd_f64,
// vaddlv_s32) mapped to a single LLVM intrinsic each; the third macro
// argument is the set of type-modifier flags used to pick the intrinsic's
// overload types.
// NOTE(review): presumably the source of the SISDInfo rows handed to
// EmitCommonNeonSISDBuiltinExpr and searched through findNeonIntrinsicInMap,
// which requires rows to be sorted by BuiltinID - keep new rows in order.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3519 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3520 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3521 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3522 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3523 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3524 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3525 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3526 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3527 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3528 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3529 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3530 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3531 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3532 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3533 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3534 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3535 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3536 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3537 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3538 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3539 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3540 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3541 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 3542 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3543 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | 
Add1ArgType), 3544 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3545 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3546 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3547 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3548 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 3549 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3550 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3551 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3552 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3553 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3554 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3555 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3556 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3557 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3558 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3559 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3560 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3561 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3562 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3563 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3564 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3565 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3566 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3567 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3568 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3569 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 3570 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 3571 
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 3572 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3573 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3574 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3575 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3576 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3577 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3578 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3579 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3580 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3581 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3582 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3583 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 3584 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3585 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 3586 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3587 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3588 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 3589 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 3590 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3591 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3592 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 3593 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 3594 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 3595 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 3596 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 3597 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 3598 NEONMAP1(vqmovnd_s64, 
aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 3599 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 3600 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3601 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3602 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3603 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3604 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 3605 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3606 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3607 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3608 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 3609 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3610 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 3611 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 3612 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 3613 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3614 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3615 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 3616 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 3617 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3618 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3619 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 3620 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 3621 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 3622 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 3623 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3624 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | 
Use64BitVectors), 3625 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3626 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 3627 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 3628 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3629 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3630 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3631 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3632 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3633 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3634 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 3635 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 3636 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3637 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3638 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3639 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3640 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 3641 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 3642 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 3643 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 3644 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3645 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3646 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 3647 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 3648 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 3649 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3650 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3651 
NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3652 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3653 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 3654 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3655 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3656 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3657 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3658 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 3659 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 3660 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3661 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3662 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 3663 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 3664 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 3665 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 3666 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 3667 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 3668 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 3669 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 3670 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 3671 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 3672 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 3673 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 3674 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 3675 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 3676 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 3677 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 3678 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 3679 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 3680 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 3681 
NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 3682 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3683 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 3684 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3685 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 3686 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 3687 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 3688 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3689 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 3690 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3691 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 3692 }; 3693 3694 #undef NEONMAP0 3695 #undef NEONMAP1 3696 #undef NEONMAP2 3697 3698 static bool NEONSIMDIntrinsicsProvenSorted = false; 3699 3700 static bool AArch64SIMDIntrinsicsProvenSorted = false; 3701 static bool AArch64SISDIntrinsicsProvenSorted = false; 3702 3703 3704 static const NeonIntrinsicInfo * 3705 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 3706 unsigned BuiltinID, bool &MapProvenSorted) { 3707 3708 #ifndef NDEBUG 3709 if (!MapProvenSorted) { 3710 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 3711 MapProvenSorted = true; 3712 } 3713 #endif 3714 3715 const NeonIntrinsicInfo *Builtin = 3716 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 3717 3718 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 3719 return Builtin; 3720 3721 return nullptr; 3722 } 3723 3724 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 3725 unsigned Modifier, 3726 llvm::Type *ArgType, 3727 const CallExpr *E) { 3728 int VectorSize = 0; 3729 if (Modifier & Use64BitVectors) 3730 VectorSize = 64; 3731 else if (Modifier & Use128BitVectors) 3732 VectorSize = 128; 3733 3734 // Return type. 
3735 SmallVector<llvm::Type *, 3> Tys; 3736 if (Modifier & AddRetType) { 3737 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 3738 if (Modifier & VectorizeRetType) 3739 Ty = llvm::VectorType::get( 3740 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 3741 3742 Tys.push_back(Ty); 3743 } 3744 3745 // Arguments. 3746 if (Modifier & VectorizeArgTypes) { 3747 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 3748 ArgType = llvm::VectorType::get(ArgType, Elts); 3749 } 3750 3751 if (Modifier & (Add1ArgType | Add2ArgTypes)) 3752 Tys.push_back(ArgType); 3753 3754 if (Modifier & Add2ArgTypes) 3755 Tys.push_back(ArgType); 3756 3757 if (Modifier & InventFloatType) 3758 Tys.push_back(FloatTy); 3759 3760 return CGM.getIntrinsic(IntrinsicID, Tys); 3761 } 3762 3763 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 3764 const NeonIntrinsicInfo &SISDInfo, 3765 SmallVectorImpl<Value *> &Ops, 3766 const CallExpr *E) { 3767 unsigned BuiltinID = SISDInfo.BuiltinID; 3768 unsigned int Int = SISDInfo.LLVMIntrinsic; 3769 unsigned Modifier = SISDInfo.TypeModifier; 3770 const char *s = SISDInfo.NameHint; 3771 3772 switch (BuiltinID) { 3773 case NEON::BI__builtin_neon_vcled_s64: 3774 case NEON::BI__builtin_neon_vcled_u64: 3775 case NEON::BI__builtin_neon_vcles_f32: 3776 case NEON::BI__builtin_neon_vcled_f64: 3777 case NEON::BI__builtin_neon_vcltd_s64: 3778 case NEON::BI__builtin_neon_vcltd_u64: 3779 case NEON::BI__builtin_neon_vclts_f32: 3780 case NEON::BI__builtin_neon_vcltd_f64: 3781 case NEON::BI__builtin_neon_vcales_f32: 3782 case NEON::BI__builtin_neon_vcaled_f64: 3783 case NEON::BI__builtin_neon_vcalts_f32: 3784 case NEON::BI__builtin_neon_vcaltd_f64: 3785 // Only one direction of comparisons actually exist, cmle is actually a cmge 3786 // with swapped operands. The table gives us the right intrinsic but we 3787 // still need to do the swap. 
3788 std::swap(Ops[0], Ops[1]); 3789 break; 3790 } 3791 3792 assert(Int && "Generic code assumes a valid intrinsic"); 3793 3794 // Determine the type(s) of this overloaded AArch64 intrinsic. 3795 const Expr *Arg = E->getArg(0); 3796 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 3797 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 3798 3799 int j = 0; 3800 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 3801 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3802 ai != ae; ++ai, ++j) { 3803 llvm::Type *ArgTy = ai->getType(); 3804 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 3805 ArgTy->getPrimitiveSizeInBits()) 3806 continue; 3807 3808 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 3809 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 3810 // it before inserting. 3811 Ops[j] = 3812 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 3813 Ops[j] = 3814 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 3815 } 3816 3817 Value *Result = CGF.EmitNeonCall(F, Ops, s); 3818 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 3819 if (ResultType->getPrimitiveSizeInBits() < 3820 Result->getType()->getPrimitiveSizeInBits()) 3821 return CGF.Builder.CreateExtractElement(Result, C0); 3822 3823 return CGF.Builder.CreateBitCast(Result, ResultType, s); 3824 } 3825 3826 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 3827 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 3828 const char *NameHint, unsigned Modifier, const CallExpr *E, 3829 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) { 3830 // Get the last argument, which specifies the vector type. 3831 llvm::APSInt NeonTypeConst; 3832 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3833 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 3834 return nullptr; 3835 3836 // Determine the type of this overloaded NEON intrinsic. 
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Helper: the alignment of an address as an i32 constant, in the form the
  // load/store cases below append to their operand lists.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Default to LLVMIntrinsic; use the alternate when the table entry carries
  // UnsignedAlts and the element type is signed.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  // Builtins with dedicated lowering return from inside the switch. Cases that
  // `break` (and the default) continue to the generic emission after it.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point element types use the generic fabs intrinsic; all others
    // use the table-provided intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // The "less" forms reuse the "greater" lowering below with the two
    // operands exchanged.
    std::swap(Ops[0], Ops[1]);
    LLVM_FALLTHROUGH;
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on the result type (VTy) and on a
    // floating-point vector with the same lane count and lane width.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer-to-float conversion is emitted as a plain uitofp/sitofp to the
    // Float32 vector type of the same quad-ness.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    // The variant is chosen from Usgn alone here, without consulting the
    // UnsignedAlts bit of Modifier.
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float-to-integer conversion is emitted as a plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // All of these share one shape: the table intrinsic overloaded on the
    // integer result type and the matching floating-point source type.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shuffle of Ops[0] and Ops[1] whose mask is the run of
    // consecutive indices CV, CV+1, ..., one per result lane, where CV is the
    // constant third operand.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    // The alignment of PtrOp0 is passed as a trailing i32 operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the load intrinsic on Ops[1] with PtrOp1's alignment, then store
    // the call's result through Ops[0], cast to a pointer to the result type.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element through PtrOp0, insert it into lane 0 of an undef
    // vector, and hand that vector to EmitNeonSplat.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Operands from index 2 up to, but not including, the last one are
    // bitcast to the vector type; the last operand is left untouched.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination pointer, so the call takes Ops[1..]; its
    // result is then stored through Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widening move: zero- or sign-extend from the half-width element vector.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrowing move: truncate from the double-width element vector.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two steps: apply LLVMIntrinsic to every operand after the first, then
    // combine that result with Ops[0] through AltLLVMIntrinsic.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point types use LLVMIntrinsic; all other types use the
    // alternate.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Emitted as a plain shl by the vector built by EmitNeonShiftVector.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Widen from the half-width element vector (zext or sext), then shl.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the double-width element vector (lshr or ashr), then
    // truncate to the result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    // All stores share one shape: the intrinsic overloaded on {i8*, Ty}, with
    // the alignment of PtrOp0 appended as the last operand.
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] points at two consecutive result vectors. Result vi (0 or 1) is
    // a shuffle of Ops[1] and Ops[2] with mask {vi, e+vi, 2+vi, e+2+vi, ...},
    // where e is the lane count, stored to element vi of Ops[0].
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    // The value returned is the store of the second result vector.
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per lane, sign-extended back to the vector type.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same two-result layout as vtrn. Result vi takes mask indices
    // {vi, 2+vi, 4+vi, ...}: the even ones for vi == 0, the odd ones for
    // vi == 1.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same two-result layout as vtrn. Result vi takes mask index pairs
    // {k, k+e} with k = (i + vi*e) / 2 for i = 0, 2, 4, ..., where e is the
    // lane count.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by the default case and by cases that `break`.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
4244 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4245 4246 Value *Result = EmitNeonCall(F, Ops, NameHint); 4247 llvm::Type *ResultType = ConvertType(E->getType()); 4248 // AArch64 intrinsic one-element vector type cast to 4249 // scalar type expected by the builtin 4250 return Builder.CreateBitCast(Result, ResultType, NameHint); 4251 } 4252 4253 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 4254 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 4255 const CmpInst::Predicate Ip, const Twine &Name) { 4256 llvm::Type *OTy = Op->getType(); 4257 4258 // FIXME: this is utterly horrific. We should not be looking at previous 4259 // codegen context to find out what needs doing. Unfortunately TableGen 4260 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 4261 // (etc). 4262 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 4263 OTy = BI->getOperand(0)->getType(); 4264 4265 Op = Builder.CreateBitCast(Op, OTy); 4266 if (OTy->getScalarType()->isFloatingPointTy()) { 4267 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 4268 } else { 4269 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 4270 } 4271 return Builder.CreateSExt(Op, Ty, Name); 4272 } 4273 4274 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 4275 Value *ExtOp, Value *IndexOp, 4276 llvm::Type *ResTy, unsigned IntID, 4277 const char *Name) { 4278 SmallVector<Value *, 2> TblOps; 4279 if (ExtOp) 4280 TblOps.push_back(ExtOp); 4281 4282 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 4283 SmallVector<uint32_t, 16> Indices; 4284 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 4285 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 4286 Indices.push_back(2*i); 4287 Indices.push_back(2*i+1); 4288 } 4289 4290 int PairPos = 0, End = Ops.size() - 1; 4291 while (PairPos < End) { 4292 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4293 
Ops[PairPos+1], Indices, 4294 Name)); 4295 PairPos += 2; 4296 } 4297 4298 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 4299 // of the 128-bit lookup table with zero. 4300 if (PairPos == End) { 4301 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 4302 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4303 ZeroTbl, Indices, Name)); 4304 } 4305 4306 Function *TblF; 4307 TblOps.push_back(IndexOp); 4308 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 4309 4310 return CGF.EmitNeonCall(TblF, TblOps, Name); 4311 } 4312 4313 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 4314 unsigned Value; 4315 switch (BuiltinID) { 4316 default: 4317 return nullptr; 4318 case ARM::BI__builtin_arm_nop: 4319 Value = 0; 4320 break; 4321 case ARM::BI__builtin_arm_yield: 4322 case ARM::BI__yield: 4323 Value = 1; 4324 break; 4325 case ARM::BI__builtin_arm_wfe: 4326 case ARM::BI__wfe: 4327 Value = 2; 4328 break; 4329 case ARM::BI__builtin_arm_wfi: 4330 case ARM::BI__wfi: 4331 Value = 3; 4332 break; 4333 case ARM::BI__builtin_arm_sev: 4334 case ARM::BI__sev: 4335 Value = 4; 4336 break; 4337 case ARM::BI__builtin_arm_sevl: 4338 case ARM::BI__sevl: 4339 Value = 5; 4340 break; 4341 } 4342 4343 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 4344 llvm::ConstantInt::get(Int32Ty, Value)); 4345 } 4346 4347 // Generates the IR for the read/write special register builtin, 4348 // ValueType is the type of the value that is to be written or read, 4349 // RegisterType is the type of the register being written to or read from. 4350 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 4351 const CallExpr *E, 4352 llvm::Type *RegisterType, 4353 llvm::Type *ValueType, 4354 bool IsRead, 4355 StringRef SysReg = "") { 4356 // write and register intrinsics only support 32 and 64 bit operations. 
4357 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4358 && "Unsupported size for register."); 4359 4360 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4361 CodeGen::CodeGenModule &CGM = CGF.CGM; 4362 LLVMContext &Context = CGM.getLLVMContext(); 4363 4364 if (SysReg.empty()) { 4365 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4366 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 4367 } 4368 4369 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4370 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4371 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4372 4373 llvm::Type *Types[] = { RegisterType }; 4374 4375 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4376 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4377 && "Can't fit 64-bit value in 32-bit register"); 4378 4379 if (IsRead) { 4380 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4381 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4382 4383 if (MixedTypes) 4384 // Read into 64 bit register and then truncate result to 32 bit. 4385 return Builder.CreateTrunc(Call, ValueType); 4386 4387 if (ValueType->isPointerTy()) 4388 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4389 return Builder.CreateIntToPtr(Call, ValueType); 4390 4391 return Call; 4392 } 4393 4394 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4395 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4396 if (MixedTypes) { 4397 // Extend 32 bit write value to 64 bit to pass to write. 4398 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4399 return Builder.CreateCall(F, { Metadata, ArgValue }); 4400 } 4401 4402 if (ValueType->isPointerTy()) { 4403 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
4404 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4405 return Builder.CreateCall(F, { Metadata, ArgValue }); 4406 } 4407 4408 return Builder.CreateCall(F, { Metadata, ArgValue }); 4409 } 4410 4411 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4412 /// argument that specifies the vector type. 4413 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4414 switch (BuiltinID) { 4415 default: break; 4416 case NEON::BI__builtin_neon_vget_lane_i8: 4417 case NEON::BI__builtin_neon_vget_lane_i16: 4418 case NEON::BI__builtin_neon_vget_lane_i32: 4419 case NEON::BI__builtin_neon_vget_lane_i64: 4420 case NEON::BI__builtin_neon_vget_lane_f32: 4421 case NEON::BI__builtin_neon_vgetq_lane_i8: 4422 case NEON::BI__builtin_neon_vgetq_lane_i16: 4423 case NEON::BI__builtin_neon_vgetq_lane_i32: 4424 case NEON::BI__builtin_neon_vgetq_lane_i64: 4425 case NEON::BI__builtin_neon_vgetq_lane_f32: 4426 case NEON::BI__builtin_neon_vset_lane_i8: 4427 case NEON::BI__builtin_neon_vset_lane_i16: 4428 case NEON::BI__builtin_neon_vset_lane_i32: 4429 case NEON::BI__builtin_neon_vset_lane_i64: 4430 case NEON::BI__builtin_neon_vset_lane_f32: 4431 case NEON::BI__builtin_neon_vsetq_lane_i8: 4432 case NEON::BI__builtin_neon_vsetq_lane_i16: 4433 case NEON::BI__builtin_neon_vsetq_lane_i32: 4434 case NEON::BI__builtin_neon_vsetq_lane_i64: 4435 case NEON::BI__builtin_neon_vsetq_lane_f32: 4436 case NEON::BI__builtin_neon_vsha1h_u32: 4437 case NEON::BI__builtin_neon_vsha1cq_u32: 4438 case NEON::BI__builtin_neon_vsha1pq_u32: 4439 case NEON::BI__builtin_neon_vsha1mq_u32: 4440 case ARM::BI_MoveToCoprocessor: 4441 case ARM::BI_MoveToCoprocessor2: 4442 return false; 4443 } 4444 return true; 4445 } 4446 4447 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4448 const CallExpr *E) { 4449 if (auto Hint = GetValueForARMHint(BuiltinID)) 4450 return Hint; 4451 4452 if (BuiltinID == ARM::BI__emit) { 4453 bool IsThumb = getTarget().getTriple().getArch() == 
llvm::Triple::thumb; 4454 llvm::FunctionType *FTy = 4455 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 4456 4457 APSInt Value; 4458 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 4459 llvm_unreachable("Sema will ensure that the parameter is constant"); 4460 4461 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 4462 4463 llvm::InlineAsm *Emit = 4464 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 4465 /*SideEffects=*/true) 4466 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 4467 /*SideEffects=*/true); 4468 4469 return Builder.CreateCall(Emit); 4470 } 4471 4472 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 4473 Value *Option = EmitScalarExpr(E->getArg(0)); 4474 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 4475 } 4476 4477 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 4478 Value *Address = EmitScalarExpr(E->getArg(0)); 4479 Value *RW = EmitScalarExpr(E->getArg(1)); 4480 Value *IsData = EmitScalarExpr(E->getArg(2)); 4481 4482 // Locality is not supported on ARM target 4483 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 4484 4485 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4486 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4487 } 4488 4489 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 4490 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4491 return Builder.CreateCall( 4492 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 4493 } 4494 4495 if (BuiltinID == ARM::BI__clear_cache) { 4496 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4497 const FunctionDecl *FD = E->getDirectCallee(); 4498 Value *Ops[2]; 4499 for (unsigned i = 0; i < 2; i++) 4500 Ops[i] = EmitScalarExpr(E->getArg(i)); 4501 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4502 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4503 StringRef Name = FD->getName(); 4504 return 
EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4505 } 4506 4507 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 4508 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 4509 Function *F; 4510 4511 switch (BuiltinID) { 4512 default: llvm_unreachable("unexpected builtin"); 4513 case ARM::BI__builtin_arm_mcrr: 4514 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 4515 break; 4516 case ARM::BI__builtin_arm_mcrr2: 4517 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 4518 break; 4519 } 4520 4521 // MCRR{2} instruction has 5 operands but 4522 // the intrinsic has 4 because Rt and Rt2 4523 // are represented as a single unsigned 64 4524 // bit integer in the intrinsic definition 4525 // but internally it's represented as 2 32 4526 // bit integers. 4527 4528 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4529 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4530 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 4531 Value *CRm = EmitScalarExpr(E->getArg(3)); 4532 4533 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4534 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 4535 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 4536 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 4537 4538 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 4539 } 4540 4541 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 4542 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 4543 Function *F; 4544 4545 switch (BuiltinID) { 4546 default: llvm_unreachable("unexpected builtin"); 4547 case ARM::BI__builtin_arm_mrrc: 4548 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 4549 break; 4550 case ARM::BI__builtin_arm_mrrc2: 4551 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 4552 break; 4553 } 4554 4555 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4556 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4557 Value *CRm = EmitScalarExpr(E->getArg(2)); 4558 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 4559 4560 // Returns an unsigned 64 bit integer, represented 4561 // as two 32 bit integers. 
4562 4563 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 4564 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 4565 Rt = Builder.CreateZExt(Rt, Int64Ty); 4566 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 4567 4568 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 4569 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 4570 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 4571 4572 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 4573 } 4574 4575 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 4576 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 4577 BuiltinID == ARM::BI__builtin_arm_ldaex) && 4578 getContext().getTypeSize(E->getType()) == 64) || 4579 BuiltinID == ARM::BI__ldrexd) { 4580 Function *F; 4581 4582 switch (BuiltinID) { 4583 default: llvm_unreachable("unexpected builtin"); 4584 case ARM::BI__builtin_arm_ldaex: 4585 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 4586 break; 4587 case ARM::BI__builtin_arm_ldrexd: 4588 case ARM::BI__builtin_arm_ldrex: 4589 case ARM::BI__ldrexd: 4590 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 4591 break; 4592 } 4593 4594 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4595 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4596 "ldrexd"); 4597 4598 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4599 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4600 Val0 = Builder.CreateZExt(Val0, Int64Ty); 4601 Val1 = Builder.CreateZExt(Val1, Int64Ty); 4602 4603 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 4604 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4605 Val = Builder.CreateOr(Val, Val1); 4606 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4607 } 4608 4609 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 4610 BuiltinID == ARM::BI__builtin_arm_ldaex) { 4611 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4612 4613 QualType Ty = E->getType(); 4614 llvm::Type *RealResTy = ConvertType(Ty); 4615 llvm::Type *PtrTy = llvm::IntegerType::get( 
4616 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 4617 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 4618 4619 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 4620 ? Intrinsic::arm_ldaex 4621 : Intrinsic::arm_ldrex, 4622 PtrTy); 4623 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 4624 4625 if (RealResTy->isPointerTy()) 4626 return Builder.CreateIntToPtr(Val, RealResTy); 4627 else { 4628 llvm::Type *IntResTy = llvm::IntegerType::get( 4629 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 4630 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4631 return Builder.CreateBitCast(Val, RealResTy); 4632 } 4633 } 4634 4635 if (BuiltinID == ARM::BI__builtin_arm_strexd || 4636 ((BuiltinID == ARM::BI__builtin_arm_stlex || 4637 BuiltinID == ARM::BI__builtin_arm_strex) && 4638 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 4639 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4640 ? Intrinsic::arm_stlexd 4641 : Intrinsic::arm_strexd); 4642 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 4643 4644 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4645 Value *Val = EmitScalarExpr(E->getArg(0)); 4646 Builder.CreateStore(Val, Tmp); 4647 4648 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4649 Val = Builder.CreateLoad(LdPtr); 4650 4651 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4652 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4653 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 4654 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 4655 } 4656 4657 if (BuiltinID == ARM::BI__builtin_arm_strex || 4658 BuiltinID == ARM::BI__builtin_arm_stlex) { 4659 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4660 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4661 4662 QualType Ty = E->getArg(0)->getType(); 4663 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4664 
getContext().getTypeSize(Ty)); 4665 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4666 4667 if (StoreVal->getType()->isPointerTy()) 4668 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 4669 else { 4670 llvm::Type *IntTy = llvm::IntegerType::get( 4671 getLLVMContext(), 4672 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 4673 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 4674 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 4675 } 4676 4677 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4678 ? Intrinsic::arm_stlex 4679 : Intrinsic::arm_strex, 4680 StoreAddr->getType()); 4681 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 4682 } 4683 4684 switch (BuiltinID) { 4685 case ARM::BI__iso_volatile_load8: 4686 case ARM::BI__iso_volatile_load16: 4687 case ARM::BI__iso_volatile_load32: 4688 case ARM::BI__iso_volatile_load64: { 4689 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4690 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4691 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 4692 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4693 LoadSize.getQuantity() * 8); 4694 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4695 llvm::LoadInst *Load = 4696 Builder.CreateAlignedLoad(Ptr, LoadSize); 4697 Load->setVolatile(true); 4698 return Load; 4699 } 4700 case ARM::BI__iso_volatile_store8: 4701 case ARM::BI__iso_volatile_store16: 4702 case ARM::BI__iso_volatile_store32: 4703 case ARM::BI__iso_volatile_store64: { 4704 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4705 Value *Value = EmitScalarExpr(E->getArg(1)); 4706 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4707 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 4708 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4709 StoreSize.getQuantity() * 8); 4710 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4711 llvm::StoreInst *Store = 4712 
Builder.CreateAlignedStore(Value, Ptr, 4713 StoreSize); 4714 Store->setVolatile(true); 4715 return Store; 4716 } 4717 } 4718 4719 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 4720 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 4721 return Builder.CreateCall(F); 4722 } 4723 4724 // CRC32 4725 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4726 switch (BuiltinID) { 4727 case ARM::BI__builtin_arm_crc32b: 4728 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 4729 case ARM::BI__builtin_arm_crc32cb: 4730 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 4731 case ARM::BI__builtin_arm_crc32h: 4732 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 4733 case ARM::BI__builtin_arm_crc32ch: 4734 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 4735 case ARM::BI__builtin_arm_crc32w: 4736 case ARM::BI__builtin_arm_crc32d: 4737 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 4738 case ARM::BI__builtin_arm_crc32cw: 4739 case ARM::BI__builtin_arm_crc32cd: 4740 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 4741 } 4742 4743 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4744 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4745 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4746 4747 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 4748 // intrinsics, hence we need different codegen for these cases. 
4749 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 4750 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 4751 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4752 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 4753 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 4754 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 4755 4756 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4757 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 4758 return Builder.CreateCall(F, {Res, Arg1b}); 4759 } else { 4760 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 4761 4762 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4763 return Builder.CreateCall(F, {Arg0, Arg1}); 4764 } 4765 } 4766 4767 if (BuiltinID == ARM::BI__builtin_arm_rsr || 4768 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4769 BuiltinID == ARM::BI__builtin_arm_rsrp || 4770 BuiltinID == ARM::BI__builtin_arm_wsr || 4771 BuiltinID == ARM::BI__builtin_arm_wsr64 || 4772 BuiltinID == ARM::BI__builtin_arm_wsrp) { 4773 4774 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 4775 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4776 BuiltinID == ARM::BI__builtin_arm_rsrp; 4777 4778 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 4779 BuiltinID == ARM::BI__builtin_arm_wsrp; 4780 4781 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 4782 BuiltinID == ARM::BI__builtin_arm_wsr64; 4783 4784 llvm::Type *ValueType; 4785 llvm::Type *RegisterType; 4786 if (IsPointerBuiltin) { 4787 ValueType = VoidPtrTy; 4788 RegisterType = Int32Ty; 4789 } else if (Is64Bit) { 4790 ValueType = RegisterType = Int64Ty; 4791 } else { 4792 ValueType = RegisterType = Int32Ty; 4793 } 4794 4795 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 4796 } 4797 4798 // Find out if any arguments are required to be integer constant 4799 // expressions. 
4800 unsigned ICEArguments = 0; 4801 ASTContext::GetBuiltinTypeError Error; 4802 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4803 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4804 4805 auto getAlignmentValue32 = [&](Address addr) -> Value* { 4806 return Builder.getInt32(addr.getAlignment().getQuantity()); 4807 }; 4808 4809 Address PtrOp0 = Address::invalid(); 4810 Address PtrOp1 = Address::invalid(); 4811 SmallVector<Value*, 4> Ops; 4812 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 4813 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 4814 for (unsigned i = 0, e = NumArgs; i != e; i++) { 4815 if (i == 0) { 4816 switch (BuiltinID) { 4817 case NEON::BI__builtin_neon_vld1_v: 4818 case NEON::BI__builtin_neon_vld1q_v: 4819 case NEON::BI__builtin_neon_vld1q_lane_v: 4820 case NEON::BI__builtin_neon_vld1_lane_v: 4821 case NEON::BI__builtin_neon_vld1_dup_v: 4822 case NEON::BI__builtin_neon_vld1q_dup_v: 4823 case NEON::BI__builtin_neon_vst1_v: 4824 case NEON::BI__builtin_neon_vst1q_v: 4825 case NEON::BI__builtin_neon_vst1q_lane_v: 4826 case NEON::BI__builtin_neon_vst1_lane_v: 4827 case NEON::BI__builtin_neon_vst2_v: 4828 case NEON::BI__builtin_neon_vst2q_v: 4829 case NEON::BI__builtin_neon_vst2_lane_v: 4830 case NEON::BI__builtin_neon_vst2q_lane_v: 4831 case NEON::BI__builtin_neon_vst3_v: 4832 case NEON::BI__builtin_neon_vst3q_v: 4833 case NEON::BI__builtin_neon_vst3_lane_v: 4834 case NEON::BI__builtin_neon_vst3q_lane_v: 4835 case NEON::BI__builtin_neon_vst4_v: 4836 case NEON::BI__builtin_neon_vst4q_v: 4837 case NEON::BI__builtin_neon_vst4_lane_v: 4838 case NEON::BI__builtin_neon_vst4q_lane_v: 4839 // Get the alignment for the argument in addition to the value; 4840 // we'll use it later. 
4841 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 4842 Ops.push_back(PtrOp0.getPointer()); 4843 continue; 4844 } 4845 } 4846 if (i == 1) { 4847 switch (BuiltinID) { 4848 case NEON::BI__builtin_neon_vld2_v: 4849 case NEON::BI__builtin_neon_vld2q_v: 4850 case NEON::BI__builtin_neon_vld3_v: 4851 case NEON::BI__builtin_neon_vld3q_v: 4852 case NEON::BI__builtin_neon_vld4_v: 4853 case NEON::BI__builtin_neon_vld4q_v: 4854 case NEON::BI__builtin_neon_vld2_lane_v: 4855 case NEON::BI__builtin_neon_vld2q_lane_v: 4856 case NEON::BI__builtin_neon_vld3_lane_v: 4857 case NEON::BI__builtin_neon_vld3q_lane_v: 4858 case NEON::BI__builtin_neon_vld4_lane_v: 4859 case NEON::BI__builtin_neon_vld4q_lane_v: 4860 case NEON::BI__builtin_neon_vld2_dup_v: 4861 case NEON::BI__builtin_neon_vld3_dup_v: 4862 case NEON::BI__builtin_neon_vld4_dup_v: 4863 // Get the alignment for the argument in addition to the value; 4864 // we'll use it later. 4865 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 4866 Ops.push_back(PtrOp1.getPointer()); 4867 continue; 4868 } 4869 } 4870 4871 if ((ICEArguments & (1 << i)) == 0) { 4872 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4873 } else { 4874 // If this is required to be a constant, constant fold it so that we know 4875 // that the generated intrinsic gets a ConstantInt. 
4876 llvm::APSInt Result; 4877 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 4878 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 4879 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 4880 } 4881 } 4882 4883 switch (BuiltinID) { 4884 default: break; 4885 4886 case NEON::BI__builtin_neon_vget_lane_i8: 4887 case NEON::BI__builtin_neon_vget_lane_i16: 4888 case NEON::BI__builtin_neon_vget_lane_i32: 4889 case NEON::BI__builtin_neon_vget_lane_i64: 4890 case NEON::BI__builtin_neon_vget_lane_f32: 4891 case NEON::BI__builtin_neon_vgetq_lane_i8: 4892 case NEON::BI__builtin_neon_vgetq_lane_i16: 4893 case NEON::BI__builtin_neon_vgetq_lane_i32: 4894 case NEON::BI__builtin_neon_vgetq_lane_i64: 4895 case NEON::BI__builtin_neon_vgetq_lane_f32: 4896 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 4897 4898 case NEON::BI__builtin_neon_vset_lane_i8: 4899 case NEON::BI__builtin_neon_vset_lane_i16: 4900 case NEON::BI__builtin_neon_vset_lane_i32: 4901 case NEON::BI__builtin_neon_vset_lane_i64: 4902 case NEON::BI__builtin_neon_vset_lane_f32: 4903 case NEON::BI__builtin_neon_vsetq_lane_i8: 4904 case NEON::BI__builtin_neon_vsetq_lane_i16: 4905 case NEON::BI__builtin_neon_vsetq_lane_i32: 4906 case NEON::BI__builtin_neon_vsetq_lane_i64: 4907 case NEON::BI__builtin_neon_vsetq_lane_f32: 4908 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4909 4910 case NEON::BI__builtin_neon_vsha1h_u32: 4911 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 4912 "vsha1h"); 4913 case NEON::BI__builtin_neon_vsha1cq_u32: 4914 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 4915 "vsha1h"); 4916 case NEON::BI__builtin_neon_vsha1pq_u32: 4917 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 4918 "vsha1h"); 4919 case NEON::BI__builtin_neon_vsha1mq_u32: 4920 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 4921 "vsha1h"); 
4922 4923 // The ARM _MoveToCoprocessor builtins put the input register value as 4924 // the first argument, but the LLVM intrinsic expects it as the third one. 4925 case ARM::BI_MoveToCoprocessor: 4926 case ARM::BI_MoveToCoprocessor2: { 4927 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 4928 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 4929 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 4930 Ops[3], Ops[4], Ops[5]}); 4931 } 4932 case ARM::BI_BitScanForward: 4933 case ARM::BI_BitScanForward64: 4934 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 4935 case ARM::BI_BitScanReverse: 4936 case ARM::BI_BitScanReverse64: 4937 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 4938 4939 case ARM::BI_InterlockedAnd64: 4940 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 4941 case ARM::BI_InterlockedExchange64: 4942 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 4943 case ARM::BI_InterlockedExchangeAdd64: 4944 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 4945 case ARM::BI_InterlockedExchangeSub64: 4946 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 4947 case ARM::BI_InterlockedOr64: 4948 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 4949 case ARM::BI_InterlockedXor64: 4950 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 4951 case ARM::BI_InterlockedDecrement64: 4952 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 4953 case ARM::BI_InterlockedIncrement64: 4954 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 4955 } 4956 4957 // Get the last argument, which specifies the vector type. 
4958 assert(HasExtraArg); 4959 llvm::APSInt Result; 4960 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4961 if (!Arg->isIntegerConstantExpr(Result, getContext())) 4962 return nullptr; 4963 4964 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 4965 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 4966 // Determine the overloaded type of this builtin. 4967 llvm::Type *Ty; 4968 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 4969 Ty = FloatTy; 4970 else 4971 Ty = DoubleTy; 4972 4973 // Determine whether this is an unsigned conversion or not. 4974 bool usgn = Result.getZExtValue() == 1; 4975 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 4976 4977 // Call the appropriate intrinsic. 4978 Function *F = CGM.getIntrinsic(Int, Ty); 4979 return Builder.CreateCall(F, Ops, "vcvtr"); 4980 } 4981 4982 // Determine the type of this overloaded NEON intrinsic. 4983 NeonTypeFlags Type(Result.getZExtValue()); 4984 bool usgn = Type.isUnsigned(); 4985 bool rightShift = false; 4986 4987 llvm::VectorType *VTy = GetNeonType(this, Type); 4988 llvm::Type *Ty = VTy; 4989 if (!Ty) 4990 return nullptr; 4991 4992 // Many NEON builtins have identical semantics and uses in ARM and 4993 // AArch64. Emit these in a single function. 4994 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 4995 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4996 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 4997 if (Builtin) 4998 return EmitCommonNeonBuiltinExpr( 4999 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5000 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); 5001 5002 unsigned Int; 5003 switch (BuiltinID) { 5004 default: return nullptr; 5005 case NEON::BI__builtin_neon_vld1q_lane_v: 5006 // Handle 64-bit integer elements as a special case. Use shuffles of 5007 // one-element vectors to avoid poor code for i64 in the backend. 5008 if (VTy->getElementType()->isIntegerTy(64)) { 5009 // Extract the other lane. 
5010 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5011 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 5012 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 5013 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5014 // Load the value as a one-element vector. 5015 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 5016 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5017 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 5018 Value *Align = getAlignmentValue32(PtrOp0); 5019 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 5020 // Combine them. 5021 uint32_t Indices[] = {1 - Lane, Lane}; 5022 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 5023 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 5024 } 5025 // fall through 5026 case NEON::BI__builtin_neon_vld1_lane_v: { 5027 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5028 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 5029 Value *Ld = Builder.CreateLoad(PtrOp0); 5030 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 5031 } 5032 case NEON::BI__builtin_neon_vld2_dup_v: 5033 case NEON::BI__builtin_neon_vld3_dup_v: 5034 case NEON::BI__builtin_neon_vld4_dup_v: { 5035 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
5036 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 5037 switch (BuiltinID) { 5038 case NEON::BI__builtin_neon_vld2_dup_v: 5039 Int = Intrinsic::arm_neon_vld2; 5040 break; 5041 case NEON::BI__builtin_neon_vld3_dup_v: 5042 Int = Intrinsic::arm_neon_vld3; 5043 break; 5044 case NEON::BI__builtin_neon_vld4_dup_v: 5045 Int = Intrinsic::arm_neon_vld4; 5046 break; 5047 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5048 } 5049 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5050 Function *F = CGM.getIntrinsic(Int, Tys); 5051 llvm::Value *Align = getAlignmentValue32(PtrOp1); 5052 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 5053 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5054 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5055 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5056 } 5057 switch (BuiltinID) { 5058 case NEON::BI__builtin_neon_vld2_dup_v: 5059 Int = Intrinsic::arm_neon_vld2lane; 5060 break; 5061 case NEON::BI__builtin_neon_vld3_dup_v: 5062 Int = Intrinsic::arm_neon_vld3lane; 5063 break; 5064 case NEON::BI__builtin_neon_vld4_dup_v: 5065 Int = Intrinsic::arm_neon_vld4lane; 5066 break; 5067 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5068 } 5069 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5070 Function *F = CGM.getIntrinsic(Int, Tys); 5071 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 5072 5073 SmallVector<Value*, 6> Args; 5074 Args.push_back(Ops[1]); 5075 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 5076 5077 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5078 Args.push_back(CI); 5079 Args.push_back(getAlignmentValue32(PtrOp1)); 5080 5081 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 5082 // splat lane 0 to all elts in each vector of the result. 
5083 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5084 Value *Val = Builder.CreateExtractValue(Ops[1], i); 5085 Value *Elt = Builder.CreateBitCast(Val, Ty); 5086 Elt = EmitNeonSplat(Elt, CI); 5087 Elt = Builder.CreateBitCast(Elt, Val->getType()); 5088 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 5089 } 5090 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5091 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5092 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5093 } 5094 case NEON::BI__builtin_neon_vqrshrn_n_v: 5095 Int = 5096 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 5097 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 5098 1, true); 5099 case NEON::BI__builtin_neon_vqrshrun_n_v: 5100 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 5101 Ops, "vqrshrun_n", 1, true); 5102 case NEON::BI__builtin_neon_vqshrn_n_v: 5103 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 5104 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 5105 1, true); 5106 case NEON::BI__builtin_neon_vqshrun_n_v: 5107 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 5108 Ops, "vqshrun_n", 1, true); 5109 case NEON::BI__builtin_neon_vrecpe_v: 5110 case NEON::BI__builtin_neon_vrecpeq_v: 5111 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 5112 Ops, "vrecpe"); 5113 case NEON::BI__builtin_neon_vrshrn_n_v: 5114 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 5115 Ops, "vrshrn_n", 1, true); 5116 case NEON::BI__builtin_neon_vrsra_n_v: 5117 case NEON::BI__builtin_neon_vrsraq_n_v: 5118 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5119 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5120 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 5121 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 5122 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 5123 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 5124 case NEON::BI__builtin_neon_vsri_n_v: 5125 case NEON::BI__builtin_neon_vsriq_n_v: 5126 rightShift = true; 5127 LLVM_FALLTHROUGH; 5128 case NEON::BI__builtin_neon_vsli_n_v: 5129 case NEON::BI__builtin_neon_vsliq_n_v: 5130 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 5131 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 5132 Ops, "vsli_n"); 5133 case NEON::BI__builtin_neon_vsra_n_v: 5134 case NEON::BI__builtin_neon_vsraq_n_v: 5135 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5136 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5137 return Builder.CreateAdd(Ops[0], Ops[1]); 5138 case NEON::BI__builtin_neon_vst1q_lane_v: 5139 // Handle 64-bit integer elements as a special case. Use a shuffle to get 5140 // a one-element vector and avoid poor code for i64 in the backend. 
5141 if (VTy->getElementType()->isIntegerTy(64)) { 5142 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5143 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 5144 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5145 Ops[2] = getAlignmentValue32(PtrOp0); 5146 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 5147 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 5148 Tys), Ops); 5149 } 5150 // fall through 5151 case NEON::BI__builtin_neon_vst1_lane_v: { 5152 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5153 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5154 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5155 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 5156 return St; 5157 } 5158 case NEON::BI__builtin_neon_vtbl1_v: 5159 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 5160 Ops, "vtbl1"); 5161 case NEON::BI__builtin_neon_vtbl2_v: 5162 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 5163 Ops, "vtbl2"); 5164 case NEON::BI__builtin_neon_vtbl3_v: 5165 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 5166 Ops, "vtbl3"); 5167 case NEON::BI__builtin_neon_vtbl4_v: 5168 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 5169 Ops, "vtbl4"); 5170 case NEON::BI__builtin_neon_vtbx1_v: 5171 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 5172 Ops, "vtbx1"); 5173 case NEON::BI__builtin_neon_vtbx2_v: 5174 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 5175 Ops, "vtbx2"); 5176 case NEON::BI__builtin_neon_vtbx3_v: 5177 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 5178 Ops, "vtbx3"); 5179 case NEON::BI__builtin_neon_vtbx4_v: 5180 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 5181 Ops, "vtbx4"); 5182 } 5183 } 5184 5185 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 5186 const CallExpr *E, 5187 SmallVectorImpl<Value *> &Ops) { 
5188 unsigned int Int = 0; 5189 const char *s = nullptr; 5190 5191 switch (BuiltinID) { 5192 default: 5193 return nullptr; 5194 case NEON::BI__builtin_neon_vtbl1_v: 5195 case NEON::BI__builtin_neon_vqtbl1_v: 5196 case NEON::BI__builtin_neon_vqtbl1q_v: 5197 case NEON::BI__builtin_neon_vtbl2_v: 5198 case NEON::BI__builtin_neon_vqtbl2_v: 5199 case NEON::BI__builtin_neon_vqtbl2q_v: 5200 case NEON::BI__builtin_neon_vtbl3_v: 5201 case NEON::BI__builtin_neon_vqtbl3_v: 5202 case NEON::BI__builtin_neon_vqtbl3q_v: 5203 case NEON::BI__builtin_neon_vtbl4_v: 5204 case NEON::BI__builtin_neon_vqtbl4_v: 5205 case NEON::BI__builtin_neon_vqtbl4q_v: 5206 break; 5207 case NEON::BI__builtin_neon_vtbx1_v: 5208 case NEON::BI__builtin_neon_vqtbx1_v: 5209 case NEON::BI__builtin_neon_vqtbx1q_v: 5210 case NEON::BI__builtin_neon_vtbx2_v: 5211 case NEON::BI__builtin_neon_vqtbx2_v: 5212 case NEON::BI__builtin_neon_vqtbx2q_v: 5213 case NEON::BI__builtin_neon_vtbx3_v: 5214 case NEON::BI__builtin_neon_vqtbx3_v: 5215 case NEON::BI__builtin_neon_vqtbx3q_v: 5216 case NEON::BI__builtin_neon_vtbx4_v: 5217 case NEON::BI__builtin_neon_vqtbx4_v: 5218 case NEON::BI__builtin_neon_vqtbx4q_v: 5219 break; 5220 } 5221 5222 assert(E->getNumArgs() >= 3); 5223 5224 // Get the last argument, which specifies the vector type. 5225 llvm::APSInt Result; 5226 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5227 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5228 return nullptr; 5229 5230 // Determine the type of this overloaded NEON intrinsic. 5231 NeonTypeFlags Type(Result.getZExtValue()); 5232 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 5233 if (!Ty) 5234 return nullptr; 5235 5236 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5237 5238 // AArch64 scalar builtins are not overloaded, they do not have an extra 5239 // argument that specifies the vector type, need to handle each case. 
5240 switch (BuiltinID) { 5241 case NEON::BI__builtin_neon_vtbl1_v: { 5242 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 5243 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 5244 "vtbl1"); 5245 } 5246 case NEON::BI__builtin_neon_vtbl2_v: { 5247 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 5248 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 5249 "vtbl1"); 5250 } 5251 case NEON::BI__builtin_neon_vtbl3_v: { 5252 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 5253 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 5254 "vtbl2"); 5255 } 5256 case NEON::BI__builtin_neon_vtbl4_v: { 5257 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 5258 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 5259 "vtbl2"); 5260 } 5261 case NEON::BI__builtin_neon_vtbx1_v: { 5262 Value *TblRes = 5263 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 5264 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 5265 5266 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 5267 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 5268 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5269 5270 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 5271 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5272 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5273 } 5274 case NEON::BI__builtin_neon_vtbx2_v: { 5275 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 5276 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 5277 "vtbx1"); 5278 } 5279 case NEON::BI__builtin_neon_vtbx3_v: { 5280 Value *TblRes = 5281 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 5282 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 5283 5284 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 5285 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 5286 TwentyFourV); 5287 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5288 5289 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 5290 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5291 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5292 } 5293 case NEON::BI__builtin_neon_vtbx4_v: { 5294 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 5295 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 5296 "vtbx2"); 5297 } 5298 case NEON::BI__builtin_neon_vqtbl1_v: 5299 case NEON::BI__builtin_neon_vqtbl1q_v: 5300 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 5301 case NEON::BI__builtin_neon_vqtbl2_v: 5302 case NEON::BI__builtin_neon_vqtbl2q_v: { 5303 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 5304 case NEON::BI__builtin_neon_vqtbl3_v: 5305 case NEON::BI__builtin_neon_vqtbl3q_v: 5306 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 5307 case NEON::BI__builtin_neon_vqtbl4_v: 5308 case NEON::BI__builtin_neon_vqtbl4q_v: 5309 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 5310 case NEON::BI__builtin_neon_vqtbx1_v: 5311 case NEON::BI__builtin_neon_vqtbx1q_v: 5312 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 5313 case NEON::BI__builtin_neon_vqtbx2_v: 5314 case NEON::BI__builtin_neon_vqtbx2q_v: 5315 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 5316 case NEON::BI__builtin_neon_vqtbx3_v: 5317 case NEON::BI__builtin_neon_vqtbx3q_v: 5318 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 5319 case NEON::BI__builtin_neon_vqtbx4_v: 5320 case NEON::BI__builtin_neon_vqtbx4q_v: 5321 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 5322 } 5323 } 5324 5325 if (!Int) 5326 return nullptr; 5327 5328 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 5329 return CGF.EmitNeonCall(F, Ops, s); 5330 } 5331 5332 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 5333 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 5334 Op = Builder.CreateBitCast(Op, Int16Ty); 5335 Value *V = UndefValue::get(VTy); 5336 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 5337 Op = 
Builder.CreateInsertElement(V, Op, CI); 5338 return Op; 5339 } 5340 5341 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 5342 const CallExpr *E) { 5343 unsigned HintID = static_cast<unsigned>(-1); 5344 switch (BuiltinID) { 5345 default: break; 5346 case AArch64::BI__builtin_arm_nop: 5347 HintID = 0; 5348 break; 5349 case AArch64::BI__builtin_arm_yield: 5350 HintID = 1; 5351 break; 5352 case AArch64::BI__builtin_arm_wfe: 5353 HintID = 2; 5354 break; 5355 case AArch64::BI__builtin_arm_wfi: 5356 HintID = 3; 5357 break; 5358 case AArch64::BI__builtin_arm_sev: 5359 HintID = 4; 5360 break; 5361 case AArch64::BI__builtin_arm_sevl: 5362 HintID = 5; 5363 break; 5364 } 5365 5366 if (HintID != static_cast<unsigned>(-1)) { 5367 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 5368 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 5369 } 5370 5371 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 5372 Value *Address = EmitScalarExpr(E->getArg(0)); 5373 Value *RW = EmitScalarExpr(E->getArg(1)); 5374 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 5375 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 5376 Value *IsData = EmitScalarExpr(E->getArg(4)); 5377 5378 Value *Locality = nullptr; 5379 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 5380 // Temporal fetch, needs to convert cache level to locality. 5381 Locality = llvm::ConstantInt::get(Int32Ty, 5382 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 5383 } else { 5384 // Streaming fetch. 5385 Locality = llvm::ConstantInt::get(Int32Ty, 0); 5386 } 5387 5388 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 5389 // PLDL3STRM or PLDL2STRM. 
5390 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5391 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5392 } 5393 5394 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5395 assert((getContext().getTypeSize(E->getType()) == 32) && 5396 "rbit of unusual size!"); 5397 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5398 return Builder.CreateCall( 5399 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5400 } 5401 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5402 assert((getContext().getTypeSize(E->getType()) == 64) && 5403 "rbit of unusual size!"); 5404 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5405 return Builder.CreateCall( 5406 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5407 } 5408 5409 if (BuiltinID == AArch64::BI__clear_cache) { 5410 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5411 const FunctionDecl *FD = E->getDirectCallee(); 5412 Value *Ops[2]; 5413 for (unsigned i = 0; i < 2; i++) 5414 Ops[i] = EmitScalarExpr(E->getArg(i)); 5415 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5416 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5417 StringRef Name = FD->getName(); 5418 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5419 } 5420 5421 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5422 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5423 getContext().getTypeSize(E->getType()) == 128) { 5424 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5425 ? 
Intrinsic::aarch64_ldaxp 5426 : Intrinsic::aarch64_ldxp); 5427 5428 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5429 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5430 "ldxp"); 5431 5432 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5433 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5434 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5435 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5436 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5437 5438 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5439 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5440 Val = Builder.CreateOr(Val, Val1); 5441 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5442 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5443 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5444 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5445 5446 QualType Ty = E->getType(); 5447 llvm::Type *RealResTy = ConvertType(Ty); 5448 llvm::Type *PtrTy = llvm::IntegerType::get( 5449 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5450 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5451 5452 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5453 ? Intrinsic::aarch64_ldaxr 5454 : Intrinsic::aarch64_ldxr, 5455 PtrTy); 5456 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5457 5458 if (RealResTy->isPointerTy()) 5459 return Builder.CreateIntToPtr(Val, RealResTy); 5460 5461 llvm::Type *IntResTy = llvm::IntegerType::get( 5462 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5463 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5464 return Builder.CreateBitCast(Val, RealResTy); 5465 } 5466 5467 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5468 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5469 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5470 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5471 ? 
Intrinsic::aarch64_stlxp 5472 : Intrinsic::aarch64_stxp); 5473 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 5474 5475 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5476 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5477 5478 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5479 llvm::Value *Val = Builder.CreateLoad(Tmp); 5480 5481 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5482 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5483 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5484 Int8PtrTy); 5485 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5486 } 5487 5488 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5489 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5490 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5491 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5492 5493 QualType Ty = E->getArg(0)->getType(); 5494 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5495 getContext().getTypeSize(Ty)); 5496 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5497 5498 if (StoreVal->getType()->isPointerTy()) 5499 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5500 else { 5501 llvm::Type *IntTy = llvm::IntegerType::get( 5502 getLLVMContext(), 5503 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5504 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5505 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5506 } 5507 5508 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5509 ? 
Intrinsic::aarch64_stlxr 5510 : Intrinsic::aarch64_stxr, 5511 StoreAddr->getType()); 5512 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5513 } 5514 5515 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5516 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5517 return Builder.CreateCall(F); 5518 } 5519 5520 // CRC32 5521 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5522 switch (BuiltinID) { 5523 case AArch64::BI__builtin_arm_crc32b: 5524 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5525 case AArch64::BI__builtin_arm_crc32cb: 5526 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5527 case AArch64::BI__builtin_arm_crc32h: 5528 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5529 case AArch64::BI__builtin_arm_crc32ch: 5530 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5531 case AArch64::BI__builtin_arm_crc32w: 5532 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5533 case AArch64::BI__builtin_arm_crc32cw: 5534 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5535 case AArch64::BI__builtin_arm_crc32d: 5536 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5537 case AArch64::BI__builtin_arm_crc32cd: 5538 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5539 } 5540 5541 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5542 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5543 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5544 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5545 5546 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 5547 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 5548 5549 return Builder.CreateCall(F, {Arg0, Arg1}); 5550 } 5551 5552 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 5553 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5554 BuiltinID == AArch64::BI__builtin_arm_rsrp || 5555 BuiltinID == AArch64::BI__builtin_arm_wsr || 5556 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 5557 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 5558 5559 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 5560 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5561 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5562 5563 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5564 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5565 5566 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5567 BuiltinID != AArch64::BI__builtin_arm_wsr; 5568 5569 llvm::Type *ValueType; 5570 llvm::Type *RegisterType = Int64Ty; 5571 if (IsPointerBuiltin) { 5572 ValueType = VoidPtrTy; 5573 } else if (Is64Bit) { 5574 ValueType = Int64Ty; 5575 } else { 5576 ValueType = Int32Ty; 5577 } 5578 5579 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5580 } 5581 5582 // Find out if any arguments are required to be integer constant 5583 // expressions. 5584 unsigned ICEArguments = 0; 5585 ASTContext::GetBuiltinTypeError Error; 5586 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5587 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5588 5589 llvm::SmallVector<Value*, 4> Ops; 5590 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5591 if ((ICEArguments & (1 << i)) == 0) { 5592 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5593 } else { 5594 // If this is required to be a constant, constant fold it so that we know 5595 // that the generated intrinsic gets a ConstantInt. 
5596 llvm::APSInt Result; 5597 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5598 assert(IsConst && "Constant arg isn't actually constant?"); 5599 (void)IsConst; 5600 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5601 } 5602 } 5603 5604 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5605 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5606 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5607 5608 if (Builtin) { 5609 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5610 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5611 assert(Result && "SISD intrinsic should have been handled"); 5612 return Result; 5613 } 5614 5615 llvm::APSInt Result; 5616 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5617 NeonTypeFlags Type(0); 5618 if (Arg->isIntegerConstantExpr(Result, getContext())) 5619 // Determine the type of this overloaded NEON intrinsic. 5620 Type = NeonTypeFlags(Result.getZExtValue()); 5621 5622 bool usgn = Type.isUnsigned(); 5623 bool quad = Type.isQuad(); 5624 5625 // Handle non-overloaded intrinsics first. 
5626 switch (BuiltinID) { 5627 default: break; 5628 case NEON::BI__builtin_neon_vldrq_p128: { 5629 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 5630 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 5631 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5632 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 5633 CharUnits::fromQuantity(16)); 5634 } 5635 case NEON::BI__builtin_neon_vstrq_p128: { 5636 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5637 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5638 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5639 } 5640 case NEON::BI__builtin_neon_vcvts_u32_f32: 5641 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5642 usgn = true; 5643 // FALL THROUGH 5644 case NEON::BI__builtin_neon_vcvts_s32_f32: 5645 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5646 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5647 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5648 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5649 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5650 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5651 if (usgn) 5652 return Builder.CreateFPToUI(Ops[0], InTy); 5653 return Builder.CreateFPToSI(Ops[0], InTy); 5654 } 5655 case NEON::BI__builtin_neon_vcvts_f32_u32: 5656 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5657 usgn = true; 5658 // FALL THROUGH 5659 case NEON::BI__builtin_neon_vcvts_f32_s32: 5660 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5661 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5662 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5663 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5664 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5665 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5666 if (usgn) 5667 return Builder.CreateUIToFP(Ops[0], FTy); 5668 return Builder.CreateSIToFP(Ops[0], FTy); 5669 } 5670 case NEON::BI__builtin_neon_vpaddd_s64: { 5671 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5672 Value *Vec = EmitScalarExpr(E->getArg(0)); 5673 // The vector is v2f64, so make sure it's bitcast to that. 5674 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5675 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5676 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5677 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5678 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5679 // Pairwise addition of a v2f64 into a scalar f64. 5680 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5681 } 5682 case NEON::BI__builtin_neon_vpaddd_f64: { 5683 llvm::Type *Ty = 5684 llvm::VectorType::get(DoubleTy, 2); 5685 Value *Vec = EmitScalarExpr(E->getArg(0)); 5686 // The vector is v2f64, so make sure it's bitcast to that. 5687 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5688 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5689 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5690 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5691 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5692 // Pairwise addition of a v2f64 into a scalar f64. 5693 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5694 } 5695 case NEON::BI__builtin_neon_vpadds_f32: { 5696 llvm::Type *Ty = 5697 llvm::VectorType::get(FloatTy, 2); 5698 Value *Vec = EmitScalarExpr(E->getArg(0)); 5699 // The vector is v2f32, so make sure it's bitcast to that. 
5700 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5701 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5702 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5703 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5704 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5705 // Pairwise addition of a v2f32 into a scalar f32. 5706 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5707 } 5708 case NEON::BI__builtin_neon_vceqzd_s64: 5709 case NEON::BI__builtin_neon_vceqzd_f64: 5710 case NEON::BI__builtin_neon_vceqzs_f32: 5711 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5712 return EmitAArch64CompareBuiltinExpr( 5713 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5714 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5715 case NEON::BI__builtin_neon_vcgezd_s64: 5716 case NEON::BI__builtin_neon_vcgezd_f64: 5717 case NEON::BI__builtin_neon_vcgezs_f32: 5718 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5719 return EmitAArch64CompareBuiltinExpr( 5720 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5721 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5722 case NEON::BI__builtin_neon_vclezd_s64: 5723 case NEON::BI__builtin_neon_vclezd_f64: 5724 case NEON::BI__builtin_neon_vclezs_f32: 5725 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5726 return EmitAArch64CompareBuiltinExpr( 5727 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5728 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5729 case NEON::BI__builtin_neon_vcgtzd_s64: 5730 case NEON::BI__builtin_neon_vcgtzd_f64: 5731 case NEON::BI__builtin_neon_vcgtzs_f32: 5732 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5733 return EmitAArch64CompareBuiltinExpr( 5734 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5735 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5736 case NEON::BI__builtin_neon_vcltzd_s64: 5737 case NEON::BI__builtin_neon_vcltzd_f64: 5738 case NEON::BI__builtin_neon_vcltzs_f32: 5739 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5740 return 
EmitAArch64CompareBuiltinExpr( 5741 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5742 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5743 5744 case NEON::BI__builtin_neon_vceqzd_u64: { 5745 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5746 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5747 Ops[0] = 5748 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5749 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5750 } 5751 case NEON::BI__builtin_neon_vceqd_f64: 5752 case NEON::BI__builtin_neon_vcled_f64: 5753 case NEON::BI__builtin_neon_vcltd_f64: 5754 case NEON::BI__builtin_neon_vcged_f64: 5755 case NEON::BI__builtin_neon_vcgtd_f64: { 5756 llvm::CmpInst::Predicate P; 5757 switch (BuiltinID) { 5758 default: llvm_unreachable("missing builtin ID in switch!"); 5759 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5760 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5761 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5762 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5763 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5764 } 5765 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5766 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5767 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5768 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5769 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5770 } 5771 case NEON::BI__builtin_neon_vceqs_f32: 5772 case NEON::BI__builtin_neon_vcles_f32: 5773 case NEON::BI__builtin_neon_vclts_f32: 5774 case NEON::BI__builtin_neon_vcges_f32: 5775 case NEON::BI__builtin_neon_vcgts_f32: { 5776 llvm::CmpInst::Predicate P; 5777 switch (BuiltinID) { 5778 default: llvm_unreachable("missing builtin ID in switch!"); 5779 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 5780 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 5781 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5782 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5783 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5784 } 5785 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5786 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5787 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5788 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5789 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5790 } 5791 case NEON::BI__builtin_neon_vceqd_s64: 5792 case NEON::BI__builtin_neon_vceqd_u64: 5793 case NEON::BI__builtin_neon_vcgtd_s64: 5794 case NEON::BI__builtin_neon_vcgtd_u64: 5795 case NEON::BI__builtin_neon_vcltd_s64: 5796 case NEON::BI__builtin_neon_vcltd_u64: 5797 case NEON::BI__builtin_neon_vcged_u64: 5798 case NEON::BI__builtin_neon_vcged_s64: 5799 case NEON::BI__builtin_neon_vcled_u64: 5800 case NEON::BI__builtin_neon_vcled_s64: { 5801 llvm::CmpInst::Predicate P; 5802 switch (BuiltinID) { 5803 default: llvm_unreachable("missing builtin ID in switch!"); 5804 case NEON::BI__builtin_neon_vceqd_s64: 5805 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5806 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5807 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5808 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5809 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5810 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5811 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5812 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5813 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5814 } 5815 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5816 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5817 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 5818 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5819 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5820 } 5821 case NEON::BI__builtin_neon_vtstd_s64: 5822 case NEON::BI__builtin_neon_vtstd_u64: { 5823 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5824 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5825 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5826 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 5827 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 5828 llvm::Constant::getNullValue(Int64Ty)); 5829 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 5830 } 5831 case NEON::BI__builtin_neon_vset_lane_i8: 5832 case NEON::BI__builtin_neon_vset_lane_i16: 5833 case NEON::BI__builtin_neon_vset_lane_i32: 5834 case NEON::BI__builtin_neon_vset_lane_i64: 5835 case NEON::BI__builtin_neon_vset_lane_f32: 5836 case NEON::BI__builtin_neon_vsetq_lane_i8: 5837 case NEON::BI__builtin_neon_vsetq_lane_i16: 5838 case NEON::BI__builtin_neon_vsetq_lane_i32: 5839 case NEON::BI__builtin_neon_vsetq_lane_i64: 5840 case NEON::BI__builtin_neon_vsetq_lane_f32: 5841 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5842 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5843 case NEON::BI__builtin_neon_vset_lane_f64: 5844 // The vector type needs a cast for the v1f64 variant. 5845 Ops[1] = Builder.CreateBitCast(Ops[1], 5846 llvm::VectorType::get(DoubleTy, 1)); 5847 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5848 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5849 case NEON::BI__builtin_neon_vsetq_lane_f64: 5850 // The vector type needs a cast for the v2f64 variant. 
5851 Ops[1] = Builder.CreateBitCast(Ops[1], 5852 llvm::VectorType::get(DoubleTy, 2)); 5853 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5854 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5855 5856 case NEON::BI__builtin_neon_vget_lane_i8: 5857 case NEON::BI__builtin_neon_vdupb_lane_i8: 5858 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 5859 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5860 "vget_lane"); 5861 case NEON::BI__builtin_neon_vgetq_lane_i8: 5862 case NEON::BI__builtin_neon_vdupb_laneq_i8: 5863 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 5864 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5865 "vgetq_lane"); 5866 case NEON::BI__builtin_neon_vget_lane_i16: 5867 case NEON::BI__builtin_neon_vduph_lane_i16: 5868 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 5869 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5870 "vget_lane"); 5871 case NEON::BI__builtin_neon_vgetq_lane_i16: 5872 case NEON::BI__builtin_neon_vduph_laneq_i16: 5873 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 5874 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5875 "vgetq_lane"); 5876 case NEON::BI__builtin_neon_vget_lane_i32: 5877 case NEON::BI__builtin_neon_vdups_lane_i32: 5878 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 5879 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5880 "vget_lane"); 5881 case NEON::BI__builtin_neon_vdups_lane_f32: 5882 Ops[0] = Builder.CreateBitCast(Ops[0], 5883 llvm::VectorType::get(FloatTy, 2)); 5884 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5885 "vdups_lane"); 5886 case NEON::BI__builtin_neon_vgetq_lane_i32: 5887 case NEON::BI__builtin_neon_vdups_laneq_i32: 5888 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 5889 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5890 "vgetq_lane"); 5891 case NEON::BI__builtin_neon_vget_lane_i64: 5892 case NEON::BI__builtin_neon_vdupd_lane_i64: 5893 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 5894 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5895 "vget_lane"); 5896 case NEON::BI__builtin_neon_vdupd_lane_f64: 5897 Ops[0] = Builder.CreateBitCast(Ops[0], 5898 llvm::VectorType::get(DoubleTy, 1)); 5899 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5900 "vdupd_lane"); 5901 case NEON::BI__builtin_neon_vgetq_lane_i64: 5902 case NEON::BI__builtin_neon_vdupd_laneq_i64: 5903 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 5904 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5905 "vgetq_lane"); 5906 case NEON::BI__builtin_neon_vget_lane_f32: 5907 Ops[0] = Builder.CreateBitCast(Ops[0], 5908 llvm::VectorType::get(FloatTy, 2)); 5909 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5910 "vget_lane"); 5911 case NEON::BI__builtin_neon_vget_lane_f64: 5912 Ops[0] = Builder.CreateBitCast(Ops[0], 5913 llvm::VectorType::get(DoubleTy, 1)); 5914 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5915 "vget_lane"); 5916 case NEON::BI__builtin_neon_vgetq_lane_f32: 5917 case NEON::BI__builtin_neon_vdups_laneq_f32: 5918 Ops[0] = Builder.CreateBitCast(Ops[0], 5919 llvm::VectorType::get(FloatTy, 4)); 5920 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5921 "vgetq_lane"); 5922 case NEON::BI__builtin_neon_vgetq_lane_f64: 5923 case NEON::BI__builtin_neon_vdupd_laneq_f64: 5924 Ops[0] = Builder.CreateBitCast(Ops[0], 5925 llvm::VectorType::get(DoubleTy, 2)); 5926 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5927 "vgetq_lane"); 5928 case NEON::BI__builtin_neon_vaddd_s64: 
5929 case NEON::BI__builtin_neon_vaddd_u64: 5930 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 5931 case NEON::BI__builtin_neon_vsubd_s64: 5932 case NEON::BI__builtin_neon_vsubd_u64: 5933 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 5934 case NEON::BI__builtin_neon_vqdmlalh_s16: 5935 case NEON::BI__builtin_neon_vqdmlslh_s16: { 5936 SmallVector<Value *, 2> ProductOps; 5937 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5938 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 5939 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5940 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5941 ProductOps, "vqdmlXl"); 5942 Constant *CI = ConstantInt::get(SizeTy, 0); 5943 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5944 5945 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 5946 ? Intrinsic::aarch64_neon_sqadd 5947 : Intrinsic::aarch64_neon_sqsub; 5948 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 5949 } 5950 case NEON::BI__builtin_neon_vqshlud_n_s64: { 5951 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5952 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5953 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 5954 Ops, "vqshlu_n"); 5955 } 5956 case NEON::BI__builtin_neon_vqshld_n_u64: 5957 case NEON::BI__builtin_neon_vqshld_n_s64: { 5958 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 5959 ? Intrinsic::aarch64_neon_uqshl 5960 : Intrinsic::aarch64_neon_sqshl; 5961 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5962 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5963 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 5964 } 5965 case NEON::BI__builtin_neon_vrshrd_n_u64: 5966 case NEON::BI__builtin_neon_vrshrd_n_s64: { 5967 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 5968 ? 
Intrinsic::aarch64_neon_urshl 5969 : Intrinsic::aarch64_neon_srshl; 5970 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5971 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 5972 Ops[1] = ConstantInt::get(Int64Ty, -SV); 5973 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 5974 } 5975 case NEON::BI__builtin_neon_vrsrad_n_u64: 5976 case NEON::BI__builtin_neon_vrsrad_n_s64: { 5977 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 5978 ? Intrinsic::aarch64_neon_urshl 5979 : Intrinsic::aarch64_neon_srshl; 5980 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5981 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 5982 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 5983 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 5984 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 5985 } 5986 case NEON::BI__builtin_neon_vshld_n_s64: 5987 case NEON::BI__builtin_neon_vshld_n_u64: { 5988 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5989 return Builder.CreateShl( 5990 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 5991 } 5992 case NEON::BI__builtin_neon_vshrd_n_s64: { 5993 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5994 return Builder.CreateAShr( 5995 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5996 Amt->getZExtValue())), 5997 "shrd_n"); 5998 } 5999 case NEON::BI__builtin_neon_vshrd_n_u64: { 6000 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6001 uint64_t ShiftAmt = Amt->getZExtValue(); 6002 // Right-shifting an unsigned value by its size yields 0. 
6003 if (ShiftAmt == 64) 6004 return ConstantInt::get(Int64Ty, 0); 6005 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 6006 "shrd_n"); 6007 } 6008 case NEON::BI__builtin_neon_vsrad_n_s64: { 6009 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6010 Ops[1] = Builder.CreateAShr( 6011 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6012 Amt->getZExtValue())), 6013 "shrd_n"); 6014 return Builder.CreateAdd(Ops[0], Ops[1]); 6015 } 6016 case NEON::BI__builtin_neon_vsrad_n_u64: { 6017 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6018 uint64_t ShiftAmt = Amt->getZExtValue(); 6019 // Right-shifting an unsigned value by its size yields 0. 6020 // As Op + 0 = Op, return Ops[0] directly. 6021 if (ShiftAmt == 64) 6022 return Ops[0]; 6023 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 6024 "shrd_n"); 6025 return Builder.CreateAdd(Ops[0], Ops[1]); 6026 } 6027 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 6028 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 6029 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 6030 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 6031 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6032 "lane"); 6033 SmallVector<Value *, 2> ProductOps; 6034 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6035 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 6036 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6037 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6038 ProductOps, "vqdmlXl"); 6039 Constant *CI = ConstantInt::get(SizeTy, 0); 6040 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6041 Ops.pop_back(); 6042 6043 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 6044 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 6045 ? 
Intrinsic::aarch64_neon_sqadd 6046 : Intrinsic::aarch64_neon_sqsub; 6047 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 6048 } 6049 case NEON::BI__builtin_neon_vqdmlals_s32: 6050 case NEON::BI__builtin_neon_vqdmlsls_s32: { 6051 SmallVector<Value *, 2> ProductOps; 6052 ProductOps.push_back(Ops[1]); 6053 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 6054 Ops[1] = 6055 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6056 ProductOps, "vqdmlXl"); 6057 6058 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 6059 ? Intrinsic::aarch64_neon_sqadd 6060 : Intrinsic::aarch64_neon_sqsub; 6061 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 6062 } 6063 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 6064 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 6065 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 6066 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 6067 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6068 "lane"); 6069 SmallVector<Value *, 2> ProductOps; 6070 ProductOps.push_back(Ops[1]); 6071 ProductOps.push_back(Ops[2]); 6072 Ops[1] = 6073 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6074 ProductOps, "vqdmlXl"); 6075 Ops.pop_back(); 6076 6077 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 6078 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 6079 ? Intrinsic::aarch64_neon_sqadd 6080 : Intrinsic::aarch64_neon_sqsub; 6081 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 6082 } 6083 } 6084 6085 llvm::VectorType *VTy = GetNeonType(this, Type); 6086 llvm::Type *Ty = VTy; 6087 if (!Ty) 6088 return nullptr; 6089 6090 // Not all intrinsics handled by the common case work for AArch64 yet, so only 6091 // defer to common code if it's been added to our special map. 
6092 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 6093 AArch64SIMDIntrinsicsProvenSorted); 6094 6095 if (Builtin) 6096 return EmitCommonNeonBuiltinExpr( 6097 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 6098 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 6099 /*never use addresses*/ Address::invalid(), Address::invalid()); 6100 6101 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 6102 return V; 6103 6104 unsigned Int; 6105 switch (BuiltinID) { 6106 default: return nullptr; 6107 case NEON::BI__builtin_neon_vbsl_v: 6108 case NEON::BI__builtin_neon_vbslq_v: { 6109 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 6110 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 6111 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 6112 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 6113 6114 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 6115 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 6116 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 6117 return Builder.CreateBitCast(Ops[0], Ty); 6118 } 6119 case NEON::BI__builtin_neon_vfma_lane_v: 6120 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 6121 // The ARM builtins (and instructions) have the addend as the first 6122 // operand, but the 'fma' intrinsics have it last. Swap it around here. 6123 Value *Addend = Ops[0]; 6124 Value *Multiplicand = Ops[1]; 6125 Value *LaneSource = Ops[2]; 6126 Ops[0] = Multiplicand; 6127 Ops[1] = LaneSource; 6128 Ops[2] = Addend; 6129 6130 // Now adjust things to handle the lane access. 6131 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
6132 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 6133 VTy; 6134 llvm::Constant *cst = cast<Constant>(Ops[3]); 6135 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 6136 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 6137 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 6138 6139 Ops.pop_back(); 6140 Int = Intrinsic::fma; 6141 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 6142 } 6143 case NEON::BI__builtin_neon_vfma_laneq_v: { 6144 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 6145 // v1f64 fma should be mapped to Neon scalar f64 fma 6146 if (VTy && VTy->getElementType() == DoubleTy) { 6147 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6148 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6149 llvm::Type *VTy = GetNeonType(this, 6150 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 6151 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 6152 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6153 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 6154 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6155 return Builder.CreateBitCast(Result, Ty); 6156 } 6157 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6158 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6159 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6160 6161 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 6162 VTy->getNumElements() * 2); 6163 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 6164 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 6165 cast<ConstantInt>(Ops[3])); 6166 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 6167 6168 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6169 } 6170 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 6171 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6172 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6173 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6174 6175 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 6176 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 6177 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6178 } 6179 case NEON::BI__builtin_neon_vfmas_lane_f32: 6180 case NEON::BI__builtin_neon_vfmas_laneq_f32: 6181 case NEON::BI__builtin_neon_vfmad_lane_f64: 6182 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 6183 Ops.push_back(EmitScalarExpr(E->getArg(3))); 6184 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 6185 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6186 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6187 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6188 } 6189 case NEON::BI__builtin_neon_vmull_v: 6190 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6191 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 6192 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 6193 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 6194 case NEON::BI__builtin_neon_vmax_v: 6195 case NEON::BI__builtin_neon_vmaxq_v: 6196 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6197 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 6198 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 6199 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 6200 case NEON::BI__builtin_neon_vmin_v: 6201 case NEON::BI__builtin_neon_vminq_v: 6202 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6203 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 6204 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 6205 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 6206 case NEON::BI__builtin_neon_vabd_v: 6207 case NEON::BI__builtin_neon_vabdq_v: 6208 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6209 Int = usgn ? 
Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 6210 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 6211 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 6212 case NEON::BI__builtin_neon_vpadal_v: 6213 case NEON::BI__builtin_neon_vpadalq_v: { 6214 unsigned ArgElts = VTy->getNumElements(); 6215 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 6216 unsigned BitWidth = EltTy->getBitWidth(); 6217 llvm::Type *ArgTy = llvm::VectorType::get( 6218 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 6219 llvm::Type* Tys[2] = { VTy, ArgTy }; 6220 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 6221 SmallVector<llvm::Value*, 1> TmpOps; 6222 TmpOps.push_back(Ops[1]); 6223 Function *F = CGM.getIntrinsic(Int, Tys); 6224 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 6225 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 6226 return Builder.CreateAdd(tmp, addend); 6227 } 6228 case NEON::BI__builtin_neon_vpmin_v: 6229 case NEON::BI__builtin_neon_vpminq_v: 6230 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6231 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 6232 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 6233 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 6234 case NEON::BI__builtin_neon_vpmax_v: 6235 case NEON::BI__builtin_neon_vpmaxq_v: 6236 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6237 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 6238 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 6239 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 6240 case NEON::BI__builtin_neon_vminnm_v: 6241 case NEON::BI__builtin_neon_vminnmq_v: 6242 Int = Intrinsic::aarch64_neon_fminnm; 6243 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 6244 case NEON::BI__builtin_neon_vmaxnm_v: 6245 case NEON::BI__builtin_neon_vmaxnmq_v: 6246 Int = Intrinsic::aarch64_neon_fmaxnm; 6247 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 6248 case NEON::BI__builtin_neon_vrecpss_f32: { 6249 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6250 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 6251 Ops, "vrecps"); 6252 } 6253 case NEON::BI__builtin_neon_vrecpsd_f64: { 6254 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6255 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 6256 Ops, "vrecps"); 6257 } 6258 case NEON::BI__builtin_neon_vqshrun_n_v: 6259 Int = Intrinsic::aarch64_neon_sqshrun; 6260 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 6261 case NEON::BI__builtin_neon_vqrshrun_n_v: 6262 Int = Intrinsic::aarch64_neon_sqrshrun; 6263 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 6264 case NEON::BI__builtin_neon_vqshrn_n_v: 6265 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 6266 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 6267 case NEON::BI__builtin_neon_vrshrn_n_v: 6268 Int = Intrinsic::aarch64_neon_rshrn; 6269 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 6270 case NEON::BI__builtin_neon_vqrshrn_n_v: 6271 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 6272 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 6273 case NEON::BI__builtin_neon_vrnda_v: 6274 case NEON::BI__builtin_neon_vrndaq_v: { 6275 Int = Intrinsic::round; 6276 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 6277 } 6278 case NEON::BI__builtin_neon_vrndi_v: 6279 case NEON::BI__builtin_neon_vrndiq_v: { 6280 Int = Intrinsic::nearbyint; 6281 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 6282 } 6283 case NEON::BI__builtin_neon_vrndm_v: 6284 case NEON::BI__builtin_neon_vrndmq_v: { 6285 Int = Intrinsic::floor; 6286 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 6287 } 6288 case NEON::BI__builtin_neon_vrndn_v: 6289 case NEON::BI__builtin_neon_vrndnq_v: { 6290 Int = Intrinsic::aarch64_neon_frintn; 6291 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 6292 } 6293 case NEON::BI__builtin_neon_vrndp_v: 6294 case NEON::BI__builtin_neon_vrndpq_v: { 6295 Int = Intrinsic::ceil; 6296 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 6297 } 6298 case NEON::BI__builtin_neon_vrndx_v: 6299 case NEON::BI__builtin_neon_vrndxq_v: { 6300 Int = Intrinsic::rint; 6301 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 6302 } 6303 case NEON::BI__builtin_neon_vrnd_v: 6304 case NEON::BI__builtin_neon_vrndq_v: { 6305 Int = Intrinsic::trunc; 6306 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 6307 } 6308 case NEON::BI__builtin_neon_vceqz_v: 6309 case NEON::BI__builtin_neon_vceqzq_v: 6310 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 6311 ICmpInst::ICMP_EQ, "vceqz"); 6312 case NEON::BI__builtin_neon_vcgez_v: 6313 case NEON::BI__builtin_neon_vcgezq_v: 6314 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 6315 ICmpInst::ICMP_SGE, "vcgez"); 6316 case NEON::BI__builtin_neon_vclez_v: 6317 case NEON::BI__builtin_neon_vclezq_v: 6318 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 6319 ICmpInst::ICMP_SLE, "vclez"); 6320 case NEON::BI__builtin_neon_vcgtz_v: 6321 case NEON::BI__builtin_neon_vcgtzq_v: 6322 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 6323 ICmpInst::ICMP_SGT, "vcgtz"); 6324 case NEON::BI__builtin_neon_vcltz_v: 6325 case NEON::BI__builtin_neon_vcltzq_v: 6326 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 6327 ICmpInst::ICMP_SLT, "vcltz"); 6328 case NEON::BI__builtin_neon_vcvt_f64_v: 6329 case NEON::BI__builtin_neon_vcvtq_f64_v: 6330 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6331 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 6332 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 6333 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 6334 case NEON::BI__builtin_neon_vcvt_f64_f32: { 6335 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 6336 "unexpected vcvt_f64_f32 builtin"); 6337 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 6338 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6339 6340 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 6341 } 6342 case NEON::BI__builtin_neon_vcvt_f32_f64: { 6343 assert(Type.getEltType() == NeonTypeFlags::Float32 && 6344 "unexpected vcvt_f32_f64 builtin"); 6345 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 6346 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6347 6348 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 6349 } 6350 case NEON::BI__builtin_neon_vcvt_s32_v: 6351 case NEON::BI__builtin_neon_vcvt_u32_v: 6352 case NEON::BI__builtin_neon_vcvt_s64_v: 6353 case NEON::BI__builtin_neon_vcvt_u64_v: 6354 case NEON::BI__builtin_neon_vcvtq_s32_v: 6355 case NEON::BI__builtin_neon_vcvtq_u32_v: 6356 case NEON::BI__builtin_neon_vcvtq_s64_v: 6357 case NEON::BI__builtin_neon_vcvtq_u64_v: { 6358 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, 
Type)); 6359 if (usgn) 6360 return Builder.CreateFPToUI(Ops[0], Ty); 6361 return Builder.CreateFPToSI(Ops[0], Ty); 6362 } 6363 case NEON::BI__builtin_neon_vcvta_s32_v: 6364 case NEON::BI__builtin_neon_vcvtaq_s32_v: 6365 case NEON::BI__builtin_neon_vcvta_u32_v: 6366 case NEON::BI__builtin_neon_vcvtaq_u32_v: 6367 case NEON::BI__builtin_neon_vcvta_s64_v: 6368 case NEON::BI__builtin_neon_vcvtaq_s64_v: 6369 case NEON::BI__builtin_neon_vcvta_u64_v: 6370 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 6371 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 6372 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6373 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 6374 } 6375 case NEON::BI__builtin_neon_vcvtm_s32_v: 6376 case NEON::BI__builtin_neon_vcvtmq_s32_v: 6377 case NEON::BI__builtin_neon_vcvtm_u32_v: 6378 case NEON::BI__builtin_neon_vcvtmq_u32_v: 6379 case NEON::BI__builtin_neon_vcvtm_s64_v: 6380 case NEON::BI__builtin_neon_vcvtmq_s64_v: 6381 case NEON::BI__builtin_neon_vcvtm_u64_v: 6382 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 6383 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 6384 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6385 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 6386 } 6387 case NEON::BI__builtin_neon_vcvtn_s32_v: 6388 case NEON::BI__builtin_neon_vcvtnq_s32_v: 6389 case NEON::BI__builtin_neon_vcvtn_u32_v: 6390 case NEON::BI__builtin_neon_vcvtnq_u32_v: 6391 case NEON::BI__builtin_neon_vcvtn_s64_v: 6392 case NEON::BI__builtin_neon_vcvtnq_s64_v: 6393 case NEON::BI__builtin_neon_vcvtn_u64_v: 6394 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 6395 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 6396 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6397 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 6398 } 6399 case NEON::BI__builtin_neon_vcvtp_s32_v: 6400 case NEON::BI__builtin_neon_vcvtpq_s32_v: 6401 case NEON::BI__builtin_neon_vcvtp_u32_v: 6402 case NEON::BI__builtin_neon_vcvtpq_u32_v: 6403 case NEON::BI__builtin_neon_vcvtp_s64_v: 6404 case NEON::BI__builtin_neon_vcvtpq_s64_v: 6405 case NEON::BI__builtin_neon_vcvtp_u64_v: 6406 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 6407 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 6408 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6409 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 6410 } 6411 case NEON::BI__builtin_neon_vmulx_v: 6412 case NEON::BI__builtin_neon_vmulxq_v: { 6413 Int = Intrinsic::aarch64_neon_fmulx; 6414 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 6415 } 6416 case NEON::BI__builtin_neon_vmul_lane_v: 6417 case NEON::BI__builtin_neon_vmul_laneq_v: { 6418 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 6419 bool Quad = false; 6420 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 6421 Quad = true; 6422 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6423 llvm::Type *VTy = GetNeonType(this, 6424 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 6425 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6426 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 6427 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 6428 return Builder.CreateBitCast(Result, Ty); 6429 } 6430 case NEON::BI__builtin_neon_vnegd_s64: 6431 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 6432 case NEON::BI__builtin_neon_vpmaxnm_v: 6433 case NEON::BI__builtin_neon_vpmaxnmq_v: { 6434 Int = Intrinsic::aarch64_neon_fmaxnmp; 6435 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 6436 } 6437 case 
NEON::BI__builtin_neon_vpminnm_v: 6438 case NEON::BI__builtin_neon_vpminnmq_v: { 6439 Int = Intrinsic::aarch64_neon_fminnmp; 6440 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 6441 } 6442 case NEON::BI__builtin_neon_vsqrt_v: 6443 case NEON::BI__builtin_neon_vsqrtq_v: { 6444 Int = Intrinsic::sqrt; 6445 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6446 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 6447 } 6448 case NEON::BI__builtin_neon_vrbit_v: 6449 case NEON::BI__builtin_neon_vrbitq_v: { 6450 Int = Intrinsic::aarch64_neon_rbit; 6451 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 6452 } 6453 case NEON::BI__builtin_neon_vaddv_u8: 6454 // FIXME: These are handled by the AArch64 scalar code. 6455 usgn = true; 6456 // FALLTHROUGH 6457 case NEON::BI__builtin_neon_vaddv_s8: { 6458 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6459 Ty = Int32Ty; 6460 VTy = llvm::VectorType::get(Int8Ty, 8); 6461 llvm::Type *Tys[2] = { Ty, VTy }; 6462 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6463 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6464 return Builder.CreateTrunc(Ops[0], Int8Ty); 6465 } 6466 case NEON::BI__builtin_neon_vaddv_u16: 6467 usgn = true; 6468 // FALLTHROUGH 6469 case NEON::BI__builtin_neon_vaddv_s16: { 6470 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6471 Ty = Int32Ty; 6472 VTy = llvm::VectorType::get(Int16Ty, 4); 6473 llvm::Type *Tys[2] = { Ty, VTy }; 6474 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6475 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6476 return Builder.CreateTrunc(Ops[0], Int16Ty); 6477 } 6478 case NEON::BI__builtin_neon_vaddvq_u8: 6479 usgn = true; 6480 // FALLTHROUGH 6481 case NEON::BI__builtin_neon_vaddvq_s8: { 6482 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6483 Ty = Int32Ty; 6484 VTy = llvm::VectorType::get(Int8Ty, 16); 6485 llvm::Type *Tys[2] = { Ty, VTy }; 6486 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6487 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6488 return Builder.CreateTrunc(Ops[0], Int8Ty); 6489 } 6490 case NEON::BI__builtin_neon_vaddvq_u16: 6491 usgn = true; 6492 // FALLTHROUGH 6493 case NEON::BI__builtin_neon_vaddvq_s16: { 6494 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6495 Ty = Int32Ty; 6496 VTy = llvm::VectorType::get(Int16Ty, 8); 6497 llvm::Type *Tys[2] = { Ty, VTy }; 6498 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6499 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6500 return Builder.CreateTrunc(Ops[0], Int16Ty); 6501 } 6502 case NEON::BI__builtin_neon_vmaxv_u8: { 6503 Int = Intrinsic::aarch64_neon_umaxv; 6504 Ty = Int32Ty; 6505 VTy = llvm::VectorType::get(Int8Ty, 8); 6506 llvm::Type *Tys[2] = { Ty, VTy }; 6507 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6508 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6509 return Builder.CreateTrunc(Ops[0], Int8Ty); 6510 } 6511 case NEON::BI__builtin_neon_vmaxv_u16: { 6512 Int = Intrinsic::aarch64_neon_umaxv; 6513 Ty = Int32Ty; 6514 VTy = llvm::VectorType::get(Int16Ty, 4); 6515 llvm::Type *Tys[2] = { Ty, VTy }; 6516 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6517 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6518 return Builder.CreateTrunc(Ops[0], Int16Ty); 6519 } 6520 case NEON::BI__builtin_neon_vmaxvq_u8: { 6521 Int = Intrinsic::aarch64_neon_umaxv; 6522 Ty = Int32Ty; 6523 VTy = llvm::VectorType::get(Int8Ty, 16); 6524 llvm::Type *Tys[2] = { Ty, VTy }; 6525 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6526 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6527 return Builder.CreateTrunc(Ops[0], Int8Ty); 6528 } 6529 case NEON::BI__builtin_neon_vmaxvq_u16: { 6530 Int = 
Intrinsic::aarch64_neon_umaxv; 6531 Ty = Int32Ty; 6532 VTy = llvm::VectorType::get(Int16Ty, 8); 6533 llvm::Type *Tys[2] = { Ty, VTy }; 6534 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6535 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6536 return Builder.CreateTrunc(Ops[0], Int16Ty); 6537 } 6538 case NEON::BI__builtin_neon_vmaxv_s8: { 6539 Int = Intrinsic::aarch64_neon_smaxv; 6540 Ty = Int32Ty; 6541 VTy = llvm::VectorType::get(Int8Ty, 8); 6542 llvm::Type *Tys[2] = { Ty, VTy }; 6543 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6544 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6545 return Builder.CreateTrunc(Ops[0], Int8Ty); 6546 } 6547 case NEON::BI__builtin_neon_vmaxv_s16: { 6548 Int = Intrinsic::aarch64_neon_smaxv; 6549 Ty = Int32Ty; 6550 VTy = llvm::VectorType::get(Int16Ty, 4); 6551 llvm::Type *Tys[2] = { Ty, VTy }; 6552 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6553 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6554 return Builder.CreateTrunc(Ops[0], Int16Ty); 6555 } 6556 case NEON::BI__builtin_neon_vmaxvq_s8: { 6557 Int = Intrinsic::aarch64_neon_smaxv; 6558 Ty = Int32Ty; 6559 VTy = llvm::VectorType::get(Int8Ty, 16); 6560 llvm::Type *Tys[2] = { Ty, VTy }; 6561 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6562 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6563 return Builder.CreateTrunc(Ops[0], Int8Ty); 6564 } 6565 case NEON::BI__builtin_neon_vmaxvq_s16: { 6566 Int = Intrinsic::aarch64_neon_smaxv; 6567 Ty = Int32Ty; 6568 VTy = llvm::VectorType::get(Int16Ty, 8); 6569 llvm::Type *Tys[2] = { Ty, VTy }; 6570 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6571 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6572 return Builder.CreateTrunc(Ops[0], Int16Ty); 6573 } 6574 case NEON::BI__builtin_neon_vminv_u8: { 6575 Int = Intrinsic::aarch64_neon_uminv; 6576 Ty = Int32Ty; 6577 VTy = llvm::VectorType::get(Int8Ty, 8); 6578 llvm::Type *Tys[2] = { Ty, VTy }; 6579 
Ops.push_back(EmitScalarExpr(E->getArg(0))); 6580 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6581 return Builder.CreateTrunc(Ops[0], Int8Ty); 6582 } 6583 case NEON::BI__builtin_neon_vminv_u16: { 6584 Int = Intrinsic::aarch64_neon_uminv; 6585 Ty = Int32Ty; 6586 VTy = llvm::VectorType::get(Int16Ty, 4); 6587 llvm::Type *Tys[2] = { Ty, VTy }; 6588 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6589 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6590 return Builder.CreateTrunc(Ops[0], Int16Ty); 6591 } 6592 case NEON::BI__builtin_neon_vminvq_u8: { 6593 Int = Intrinsic::aarch64_neon_uminv; 6594 Ty = Int32Ty; 6595 VTy = llvm::VectorType::get(Int8Ty, 16); 6596 llvm::Type *Tys[2] = { Ty, VTy }; 6597 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6598 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6599 return Builder.CreateTrunc(Ops[0], Int8Ty); 6600 } 6601 case NEON::BI__builtin_neon_vminvq_u16: { 6602 Int = Intrinsic::aarch64_neon_uminv; 6603 Ty = Int32Ty; 6604 VTy = llvm::VectorType::get(Int16Ty, 8); 6605 llvm::Type *Tys[2] = { Ty, VTy }; 6606 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6607 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6608 return Builder.CreateTrunc(Ops[0], Int16Ty); 6609 } 6610 case NEON::BI__builtin_neon_vminv_s8: { 6611 Int = Intrinsic::aarch64_neon_sminv; 6612 Ty = Int32Ty; 6613 VTy = llvm::VectorType::get(Int8Ty, 8); 6614 llvm::Type *Tys[2] = { Ty, VTy }; 6615 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6616 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6617 return Builder.CreateTrunc(Ops[0], Int8Ty); 6618 } 6619 case NEON::BI__builtin_neon_vminv_s16: { 6620 Int = Intrinsic::aarch64_neon_sminv; 6621 Ty = Int32Ty; 6622 VTy = llvm::VectorType::get(Int16Ty, 4); 6623 llvm::Type *Tys[2] = { Ty, VTy }; 6624 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6625 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6626 return Builder.CreateTrunc(Ops[0], 
Int16Ty); 6627 } 6628 case NEON::BI__builtin_neon_vminvq_s8: { 6629 Int = Intrinsic::aarch64_neon_sminv; 6630 Ty = Int32Ty; 6631 VTy = llvm::VectorType::get(Int8Ty, 16); 6632 llvm::Type *Tys[2] = { Ty, VTy }; 6633 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6634 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6635 return Builder.CreateTrunc(Ops[0], Int8Ty); 6636 } 6637 case NEON::BI__builtin_neon_vminvq_s16: { 6638 Int = Intrinsic::aarch64_neon_sminv; 6639 Ty = Int32Ty; 6640 VTy = llvm::VectorType::get(Int16Ty, 8); 6641 llvm::Type *Tys[2] = { Ty, VTy }; 6642 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6643 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6644 return Builder.CreateTrunc(Ops[0], Int16Ty); 6645 } 6646 case NEON::BI__builtin_neon_vmul_n_f64: { 6647 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6648 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6649 return Builder.CreateFMul(Ops[0], RHS); 6650 } 6651 case NEON::BI__builtin_neon_vaddlv_u8: { 6652 Int = Intrinsic::aarch64_neon_uaddlv; 6653 Ty = Int32Ty; 6654 VTy = llvm::VectorType::get(Int8Ty, 8); 6655 llvm::Type *Tys[2] = { Ty, VTy }; 6656 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6657 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6658 return Builder.CreateTrunc(Ops[0], Int16Ty); 6659 } 6660 case NEON::BI__builtin_neon_vaddlv_u16: { 6661 Int = Intrinsic::aarch64_neon_uaddlv; 6662 Ty = Int32Ty; 6663 VTy = llvm::VectorType::get(Int16Ty, 4); 6664 llvm::Type *Tys[2] = { Ty, VTy }; 6665 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6666 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6667 } 6668 case NEON::BI__builtin_neon_vaddlvq_u8: { 6669 Int = Intrinsic::aarch64_neon_uaddlv; 6670 Ty = Int32Ty; 6671 VTy = llvm::VectorType::get(Int8Ty, 16); 6672 llvm::Type *Tys[2] = { Ty, VTy }; 6673 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6674 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6675 
return Builder.CreateTrunc(Ops[0], Int16Ty); 6676 } 6677 case NEON::BI__builtin_neon_vaddlvq_u16: { 6678 Int = Intrinsic::aarch64_neon_uaddlv; 6679 Ty = Int32Ty; 6680 VTy = llvm::VectorType::get(Int16Ty, 8); 6681 llvm::Type *Tys[2] = { Ty, VTy }; 6682 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6683 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6684 } 6685 case NEON::BI__builtin_neon_vaddlv_s8: { 6686 Int = Intrinsic::aarch64_neon_saddlv; 6687 Ty = Int32Ty; 6688 VTy = llvm::VectorType::get(Int8Ty, 8); 6689 llvm::Type *Tys[2] = { Ty, VTy }; 6690 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6691 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6692 return Builder.CreateTrunc(Ops[0], Int16Ty); 6693 } 6694 case NEON::BI__builtin_neon_vaddlv_s16: { 6695 Int = Intrinsic::aarch64_neon_saddlv; 6696 Ty = Int32Ty; 6697 VTy = llvm::VectorType::get(Int16Ty, 4); 6698 llvm::Type *Tys[2] = { Ty, VTy }; 6699 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6700 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6701 } 6702 case NEON::BI__builtin_neon_vaddlvq_s8: { 6703 Int = Intrinsic::aarch64_neon_saddlv; 6704 Ty = Int32Ty; 6705 VTy = llvm::VectorType::get(Int8Ty, 16); 6706 llvm::Type *Tys[2] = { Ty, VTy }; 6707 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6708 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6709 return Builder.CreateTrunc(Ops[0], Int16Ty); 6710 } 6711 case NEON::BI__builtin_neon_vaddlvq_s16: { 6712 Int = Intrinsic::aarch64_neon_saddlv; 6713 Ty = Int32Ty; 6714 VTy = llvm::VectorType::get(Int16Ty, 8); 6715 llvm::Type *Tys[2] = { Ty, VTy }; 6716 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6717 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6718 } 6719 case NEON::BI__builtin_neon_vsri_n_v: 6720 case NEON::BI__builtin_neon_vsriq_n_v: { 6721 Int = Intrinsic::aarch64_neon_vsri; 6722 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6723 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6724 } 
6725 case NEON::BI__builtin_neon_vsli_n_v: 6726 case NEON::BI__builtin_neon_vsliq_n_v: { 6727 Int = Intrinsic::aarch64_neon_vsli; 6728 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6729 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6730 } 6731 case NEON::BI__builtin_neon_vsra_n_v: 6732 case NEON::BI__builtin_neon_vsraq_n_v: 6733 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6734 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6735 return Builder.CreateAdd(Ops[0], Ops[1]); 6736 case NEON::BI__builtin_neon_vrsra_n_v: 6737 case NEON::BI__builtin_neon_vrsraq_n_v: { 6738 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6739 SmallVector<llvm::Value*,2> TmpOps; 6740 TmpOps.push_back(Ops[1]); 6741 TmpOps.push_back(Ops[2]); 6742 Function* F = CGM.getIntrinsic(Int, Ty); 6743 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6744 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6745 return Builder.CreateAdd(Ops[0], tmp); 6746 } 6747 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6748 // of an Align parameter here. 
6749 case NEON::BI__builtin_neon_vld1_x2_v: 6750 case NEON::BI__builtin_neon_vld1q_x2_v: 6751 case NEON::BI__builtin_neon_vld1_x3_v: 6752 case NEON::BI__builtin_neon_vld1q_x3_v: 6753 case NEON::BI__builtin_neon_vld1_x4_v: 6754 case NEON::BI__builtin_neon_vld1q_x4_v: { 6755 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6756 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6757 llvm::Type *Tys[2] = { VTy, PTy }; 6758 unsigned Int; 6759 switch (BuiltinID) { 6760 case NEON::BI__builtin_neon_vld1_x2_v: 6761 case NEON::BI__builtin_neon_vld1q_x2_v: 6762 Int = Intrinsic::aarch64_neon_ld1x2; 6763 break; 6764 case NEON::BI__builtin_neon_vld1_x3_v: 6765 case NEON::BI__builtin_neon_vld1q_x3_v: 6766 Int = Intrinsic::aarch64_neon_ld1x3; 6767 break; 6768 case NEON::BI__builtin_neon_vld1_x4_v: 6769 case NEON::BI__builtin_neon_vld1q_x4_v: 6770 Int = Intrinsic::aarch64_neon_ld1x4; 6771 break; 6772 } 6773 Function *F = CGM.getIntrinsic(Int, Tys); 6774 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6775 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6776 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6777 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6778 } 6779 case NEON::BI__builtin_neon_vst1_x2_v: 6780 case NEON::BI__builtin_neon_vst1q_x2_v: 6781 case NEON::BI__builtin_neon_vst1_x3_v: 6782 case NEON::BI__builtin_neon_vst1q_x3_v: 6783 case NEON::BI__builtin_neon_vst1_x4_v: 6784 case NEON::BI__builtin_neon_vst1q_x4_v: { 6785 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6786 llvm::Type *Tys[2] = { VTy, PTy }; 6787 unsigned Int; 6788 switch (BuiltinID) { 6789 case NEON::BI__builtin_neon_vst1_x2_v: 6790 case NEON::BI__builtin_neon_vst1q_x2_v: 6791 Int = Intrinsic::aarch64_neon_st1x2; 6792 break; 6793 case NEON::BI__builtin_neon_vst1_x3_v: 6794 case NEON::BI__builtin_neon_vst1q_x3_v: 6795 Int = Intrinsic::aarch64_neon_st1x3; 6796 break; 6797 case NEON::BI__builtin_neon_vst1_x4_v: 6798 case 
NEON::BI__builtin_neon_vst1q_x4_v: 6799 Int = Intrinsic::aarch64_neon_st1x4; 6800 break; 6801 } 6802 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6803 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6804 } 6805 case NEON::BI__builtin_neon_vld1_v: 6806 case NEON::BI__builtin_neon_vld1q_v: { 6807 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6808 auto Alignment = CharUnits::fromQuantity( 6809 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); 6810 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 6811 } 6812 case NEON::BI__builtin_neon_vst1_v: 6813 case NEON::BI__builtin_neon_vst1q_v: 6814 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6815 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6816 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6817 case NEON::BI__builtin_neon_vld1_lane_v: 6818 case NEON::BI__builtin_neon_vld1q_lane_v: { 6819 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6820 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6821 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6822 auto Alignment = CharUnits::fromQuantity( 6823 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 6824 Ops[0] = 6825 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6826 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 6827 } 6828 case NEON::BI__builtin_neon_vld1_dup_v: 6829 case NEON::BI__builtin_neon_vld1q_dup_v: { 6830 Value *V = UndefValue::get(Ty); 6831 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6832 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6833 auto Alignment = CharUnits::fromQuantity( 6834 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 6835 Ops[0] = 6836 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6837 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 6838 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 6839 return EmitNeonSplat(Ops[0], CI); 6840 } 6841 case NEON::BI__builtin_neon_vst1_lane_v: 6842 case NEON::BI__builtin_neon_vst1q_lane_v: 6843 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6844 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 6845 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6846 return Builder.CreateDefaultAlignedStore(Ops[1], 6847 Builder.CreateBitCast(Ops[0], Ty)); 6848 case NEON::BI__builtin_neon_vld2_v: 6849 case NEON::BI__builtin_neon_vld2q_v: { 6850 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6851 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6852 llvm::Type *Tys[2] = { VTy, PTy }; 6853 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 6854 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6855 Ops[0] = Builder.CreateBitCast(Ops[0], 6856 llvm::PointerType::getUnqual(Ops[1]->getType())); 6857 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6858 } 6859 case NEON::BI__builtin_neon_vld3_v: 6860 case NEON::BI__builtin_neon_vld3q_v: { 6861 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6862 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6863 llvm::Type *Tys[2] = { VTy, PTy }; 6864 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 6865 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6866 Ops[0] = Builder.CreateBitCast(Ops[0], 6867 llvm::PointerType::getUnqual(Ops[1]->getType())); 6868 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6869 } 6870 case NEON::BI__builtin_neon_vld4_v: 6871 case NEON::BI__builtin_neon_vld4q_v: { 6872 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6873 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6874 llvm::Type *Tys[2] = { VTy, PTy }; 6875 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 6876 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 6877 Ops[0] = Builder.CreateBitCast(Ops[0], 6878 llvm::PointerType::getUnqual(Ops[1]->getType())); 6879 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6880 } 6881 case NEON::BI__builtin_neon_vld2_dup_v: 6882 case NEON::BI__builtin_neon_vld2q_dup_v: { 6883 llvm::Type *PTy = 6884 llvm::PointerType::getUnqual(VTy->getElementType()); 6885 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6886 llvm::Type *Tys[2] = { VTy, PTy }; 6887 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 6888 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6889 Ops[0] = Builder.CreateBitCast(Ops[0], 6890 llvm::PointerType::getUnqual(Ops[1]->getType())); 6891 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6892 } 6893 case NEON::BI__builtin_neon_vld3_dup_v: 6894 case NEON::BI__builtin_neon_vld3q_dup_v: { 6895 llvm::Type *PTy = 6896 llvm::PointerType::getUnqual(VTy->getElementType()); 6897 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6898 llvm::Type *Tys[2] = { VTy, PTy }; 6899 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 6900 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6901 Ops[0] = Builder.CreateBitCast(Ops[0], 6902 llvm::PointerType::getUnqual(Ops[1]->getType())); 6903 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6904 } 6905 case NEON::BI__builtin_neon_vld4_dup_v: 6906 case NEON::BI__builtin_neon_vld4q_dup_v: { 6907 llvm::Type *PTy = 6908 llvm::PointerType::getUnqual(VTy->getElementType()); 6909 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6910 llvm::Type *Tys[2] = { VTy, PTy }; 6911 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 6912 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6913 Ops[0] = Builder.CreateBitCast(Ops[0], 6914 llvm::PointerType::getUnqual(Ops[1]->getType())); 6915 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6916 } 6917 case NEON::BI__builtin_neon_vld2_lane_v: 6918 case NEON::BI__builtin_neon_vld2q_lane_v: { 6919 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 6920 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 6921 Ops.push_back(Ops[1]); 6922 Ops.erase(Ops.begin()+1); 6923 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6924 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6925 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6926 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 6927 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6928 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6929 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6930 } 6931 case NEON::BI__builtin_neon_vld3_lane_v: 6932 case NEON::BI__builtin_neon_vld3q_lane_v: { 6933 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6934 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 6935 Ops.push_back(Ops[1]); 6936 Ops.erase(Ops.begin()+1); 6937 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6938 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6939 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6940 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6941 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 6942 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6943 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6944 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6945 } 6946 case NEON::BI__builtin_neon_vld4_lane_v: 6947 case NEON::BI__builtin_neon_vld4q_lane_v: { 6948 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6949 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 6950 Ops.push_back(Ops[1]); 6951 Ops.erase(Ops.begin()+1); 6952 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6953 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6954 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6955 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 6956 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 6957 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 6958 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6959 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 6960 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6961 } 6962 case NEON::BI__builtin_neon_vst2_v: 6963 case NEON::BI__builtin_neon_vst2q_v: { 6964 Ops.push_back(Ops[0]); 6965 Ops.erase(Ops.begin()); 6966 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 6967 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 6968 Ops, ""); 6969 } 6970 case NEON::BI__builtin_neon_vst2_lane_v: 6971 case NEON::BI__builtin_neon_vst2q_lane_v: { 6972 Ops.push_back(Ops[0]); 6973 Ops.erase(Ops.begin()); 6974 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 6975 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6976 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 6977 Ops, ""); 6978 } 6979 case NEON::BI__builtin_neon_vst3_v: 6980 case NEON::BI__builtin_neon_vst3q_v: { 6981 Ops.push_back(Ops[0]); 6982 Ops.erase(Ops.begin()); 6983 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6984 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 6985 Ops, ""); 6986 } 6987 case NEON::BI__builtin_neon_vst3_lane_v: 6988 case NEON::BI__builtin_neon_vst3q_lane_v: { 6989 Ops.push_back(Ops[0]); 6990 Ops.erase(Ops.begin()); 6991 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6992 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6993 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 6994 Ops, ""); 6995 } 6996 case NEON::BI__builtin_neon_vst4_v: 6997 case NEON::BI__builtin_neon_vst4q_v: { 6998 Ops.push_back(Ops[0]); 6999 Ops.erase(Ops.begin()); 7000 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7001 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 7002 Ops, ""); 7003 } 7004 case NEON::BI__builtin_neon_vst4_lane_v: 7005 case NEON::BI__builtin_neon_vst4q_lane_v: { 7006 Ops.push_back(Ops[0]); 7007 Ops.erase(Ops.begin()); 7008 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7009 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 7010 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 7011 Ops, ""); 7012 } 7013 case NEON::BI__builtin_neon_vtrn_v: 7014 case NEON::BI__builtin_neon_vtrnq_v: { 7015 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7016 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7017 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7018 Value *SV = nullptr; 7019 7020 for (unsigned vi = 0; vi != 2; ++vi) { 7021 SmallVector<uint32_t, 16> Indices; 7022 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7023 Indices.push_back(i+vi); 7024 Indices.push_back(i+e+vi); 7025 } 7026 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7027 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 7028 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7029 } 7030 return SV; 7031 } 7032 case NEON::BI__builtin_neon_vuzp_v: 7033 case NEON::BI__builtin_neon_vuzpq_v: { 7034 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7035 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7036 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7037 Value *SV = nullptr; 7038 7039 for (unsigned vi = 0; vi != 2; ++vi) { 7040 SmallVector<uint32_t, 16> Indices; 7041 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 7042 Indices.push_back(2*i+vi); 7043 7044 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7045 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 7046 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7047 } 7048 return SV; 7049 } 7050 case NEON::BI__builtin_neon_vzip_v: 7051 case NEON::BI__builtin_neon_vzipq_v: { 7052 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7053 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7054 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7055 Value *SV = nullptr; 7056 7057 for (unsigned vi = 0; vi != 2; ++vi) { 7058 SmallVector<uint32_t, 16> Indices; 7059 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7060 
Indices.push_back((i + vi*e) >> 1); 7061 Indices.push_back(((i + vi*e) >> 1)+e); 7062 } 7063 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7064 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 7065 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7066 } 7067 return SV; 7068 } 7069 case NEON::BI__builtin_neon_vqtbl1q_v: { 7070 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 7071 Ops, "vtbl1"); 7072 } 7073 case NEON::BI__builtin_neon_vqtbl2q_v: { 7074 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 7075 Ops, "vtbl2"); 7076 } 7077 case NEON::BI__builtin_neon_vqtbl3q_v: { 7078 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 7079 Ops, "vtbl3"); 7080 } 7081 case NEON::BI__builtin_neon_vqtbl4q_v: { 7082 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 7083 Ops, "vtbl4"); 7084 } 7085 case NEON::BI__builtin_neon_vqtbx1q_v: { 7086 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 7087 Ops, "vtbx1"); 7088 } 7089 case NEON::BI__builtin_neon_vqtbx2q_v: { 7090 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 7091 Ops, "vtbx2"); 7092 } 7093 case NEON::BI__builtin_neon_vqtbx3q_v: { 7094 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 7095 Ops, "vtbx3"); 7096 } 7097 case NEON::BI__builtin_neon_vqtbx4q_v: { 7098 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 7099 Ops, "vtbx4"); 7100 } 7101 case NEON::BI__builtin_neon_vsqadd_v: 7102 case NEON::BI__builtin_neon_vsqaddq_v: { 7103 Int = Intrinsic::aarch64_neon_usqadd; 7104 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 7105 } 7106 case NEON::BI__builtin_neon_vuqadd_v: 7107 case NEON::BI__builtin_neon_vuqaddq_v: { 7108 Int = Intrinsic::aarch64_neon_suqadd; 7109 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 7110 } 7111 } 7112 } 7113 7114 llvm::Value *CodeGenFunction:: 7115 
BuildVector(ArrayRef<llvm::Value*> Ops) { 7116 assert((Ops.size() & (Ops.size() - 1)) == 0 && 7117 "Not a power-of-two sized vector!"); 7118 bool AllConstants = true; 7119 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 7120 AllConstants &= isa<Constant>(Ops[i]); 7121 7122 // If this is a constant vector, create a ConstantVector. 7123 if (AllConstants) { 7124 SmallVector<llvm::Constant*, 16> CstOps; 7125 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7126 CstOps.push_back(cast<Constant>(Ops[i])); 7127 return llvm::ConstantVector::get(CstOps); 7128 } 7129 7130 // Otherwise, insertelement the values to build the vector. 7131 Value *Result = 7132 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 7133 7134 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7135 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 7136 7137 return Result; 7138 } 7139 7140 // Convert the mask from an integer type to a vector of i1. 7141 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 7142 unsigned NumElts) { 7143 7144 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 7145 cast<IntegerType>(Mask->getType())->getBitWidth()); 7146 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 7147 7148 // If we have less than 8 elements, then the starting mask was an i8 and 7149 // we need to extract down to the right number of elements. 7150 if (NumElts < 8) { 7151 uint32_t Indices[4]; 7152 for (unsigned i = 0; i != NumElts; ++i) 7153 Indices[i] = i; 7154 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 7155 makeArrayRef(Indices, NumElts), 7156 "extract"); 7157 } 7158 return MaskVec; 7159 } 7160 7161 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 7162 SmallVectorImpl<Value *> &Ops, 7163 unsigned Align) { 7164 // Cast the pointer to right type. 
7165 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7166 llvm::PointerType::getUnqual(Ops[1]->getType())); 7167 7168 // If the mask is all ones just emit a regular store. 7169 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7170 if (C->isAllOnesValue()) 7171 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 7172 7173 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7174 Ops[1]->getType()->getVectorNumElements()); 7175 7176 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 7177 } 7178 7179 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 7180 SmallVectorImpl<Value *> &Ops, unsigned Align) { 7181 // Cast the pointer to right type. 7182 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7183 llvm::PointerType::getUnqual(Ops[1]->getType())); 7184 7185 // If the mask is all ones just emit a regular store. 7186 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7187 if (C->isAllOnesValue()) 7188 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7189 7190 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7191 Ops[1]->getType()->getVectorNumElements()); 7192 7193 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 7194 } 7195 7196 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 7197 SmallVectorImpl<Value *> &Ops, 7198 llvm::Type *DstTy, 7199 unsigned SrcSizeInBits, 7200 unsigned Align) { 7201 // Load the subvector. 7202 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7203 7204 // Create broadcast mask. 
7205 unsigned NumDstElts = DstTy->getVectorNumElements(); 7206 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 7207 7208 SmallVector<uint32_t, 8> Mask; 7209 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 7210 for (unsigned j = 0; j != NumSrcElts; ++j) 7211 Mask.push_back(j); 7212 7213 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 7214 } 7215 7216 static Value *EmitX86Select(CodeGenFunction &CGF, 7217 Value *Mask, Value *Op0, Value *Op1) { 7218 7219 // If the mask is all ones just return first argument. 7220 if (const auto *C = dyn_cast<Constant>(Mask)) 7221 if (C->isAllOnesValue()) 7222 return Op0; 7223 7224 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 7225 7226 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 7227 } 7228 7229 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 7230 bool Signed, SmallVectorImpl<Value *> &Ops) { 7231 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7232 Value *Cmp; 7233 7234 if (CC == 3) { 7235 Cmp = Constant::getNullValue( 7236 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7237 } else if (CC == 7) { 7238 Cmp = Constant::getAllOnesValue( 7239 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7240 } else { 7241 ICmpInst::Predicate Pred; 7242 switch (CC) { 7243 default: llvm_unreachable("Unknown condition code"); 7244 case 0: Pred = ICmpInst::ICMP_EQ; break; 7245 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 7246 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 7247 case 4: Pred = ICmpInst::ICMP_NE; break; 7248 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 7249 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 7250 } 7251 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7252 } 7253 7254 const auto *C = dyn_cast<Constant>(Ops.back()); 7255 if (!C || !C->isAllOnesValue()) 7256 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 7257 7258 if (NumElts < 8) { 7259 uint32_t Indices[8]; 7260 for (unsigned i = 0; i != NumElts; ++i) 7261 Indices[i] = i; 7262 for (unsigned i = NumElts; i != 8; ++i) 7263 Indices[i] = i % NumElts + NumElts; 7264 Cmp = CGF.Builder.CreateShuffleVector( 7265 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 7266 } 7267 return CGF.Builder.CreateBitCast(Cmp, 7268 IntegerType::get(CGF.getLLVMContext(), 7269 std::max(NumElts, 8U))); 7270 } 7271 7272 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 7273 ArrayRef<Value *> Ops) { 7274 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7275 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7276 7277 if (Ops.size() == 2) 7278 return Res; 7279 7280 assert(Ops.size() == 4); 7281 return EmitX86Select(CGF, Ops[3], Res, Ops[2]); 7282 } 7283 7284 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 7285 llvm::Type *DstTy) { 7286 unsigned NumberOfElements = DstTy->getVectorNumElements(); 7287 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 7288 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 7289 } 7290 7291 static Value *EmitX86CpuIs(CodeGenFunction &CGF, const CallExpr *E) { 7292 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); 7293 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); 7294 7295 // This enum contains the vendor, type, and subtype enums from the 7296 // runtime library concatenated together. The _START labels mark 7297 // the start and are used to adjust the value into the correct 7298 // encoding space. 
7299 enum X86CPUs { 7300 INTEL = 1, 7301 AMD, 7302 CPU_TYPE_START, 7303 INTEL_BONNELL, 7304 INTEL_CORE2, 7305 INTEL_COREI7, 7306 AMDFAM10H, 7307 AMDFAM15H, 7308 INTEL_SILVERMONT, 7309 INTEL_KNL, 7310 AMD_BTVER1, 7311 AMD_BTVER2, 7312 CPU_SUBTYPE_START, 7313 INTEL_COREI7_NEHALEM, 7314 INTEL_COREI7_WESTMERE, 7315 INTEL_COREI7_SANDYBRIDGE, 7316 AMDFAM10H_BARCELONA, 7317 AMDFAM10H_SHANGHAI, 7318 AMDFAM10H_ISTANBUL, 7319 AMDFAM15H_BDVER1, 7320 AMDFAM15H_BDVER2, 7321 AMDFAM15H_BDVER3, 7322 AMDFAM15H_BDVER4, 7323 AMDFAM17H_ZNVER1, 7324 INTEL_COREI7_IVYBRIDGE, 7325 INTEL_COREI7_HASWELL, 7326 INTEL_COREI7_BROADWELL, 7327 INTEL_COREI7_SKYLAKE, 7328 INTEL_COREI7_SKYLAKE_AVX512, 7329 }; 7330 7331 X86CPUs CPU = 7332 StringSwitch<X86CPUs>(CPUStr) 7333 .Case("amd", AMD) 7334 .Case("amdfam10h", AMDFAM10H) 7335 .Case("amdfam15h", AMDFAM15H) 7336 .Case("atom", INTEL_BONNELL) 7337 .Case("barcelona", AMDFAM10H_BARCELONA) 7338 .Case("bdver1", AMDFAM15H_BDVER1) 7339 .Case("bdver2", AMDFAM15H_BDVER2) 7340 .Case("bdver3", AMDFAM15H_BDVER3) 7341 .Case("bdver4", AMDFAM15H_BDVER4) 7342 .Case("bonnell", INTEL_BONNELL) 7343 .Case("broadwell", INTEL_COREI7_BROADWELL) 7344 .Case("btver1", AMD_BTVER1) 7345 .Case("btver2", AMD_BTVER2) 7346 .Case("core2", INTEL_CORE2) 7347 .Case("corei7", INTEL_COREI7) 7348 .Case("haswell", INTEL_COREI7_HASWELL) 7349 .Case("intel", INTEL) 7350 .Case("istanbul", AMDFAM10H_ISTANBUL) 7351 .Case("ivybridge", INTEL_COREI7_IVYBRIDGE) 7352 .Case("knl", INTEL_KNL) 7353 .Case("nehalem", INTEL_COREI7_NEHALEM) 7354 .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE) 7355 .Case("shanghai", AMDFAM10H_SHANGHAI) 7356 .Case("silvermont", INTEL_SILVERMONT) 7357 .Case("skylake", INTEL_COREI7_SKYLAKE) 7358 .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512) 7359 .Case("slm", INTEL_SILVERMONT) 7360 .Case("westmere", INTEL_COREI7_WESTMERE) 7361 .Case("znver1", AMDFAM17H_ZNVER1); 7362 7363 llvm::Type *Int32Ty = CGF.Builder.getInt32Ty(); 7364 7365 // Matching the struct layout from the 
compiler-rt/libgcc structure that is 7366 // filled in: 7367 // unsigned int __cpu_vendor; 7368 // unsigned int __cpu_type; 7369 // unsigned int __cpu_subtype; 7370 // unsigned int __cpu_features[1]; 7371 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7372 llvm::ArrayType::get(Int32Ty, 1)); 7373 7374 // Grab the global __cpu_model. 7375 llvm::Constant *CpuModel = CGF.CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7376 7377 // Calculate the index needed to access the correct field based on the 7378 // range. Also adjust the expected value. 7379 unsigned Index; 7380 unsigned Value; 7381 if (CPU > CPU_SUBTYPE_START) { 7382 Index = 2; 7383 Value = CPU - CPU_SUBTYPE_START; 7384 } else if (CPU > CPU_TYPE_START) { 7385 Index = 1; 7386 Value = CPU - CPU_TYPE_START; 7387 } else { 7388 Index = 0; 7389 Value = CPU; 7390 } 7391 7392 // Grab the appropriate field from __cpu_model. 7393 llvm::Value *Idxs[] = { 7394 ConstantInt::get(Int32Ty, 0), 7395 ConstantInt::get(Int32Ty, Index) 7396 }; 7397 llvm::Value *CpuValue = CGF.Builder.CreateGEP(STy, CpuModel, Idxs); 7398 CpuValue = CGF.Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); 7399 7400 // Check the value of the field against the requested value. 7401 return CGF.Builder.CreateICmpEQ(CpuValue, 7402 llvm::ConstantInt::get(Int32Ty, Value)); 7403 } 7404 7405 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 7406 const CallExpr *E) { 7407 if (BuiltinID == X86::BI__builtin_cpu_is) 7408 return EmitX86CpuIs(*this, E); 7409 7410 SmallVector<Value*, 4> Ops; 7411 7412 // Find out if any arguments are required to be integer constant expressions. 7413 unsigned ICEArguments = 0; 7414 ASTContext::GetBuiltinTypeError Error; 7415 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7416 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7417 7418 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 7419 // If this is a normal argument, just emit it as a scalar. 
7420 if ((ICEArguments & (1 << i)) == 0) { 7421 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7422 continue; 7423 } 7424 7425 // If this is required to be a constant, constant fold it so that we know 7426 // that the generated intrinsic gets a ConstantInt. 7427 llvm::APSInt Result; 7428 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 7429 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 7430 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 7431 } 7432 7433 // These exist so that the builtin that takes an immediate can be bounds 7434 // checked by clang to avoid passing bad immediates to the backend. Since 7435 // AVX has a larger immediate than SSE we would need separate builtins to 7436 // do the different bounds checking. Rather than create a clang specific 7437 // SSE only builtin, this implements eight separate builtins to match gcc 7438 // implementation. 7439 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 7440 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 7441 llvm::Function *F = CGM.getIntrinsic(ID); 7442 return Builder.CreateCall(F, Ops); 7443 }; 7444 7445 // For the vector forms of FP comparisons, translate the builtins directly to 7446 // IR. 7447 // TODO: The builtins could be removed if the SSE header files used vector 7448 // extension comparisons directly (vector ordered/unordered may need 7449 // additional support via __builtin_isnan()). 
7450 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 7451 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 7452 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 7453 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 7454 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 7455 return Builder.CreateBitCast(Sext, FPVecTy); 7456 }; 7457 7458 switch (BuiltinID) { 7459 default: return nullptr; 7460 case X86::BI__builtin_cpu_supports: { 7461 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 7462 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 7463 7464 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 7465 // based mapping. 7466 // Processor features and mapping to processor feature value. 7467 enum X86Features { 7468 CMOV = 0, 7469 MMX, 7470 POPCNT, 7471 SSE, 7472 SSE2, 7473 SSE3, 7474 SSSE3, 7475 SSE4_1, 7476 SSE4_2, 7477 AVX, 7478 AVX2, 7479 SSE4_A, 7480 FMA4, 7481 XOP, 7482 FMA, 7483 AVX512F, 7484 BMI, 7485 BMI2, 7486 AES, 7487 PCLMUL, 7488 AVX512VL, 7489 AVX512BW, 7490 AVX512DQ, 7491 AVX512CD, 7492 AVX512ER, 7493 AVX512PF, 7494 AVX512VBMI, 7495 AVX512IFMA, 7496 AVX5124VNNIW, 7497 AVX5124FMAPS, 7498 AVX512VPOPCNTDQ, 7499 MAX 7500 }; 7501 7502 X86Features Feature = 7503 StringSwitch<X86Features>(FeatureStr) 7504 .Case("cmov", X86Features::CMOV) 7505 .Case("mmx", X86Features::MMX) 7506 .Case("popcnt", X86Features::POPCNT) 7507 .Case("sse", X86Features::SSE) 7508 .Case("sse2", X86Features::SSE2) 7509 .Case("sse3", X86Features::SSE3) 7510 .Case("ssse3", X86Features::SSSE3) 7511 .Case("sse4.1", X86Features::SSE4_1) 7512 .Case("sse4.2", X86Features::SSE4_2) 7513 .Case("avx", X86Features::AVX) 7514 .Case("avx2", X86Features::AVX2) 7515 .Case("sse4a", X86Features::SSE4_A) 7516 .Case("fma4", X86Features::FMA4) 7517 .Case("xop", X86Features::XOP) 7518 .Case("fma", X86Features::FMA) 7519 .Case("avx512f", X86Features::AVX512F) 7520 .Case("bmi", X86Features::BMI) 
7521 .Case("bmi2", X86Features::BMI2) 7522 .Case("aes", X86Features::AES) 7523 .Case("pclmul", X86Features::PCLMUL) 7524 .Case("avx512vl", X86Features::AVX512VL) 7525 .Case("avx512bw", X86Features::AVX512BW) 7526 .Case("avx512dq", X86Features::AVX512DQ) 7527 .Case("avx512cd", X86Features::AVX512CD) 7528 .Case("avx512er", X86Features::AVX512ER) 7529 .Case("avx512pf", X86Features::AVX512PF) 7530 .Case("avx512vbmi", X86Features::AVX512VBMI) 7531 .Case("avx512ifma", X86Features::AVX512IFMA) 7532 .Case("avx5124vnniw", X86Features::AVX5124VNNIW) 7533 .Case("avx5124fmaps", X86Features::AVX5124FMAPS) 7534 .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) 7535 .Default(X86Features::MAX); 7536 assert(Feature != X86Features::MAX && "Invalid feature!"); 7537 7538 // Matching the struct layout from the compiler-rt/libgcc structure that is 7539 // filled in: 7540 // unsigned int __cpu_vendor; 7541 // unsigned int __cpu_type; 7542 // unsigned int __cpu_subtype; 7543 // unsigned int __cpu_features[1]; 7544 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7545 llvm::ArrayType::get(Int32Ty, 1)); 7546 7547 // Grab the global __cpu_model. 7548 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7549 7550 // Grab the first (0th) element from the field __cpu_features off of the 7551 // global in the struct STy. 7552 Value *Idxs[] = { 7553 ConstantInt::get(Int32Ty, 0), 7554 ConstantInt::get(Int32Ty, 3), 7555 ConstantInt::get(Int32Ty, 0) 7556 }; 7557 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 7558 Value *Features = Builder.CreateAlignedLoad(CpuFeatures, 7559 CharUnits::fromQuantity(4)); 7560 7561 // Check the value of the bit corresponding to the feature requested. 
7562 Value *Bitset = Builder.CreateAnd( 7563 Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); 7564 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 7565 } 7566 case X86::BI_mm_prefetch: { 7567 Value *Address = Ops[0]; 7568 Value *RW = ConstantInt::get(Int32Ty, 0); 7569 Value *Locality = Ops[1]; 7570 Value *Data = ConstantInt::get(Int32Ty, 1); 7571 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 7572 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 7573 } 7574 case X86::BI_mm_clflush: { 7575 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 7576 Ops[0]); 7577 } 7578 case X86::BI_mm_lfence: { 7579 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 7580 } 7581 case X86::BI_mm_mfence: { 7582 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 7583 } 7584 case X86::BI_mm_sfence: { 7585 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 7586 } 7587 case X86::BI_mm_pause: { 7588 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 7589 } 7590 case X86::BI__rdtsc: { 7591 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 7592 } 7593 case X86::BI__builtin_ia32_undef128: 7594 case X86::BI__builtin_ia32_undef256: 7595 case X86::BI__builtin_ia32_undef512: 7596 // The x86 definition of "undef" is not the same as the LLVM definition 7597 // (PR32176). We leave optimizing away an unnecessary zero constant to the 7598 // IR optimizer and backend. 7599 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 7600 // value, we should use that here instead of a zero. 
7601 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7602 case X86::BI__builtin_ia32_vec_init_v8qi: 7603 case X86::BI__builtin_ia32_vec_init_v4hi: 7604 case X86::BI__builtin_ia32_vec_init_v2si: 7605 return Builder.CreateBitCast(BuildVector(Ops), 7606 llvm::Type::getX86_MMXTy(getLLVMContext())); 7607 case X86::BI__builtin_ia32_vec_ext_v2si: 7608 return Builder.CreateExtractElement(Ops[0], 7609 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 7610 case X86::BI_mm_setcsr: 7611 case X86::BI__builtin_ia32_ldmxcsr: { 7612 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 7613 Builder.CreateStore(Ops[0], Tmp); 7614 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 7615 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7616 } 7617 case X86::BI_mm_getcsr: 7618 case X86::BI__builtin_ia32_stmxcsr: { 7619 Address Tmp = CreateMemTemp(E->getType()); 7620 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 7621 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7622 return Builder.CreateLoad(Tmp, "stmxcsr"); 7623 } 7624 case X86::BI__builtin_ia32_xsave: 7625 case X86::BI__builtin_ia32_xsave64: 7626 case X86::BI__builtin_ia32_xrstor: 7627 case X86::BI__builtin_ia32_xrstor64: 7628 case X86::BI__builtin_ia32_xsaveopt: 7629 case X86::BI__builtin_ia32_xsaveopt64: 7630 case X86::BI__builtin_ia32_xrstors: 7631 case X86::BI__builtin_ia32_xrstors64: 7632 case X86::BI__builtin_ia32_xsavec: 7633 case X86::BI__builtin_ia32_xsavec64: 7634 case X86::BI__builtin_ia32_xsaves: 7635 case X86::BI__builtin_ia32_xsaves64: { 7636 Intrinsic::ID ID; 7637 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 7638 case X86::BI__builtin_ia32_##NAME: \ 7639 ID = Intrinsic::x86_##NAME; \ 7640 break 7641 switch (BuiltinID) { 7642 default: llvm_unreachable("Unsupported intrinsic!"); 7643 INTRINSIC_X86_XSAVE_ID(xsave); 7644 INTRINSIC_X86_XSAVE_ID(xsave64); 7645 INTRINSIC_X86_XSAVE_ID(xrstor); 7646 INTRINSIC_X86_XSAVE_ID(xrstor64); 7647 INTRINSIC_X86_XSAVE_ID(xsaveopt); 
7648 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 7649 INTRINSIC_X86_XSAVE_ID(xrstors); 7650 INTRINSIC_X86_XSAVE_ID(xrstors64); 7651 INTRINSIC_X86_XSAVE_ID(xsavec); 7652 INTRINSIC_X86_XSAVE_ID(xsavec64); 7653 INTRINSIC_X86_XSAVE_ID(xsaves); 7654 INTRINSIC_X86_XSAVE_ID(xsaves64); 7655 } 7656 #undef INTRINSIC_X86_XSAVE_ID 7657 Value *Mhi = Builder.CreateTrunc( 7658 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 7659 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 7660 Ops[1] = Mhi; 7661 Ops.push_back(Mlo); 7662 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7663 } 7664 case X86::BI__builtin_ia32_storedqudi128_mask: 7665 case X86::BI__builtin_ia32_storedqusi128_mask: 7666 case X86::BI__builtin_ia32_storedquhi128_mask: 7667 case X86::BI__builtin_ia32_storedquqi128_mask: 7668 case X86::BI__builtin_ia32_storeupd128_mask: 7669 case X86::BI__builtin_ia32_storeups128_mask: 7670 case X86::BI__builtin_ia32_storedqudi256_mask: 7671 case X86::BI__builtin_ia32_storedqusi256_mask: 7672 case X86::BI__builtin_ia32_storedquhi256_mask: 7673 case X86::BI__builtin_ia32_storedquqi256_mask: 7674 case X86::BI__builtin_ia32_storeupd256_mask: 7675 case X86::BI__builtin_ia32_storeups256_mask: 7676 case X86::BI__builtin_ia32_storedqudi512_mask: 7677 case X86::BI__builtin_ia32_storedqusi512_mask: 7678 case X86::BI__builtin_ia32_storedquhi512_mask: 7679 case X86::BI__builtin_ia32_storedquqi512_mask: 7680 case X86::BI__builtin_ia32_storeupd512_mask: 7681 case X86::BI__builtin_ia32_storeups512_mask: 7682 return EmitX86MaskedStore(*this, Ops, 1); 7683 7684 case X86::BI__builtin_ia32_storess128_mask: 7685 case X86::BI__builtin_ia32_storesd128_mask: { 7686 return EmitX86MaskedStore(*this, Ops, 16); 7687 } 7688 case X86::BI__builtin_ia32_vpopcntd_512: 7689 case X86::BI__builtin_ia32_vpopcntq_512: { 7690 llvm::Type *ResultType = ConvertType(E->getType()); 7691 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 7692 return Builder.CreateCall(F, Ops); 7693 } 7694 
case X86::BI__builtin_ia32_cvtmask2b128: 7695 case X86::BI__builtin_ia32_cvtmask2b256: 7696 case X86::BI__builtin_ia32_cvtmask2b512: 7697 case X86::BI__builtin_ia32_cvtmask2w128: 7698 case X86::BI__builtin_ia32_cvtmask2w256: 7699 case X86::BI__builtin_ia32_cvtmask2w512: 7700 case X86::BI__builtin_ia32_cvtmask2d128: 7701 case X86::BI__builtin_ia32_cvtmask2d256: 7702 case X86::BI__builtin_ia32_cvtmask2d512: 7703 case X86::BI__builtin_ia32_cvtmask2q128: 7704 case X86::BI__builtin_ia32_cvtmask2q256: 7705 case X86::BI__builtin_ia32_cvtmask2q512: 7706 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 7707 7708 case X86::BI__builtin_ia32_movdqa32store128_mask: 7709 case X86::BI__builtin_ia32_movdqa64store128_mask: 7710 case X86::BI__builtin_ia32_storeaps128_mask: 7711 case X86::BI__builtin_ia32_storeapd128_mask: 7712 case X86::BI__builtin_ia32_movdqa32store256_mask: 7713 case X86::BI__builtin_ia32_movdqa64store256_mask: 7714 case X86::BI__builtin_ia32_storeaps256_mask: 7715 case X86::BI__builtin_ia32_storeapd256_mask: 7716 case X86::BI__builtin_ia32_movdqa32store512_mask: 7717 case X86::BI__builtin_ia32_movdqa64store512_mask: 7718 case X86::BI__builtin_ia32_storeaps512_mask: 7719 case X86::BI__builtin_ia32_storeapd512_mask: { 7720 unsigned Align = 7721 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7722 return EmitX86MaskedStore(*this, Ops, Align); 7723 } 7724 case X86::BI__builtin_ia32_loadups128_mask: 7725 case X86::BI__builtin_ia32_loadups256_mask: 7726 case X86::BI__builtin_ia32_loadups512_mask: 7727 case X86::BI__builtin_ia32_loadupd128_mask: 7728 case X86::BI__builtin_ia32_loadupd256_mask: 7729 case X86::BI__builtin_ia32_loadupd512_mask: 7730 case X86::BI__builtin_ia32_loaddquqi128_mask: 7731 case X86::BI__builtin_ia32_loaddquqi256_mask: 7732 case X86::BI__builtin_ia32_loaddquqi512_mask: 7733 case X86::BI__builtin_ia32_loaddquhi128_mask: 7734 case X86::BI__builtin_ia32_loaddquhi256_mask: 7735 case 
X86::BI__builtin_ia32_loaddquhi512_mask: 7736 case X86::BI__builtin_ia32_loaddqusi128_mask: 7737 case X86::BI__builtin_ia32_loaddqusi256_mask: 7738 case X86::BI__builtin_ia32_loaddqusi512_mask: 7739 case X86::BI__builtin_ia32_loaddqudi128_mask: 7740 case X86::BI__builtin_ia32_loaddqudi256_mask: 7741 case X86::BI__builtin_ia32_loaddqudi512_mask: 7742 return EmitX86MaskedLoad(*this, Ops, 1); 7743 7744 case X86::BI__builtin_ia32_loadss128_mask: 7745 case X86::BI__builtin_ia32_loadsd128_mask: 7746 return EmitX86MaskedLoad(*this, Ops, 16); 7747 7748 case X86::BI__builtin_ia32_loadaps128_mask: 7749 case X86::BI__builtin_ia32_loadaps256_mask: 7750 case X86::BI__builtin_ia32_loadaps512_mask: 7751 case X86::BI__builtin_ia32_loadapd128_mask: 7752 case X86::BI__builtin_ia32_loadapd256_mask: 7753 case X86::BI__builtin_ia32_loadapd512_mask: 7754 case X86::BI__builtin_ia32_movdqa32load128_mask: 7755 case X86::BI__builtin_ia32_movdqa32load256_mask: 7756 case X86::BI__builtin_ia32_movdqa32load512_mask: 7757 case X86::BI__builtin_ia32_movdqa64load128_mask: 7758 case X86::BI__builtin_ia32_movdqa64load256_mask: 7759 case X86::BI__builtin_ia32_movdqa64load512_mask: { 7760 unsigned Align = 7761 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7762 return EmitX86MaskedLoad(*this, Ops, Align); 7763 } 7764 7765 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 7766 case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 7767 llvm::Type *DstTy = ConvertType(E->getType()); 7768 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 7769 } 7770 7771 case X86::BI__builtin_ia32_storehps: 7772 case X86::BI__builtin_ia32_storelps: { 7773 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 7774 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 7775 7776 // cast val v2i64 7777 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 7778 7779 // extract (0, 1) 7780 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 
0 : 1; 7781 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 7782 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 7783 7784 // cast pointer to i64 & store 7785 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 7786 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7787 } 7788 case X86::BI__builtin_ia32_palignr128: 7789 case X86::BI__builtin_ia32_palignr256: 7790 case X86::BI__builtin_ia32_palignr512_mask: { 7791 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7792 7793 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7794 assert(NumElts % 16 == 0); 7795 7796 // If palignr is shifting the pair of vectors more than the size of two 7797 // lanes, emit zero. 7798 if (ShiftVal >= 32) 7799 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7800 7801 // If palignr is shifting the pair of input vectors more than one lane, 7802 // but less than two lanes, convert to shifting in zeroes. 7803 if (ShiftVal > 16) { 7804 ShiftVal -= 16; 7805 Ops[1] = Ops[0]; 7806 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 7807 } 7808 7809 uint32_t Indices[64]; 7810 // 256-bit palignr operates on 128-bit lanes so we need to handle that 7811 for (unsigned l = 0; l != NumElts; l += 16) { 7812 for (unsigned i = 0; i != 16; ++i) { 7813 unsigned Idx = ShiftVal + i; 7814 if (Idx >= 16) 7815 Idx += NumElts - 16; // End of lane, switch operand. 7816 Indices[l + i] = Idx + l; 7817 } 7818 } 7819 7820 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 7821 makeArrayRef(Indices, NumElts), 7822 "palignr"); 7823 7824 // If this isn't a masked builtin, just return the align operation. 
7825 if (Ops.size() == 3) 7826 return Align; 7827 7828 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 7829 } 7830 7831 case X86::BI__builtin_ia32_movnti: 7832 case X86::BI__builtin_ia32_movnti64: 7833 case X86::BI__builtin_ia32_movntsd: 7834 case X86::BI__builtin_ia32_movntss: { 7835 llvm::MDNode *Node = llvm::MDNode::get( 7836 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7837 7838 Value *Ptr = Ops[0]; 7839 Value *Src = Ops[1]; 7840 7841 // Extract the 0'th element of the source vector. 7842 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 7843 BuiltinID == X86::BI__builtin_ia32_movntss) 7844 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 7845 7846 // Convert the type of the pointer to a pointer to the stored type. 7847 Value *BC = Builder.CreateBitCast( 7848 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); 7849 7850 // Unaligned nontemporal store of the scalar value. 7851 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); 7852 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7853 SI->setAlignment(1); 7854 return SI; 7855 } 7856 7857 case X86::BI__builtin_ia32_selectb_128: 7858 case X86::BI__builtin_ia32_selectb_256: 7859 case X86::BI__builtin_ia32_selectb_512: 7860 case X86::BI__builtin_ia32_selectw_128: 7861 case X86::BI__builtin_ia32_selectw_256: 7862 case X86::BI__builtin_ia32_selectw_512: 7863 case X86::BI__builtin_ia32_selectd_128: 7864 case X86::BI__builtin_ia32_selectd_256: 7865 case X86::BI__builtin_ia32_selectd_512: 7866 case X86::BI__builtin_ia32_selectq_128: 7867 case X86::BI__builtin_ia32_selectq_256: 7868 case X86::BI__builtin_ia32_selectq_512: 7869 case X86::BI__builtin_ia32_selectps_128: 7870 case X86::BI__builtin_ia32_selectps_256: 7871 case X86::BI__builtin_ia32_selectps_512: 7872 case X86::BI__builtin_ia32_selectpd_128: 7873 case X86::BI__builtin_ia32_selectpd_256: 7874 case X86::BI__builtin_ia32_selectpd_512: 7875 return EmitX86Select(*this, Ops[0], Ops[1], 
Ops[2]); 7876 case X86::BI__builtin_ia32_pcmpeqb128_mask: 7877 case X86::BI__builtin_ia32_pcmpeqb256_mask: 7878 case X86::BI__builtin_ia32_pcmpeqb512_mask: 7879 case X86::BI__builtin_ia32_pcmpeqw128_mask: 7880 case X86::BI__builtin_ia32_pcmpeqw256_mask: 7881 case X86::BI__builtin_ia32_pcmpeqw512_mask: 7882 case X86::BI__builtin_ia32_pcmpeqd128_mask: 7883 case X86::BI__builtin_ia32_pcmpeqd256_mask: 7884 case X86::BI__builtin_ia32_pcmpeqd512_mask: 7885 case X86::BI__builtin_ia32_pcmpeqq128_mask: 7886 case X86::BI__builtin_ia32_pcmpeqq256_mask: 7887 case X86::BI__builtin_ia32_pcmpeqq512_mask: 7888 return EmitX86MaskedCompare(*this, 0, false, Ops); 7889 case X86::BI__builtin_ia32_pcmpgtb128_mask: 7890 case X86::BI__builtin_ia32_pcmpgtb256_mask: 7891 case X86::BI__builtin_ia32_pcmpgtb512_mask: 7892 case X86::BI__builtin_ia32_pcmpgtw128_mask: 7893 case X86::BI__builtin_ia32_pcmpgtw256_mask: 7894 case X86::BI__builtin_ia32_pcmpgtw512_mask: 7895 case X86::BI__builtin_ia32_pcmpgtd128_mask: 7896 case X86::BI__builtin_ia32_pcmpgtd256_mask: 7897 case X86::BI__builtin_ia32_pcmpgtd512_mask: 7898 case X86::BI__builtin_ia32_pcmpgtq128_mask: 7899 case X86::BI__builtin_ia32_pcmpgtq256_mask: 7900 case X86::BI__builtin_ia32_pcmpgtq512_mask: 7901 return EmitX86MaskedCompare(*this, 6, true, Ops); 7902 case X86::BI__builtin_ia32_cmpb128_mask: 7903 case X86::BI__builtin_ia32_cmpb256_mask: 7904 case X86::BI__builtin_ia32_cmpb512_mask: 7905 case X86::BI__builtin_ia32_cmpw128_mask: 7906 case X86::BI__builtin_ia32_cmpw256_mask: 7907 case X86::BI__builtin_ia32_cmpw512_mask: 7908 case X86::BI__builtin_ia32_cmpd128_mask: 7909 case X86::BI__builtin_ia32_cmpd256_mask: 7910 case X86::BI__builtin_ia32_cmpd512_mask: 7911 case X86::BI__builtin_ia32_cmpq128_mask: 7912 case X86::BI__builtin_ia32_cmpq256_mask: 7913 case X86::BI__builtin_ia32_cmpq512_mask: { 7914 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7915 return EmitX86MaskedCompare(*this, CC, true, Ops); 7916 } 7917 case 
X86::BI__builtin_ia32_ucmpb128_mask: 7918 case X86::BI__builtin_ia32_ucmpb256_mask: 7919 case X86::BI__builtin_ia32_ucmpb512_mask: 7920 case X86::BI__builtin_ia32_ucmpw128_mask: 7921 case X86::BI__builtin_ia32_ucmpw256_mask: 7922 case X86::BI__builtin_ia32_ucmpw512_mask: 7923 case X86::BI__builtin_ia32_ucmpd128_mask: 7924 case X86::BI__builtin_ia32_ucmpd256_mask: 7925 case X86::BI__builtin_ia32_ucmpd512_mask: 7926 case X86::BI__builtin_ia32_ucmpq128_mask: 7927 case X86::BI__builtin_ia32_ucmpq256_mask: 7928 case X86::BI__builtin_ia32_ucmpq512_mask: { 7929 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7930 return EmitX86MaskedCompare(*this, CC, false, Ops); 7931 } 7932 7933 case X86::BI__builtin_ia32_vplzcntd_128_mask: 7934 case X86::BI__builtin_ia32_vplzcntd_256_mask: 7935 case X86::BI__builtin_ia32_vplzcntd_512_mask: 7936 case X86::BI__builtin_ia32_vplzcntq_128_mask: 7937 case X86::BI__builtin_ia32_vplzcntq_256_mask: 7938 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 7939 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 7940 return EmitX86Select(*this, Ops[2], 7941 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 7942 Ops[1]); 7943 } 7944 7945 case X86::BI__builtin_ia32_pmaxsb128: 7946 case X86::BI__builtin_ia32_pmaxsw128: 7947 case X86::BI__builtin_ia32_pmaxsd128: 7948 case X86::BI__builtin_ia32_pmaxsq128_mask: 7949 case X86::BI__builtin_ia32_pmaxsb256: 7950 case X86::BI__builtin_ia32_pmaxsw256: 7951 case X86::BI__builtin_ia32_pmaxsd256: 7952 case X86::BI__builtin_ia32_pmaxsq256_mask: 7953 case X86::BI__builtin_ia32_pmaxsb512_mask: 7954 case X86::BI__builtin_ia32_pmaxsw512_mask: 7955 case X86::BI__builtin_ia32_pmaxsd512_mask: 7956 case X86::BI__builtin_ia32_pmaxsq512_mask: 7957 return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); 7958 case X86::BI__builtin_ia32_pmaxub128: 7959 case X86::BI__builtin_ia32_pmaxuw128: 7960 case X86::BI__builtin_ia32_pmaxud128: 7961 case X86::BI__builtin_ia32_pmaxuq128_mask: 7962 case 
X86::BI__builtin_ia32_pmaxub256: 7963 case X86::BI__builtin_ia32_pmaxuw256: 7964 case X86::BI__builtin_ia32_pmaxud256: 7965 case X86::BI__builtin_ia32_pmaxuq256_mask: 7966 case X86::BI__builtin_ia32_pmaxub512_mask: 7967 case X86::BI__builtin_ia32_pmaxuw512_mask: 7968 case X86::BI__builtin_ia32_pmaxud512_mask: 7969 case X86::BI__builtin_ia32_pmaxuq512_mask: 7970 return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); 7971 case X86::BI__builtin_ia32_pminsb128: 7972 case X86::BI__builtin_ia32_pminsw128: 7973 case X86::BI__builtin_ia32_pminsd128: 7974 case X86::BI__builtin_ia32_pminsq128_mask: 7975 case X86::BI__builtin_ia32_pminsb256: 7976 case X86::BI__builtin_ia32_pminsw256: 7977 case X86::BI__builtin_ia32_pminsd256: 7978 case X86::BI__builtin_ia32_pminsq256_mask: 7979 case X86::BI__builtin_ia32_pminsb512_mask: 7980 case X86::BI__builtin_ia32_pminsw512_mask: 7981 case X86::BI__builtin_ia32_pminsd512_mask: 7982 case X86::BI__builtin_ia32_pminsq512_mask: 7983 return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); 7984 case X86::BI__builtin_ia32_pminub128: 7985 case X86::BI__builtin_ia32_pminuw128: 7986 case X86::BI__builtin_ia32_pminud128: 7987 case X86::BI__builtin_ia32_pminuq128_mask: 7988 case X86::BI__builtin_ia32_pminub256: 7989 case X86::BI__builtin_ia32_pminuw256: 7990 case X86::BI__builtin_ia32_pminud256: 7991 case X86::BI__builtin_ia32_pminuq256_mask: 7992 case X86::BI__builtin_ia32_pminub512_mask: 7993 case X86::BI__builtin_ia32_pminuw512_mask: 7994 case X86::BI__builtin_ia32_pminud512_mask: 7995 case X86::BI__builtin_ia32_pminuq512_mask: 7996 return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); 7997 7998 // 3DNow! 
7999 case X86::BI__builtin_ia32_pswapdsf: 8000 case X86::BI__builtin_ia32_pswapdsi: { 8001 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 8002 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 8003 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 8004 return Builder.CreateCall(F, Ops, "pswapd"); 8005 } 8006 case X86::BI__builtin_ia32_rdrand16_step: 8007 case X86::BI__builtin_ia32_rdrand32_step: 8008 case X86::BI__builtin_ia32_rdrand64_step: 8009 case X86::BI__builtin_ia32_rdseed16_step: 8010 case X86::BI__builtin_ia32_rdseed32_step: 8011 case X86::BI__builtin_ia32_rdseed64_step: { 8012 Intrinsic::ID ID; 8013 switch (BuiltinID) { 8014 default: llvm_unreachable("Unsupported intrinsic!"); 8015 case X86::BI__builtin_ia32_rdrand16_step: 8016 ID = Intrinsic::x86_rdrand_16; 8017 break; 8018 case X86::BI__builtin_ia32_rdrand32_step: 8019 ID = Intrinsic::x86_rdrand_32; 8020 break; 8021 case X86::BI__builtin_ia32_rdrand64_step: 8022 ID = Intrinsic::x86_rdrand_64; 8023 break; 8024 case X86::BI__builtin_ia32_rdseed16_step: 8025 ID = Intrinsic::x86_rdseed_16; 8026 break; 8027 case X86::BI__builtin_ia32_rdseed32_step: 8028 ID = Intrinsic::x86_rdseed_32; 8029 break; 8030 case X86::BI__builtin_ia32_rdseed64_step: 8031 ID = Intrinsic::x86_rdseed_64; 8032 break; 8033 } 8034 8035 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 8036 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 8037 Ops[0]); 8038 return Builder.CreateExtractValue(Call, 1); 8039 } 8040 8041 // SSE packed comparison intrinsics 8042 case X86::BI__builtin_ia32_cmpeqps: 8043 case X86::BI__builtin_ia32_cmpeqpd: 8044 return getVectorFCmpIR(CmpInst::FCMP_OEQ); 8045 case X86::BI__builtin_ia32_cmpltps: 8046 case X86::BI__builtin_ia32_cmpltpd: 8047 return getVectorFCmpIR(CmpInst::FCMP_OLT); 8048 case X86::BI__builtin_ia32_cmpleps: 8049 case X86::BI__builtin_ia32_cmplepd: 8050 return getVectorFCmpIR(CmpInst::FCMP_OLE); 8051 case 
X86::BI__builtin_ia32_cmpunordps: 8052 case X86::BI__builtin_ia32_cmpunordpd: 8053 return getVectorFCmpIR(CmpInst::FCMP_UNO); 8054 case X86::BI__builtin_ia32_cmpneqps: 8055 case X86::BI__builtin_ia32_cmpneqpd: 8056 return getVectorFCmpIR(CmpInst::FCMP_UNE); 8057 case X86::BI__builtin_ia32_cmpnltps: 8058 case X86::BI__builtin_ia32_cmpnltpd: 8059 return getVectorFCmpIR(CmpInst::FCMP_UGE); 8060 case X86::BI__builtin_ia32_cmpnleps: 8061 case X86::BI__builtin_ia32_cmpnlepd: 8062 return getVectorFCmpIR(CmpInst::FCMP_UGT); 8063 case X86::BI__builtin_ia32_cmpordps: 8064 case X86::BI__builtin_ia32_cmpordpd: 8065 return getVectorFCmpIR(CmpInst::FCMP_ORD); 8066 case X86::BI__builtin_ia32_cmpps: 8067 case X86::BI__builtin_ia32_cmpps256: 8068 case X86::BI__builtin_ia32_cmppd: 8069 case X86::BI__builtin_ia32_cmppd256: { 8070 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8071 // If this one of the SSE immediates, we can use native IR. 8072 if (CC < 8) { 8073 FCmpInst::Predicate Pred; 8074 switch (CC) { 8075 case 0: Pred = FCmpInst::FCMP_OEQ; break; 8076 case 1: Pred = FCmpInst::FCMP_OLT; break; 8077 case 2: Pred = FCmpInst::FCMP_OLE; break; 8078 case 3: Pred = FCmpInst::FCMP_UNO; break; 8079 case 4: Pred = FCmpInst::FCMP_UNE; break; 8080 case 5: Pred = FCmpInst::FCMP_UGE; break; 8081 case 6: Pred = FCmpInst::FCMP_UGT; break; 8082 case 7: Pred = FCmpInst::FCMP_ORD; break; 8083 } 8084 return getVectorFCmpIR(Pred); 8085 } 8086 8087 // We can't handle 8-31 immediates with native IR, use the intrinsic. 8088 // Except for predicates that create constants. 8089 Intrinsic::ID ID; 8090 switch (BuiltinID) { 8091 default: llvm_unreachable("Unsupported intrinsic!"); 8092 case X86::BI__builtin_ia32_cmpps: 8093 ID = Intrinsic::x86_sse_cmp_ps; 8094 break; 8095 case X86::BI__builtin_ia32_cmpps256: 8096 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8097 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 
8098 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8099 Value *Constant = (CC == 0xf || CC == 0x1f) ? 8100 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : 8101 llvm::Constant::getNullValue(Builder.getInt32Ty()); 8102 Value *Vec = Builder.CreateVectorSplat( 8103 Ops[0]->getType()->getVectorNumElements(), Constant); 8104 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8105 } 8106 ID = Intrinsic::x86_avx_cmp_ps_256; 8107 break; 8108 case X86::BI__builtin_ia32_cmppd: 8109 ID = Intrinsic::x86_sse2_cmp_pd; 8110 break; 8111 case X86::BI__builtin_ia32_cmppd256: 8112 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8113 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 8114 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8115 Value *Constant = (CC == 0xf || CC == 0x1f) ? 8116 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : 8117 llvm::Constant::getNullValue(Builder.getInt64Ty()); 8118 Value *Vec = Builder.CreateVectorSplat( 8119 Ops[0]->getType()->getVectorNumElements(), Constant); 8120 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8121 } 8122 ID = Intrinsic::x86_avx_cmp_pd_256; 8123 break; 8124 } 8125 8126 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 8127 } 8128 8129 // SSE scalar comparison intrinsics 8130 case X86::BI__builtin_ia32_cmpeqss: 8131 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 8132 case X86::BI__builtin_ia32_cmpltss: 8133 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 8134 case X86::BI__builtin_ia32_cmpless: 8135 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 8136 case X86::BI__builtin_ia32_cmpunordss: 8137 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 8138 case X86::BI__builtin_ia32_cmpneqss: 8139 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 8140 case X86::BI__builtin_ia32_cmpnltss: 8141 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 8142 case X86::BI__builtin_ia32_cmpnless: 8143 return 
getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 8144 case X86::BI__builtin_ia32_cmpordss: 8145 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 8146 case X86::BI__builtin_ia32_cmpeqsd: 8147 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 8148 case X86::BI__builtin_ia32_cmpltsd: 8149 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 8150 case X86::BI__builtin_ia32_cmplesd: 8151 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 8152 case X86::BI__builtin_ia32_cmpunordsd: 8153 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 8154 case X86::BI__builtin_ia32_cmpneqsd: 8155 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 8156 case X86::BI__builtin_ia32_cmpnltsd: 8157 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 8158 case X86::BI__builtin_ia32_cmpnlesd: 8159 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 8160 case X86::BI__builtin_ia32_cmpordsd: 8161 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 8162 8163 case X86::BI__emul: 8164 case X86::BI__emulu: { 8165 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 8166 bool isSigned = (BuiltinID == X86::BI__emul); 8167 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 8168 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 8169 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 8170 } 8171 case X86::BI__mulh: 8172 case X86::BI__umulh: 8173 case X86::BI_mul128: 8174 case X86::BI_umul128: { 8175 llvm::Type *ResType = ConvertType(E->getType()); 8176 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 8177 8178 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 8179 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 8180 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 8181 8182 Value *MulResult, *HigherBits; 8183 if (IsSigned) { 8184 MulResult = Builder.CreateNSWMul(LHS, RHS); 8185 HigherBits = 
Builder.CreateAShr(MulResult, 64); 8186 } else { 8187 MulResult = Builder.CreateNUWMul(LHS, RHS); 8188 HigherBits = Builder.CreateLShr(MulResult, 64); 8189 } 8190 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 8191 8192 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 8193 return HigherBits; 8194 8195 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 8196 Builder.CreateStore(HigherBits, HighBitsAddress); 8197 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 8198 } 8199 8200 case X86::BI__faststorefence: { 8201 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8202 llvm::SyncScope::System); 8203 } 8204 case X86::BI_ReadWriteBarrier: 8205 case X86::BI_ReadBarrier: 8206 case X86::BI_WriteBarrier: { 8207 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8208 llvm::SyncScope::SingleThread); 8209 } 8210 case X86::BI_BitScanForward: 8211 case X86::BI_BitScanForward64: 8212 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 8213 case X86::BI_BitScanReverse: 8214 case X86::BI_BitScanReverse64: 8215 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 8216 8217 case X86::BI_InterlockedAnd64: 8218 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 8219 case X86::BI_InterlockedExchange64: 8220 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 8221 case X86::BI_InterlockedExchangeAdd64: 8222 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 8223 case X86::BI_InterlockedExchangeSub64: 8224 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 8225 case X86::BI_InterlockedOr64: 8226 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 8227 case X86::BI_InterlockedXor64: 8228 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 8229 case X86::BI_InterlockedDecrement64: 8230 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 8231 case X86::BI_InterlockedIncrement64: 8232 return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 8233 8234 case X86::BI_AddressOfReturnAddress: { 8235 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); 8236 return Builder.CreateCall(F); 8237 } 8238 case X86::BI__stosb: { 8239 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 8240 // instruction, but it will create a memset that won't be optimized away. 8241 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); 8242 } 8243 case X86::BI__ud2: 8244 // llvm.trap makes a ud2a instruction on x86. 8245 return EmitTrapCall(Intrinsic::trap); 8246 case X86::BI__int2c: { 8247 // This syscall signals a driver assertion failure in x86 NT kernels. 8248 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 8249 llvm::InlineAsm *IA = 8250 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); 8251 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 8252 getLLVMContext(), llvm::AttributeList::FunctionIndex, 8253 llvm::Attribute::NoReturn); 8254 CallSite CS = Builder.CreateCall(IA); 8255 CS.setAttributes(NoReturnAttr); 8256 return CS.getInstruction(); 8257 } 8258 case X86::BI__readfsbyte: 8259 case X86::BI__readfsword: 8260 case X86::BI__readfsdword: 8261 case X86::BI__readfsqword: { 8262 llvm::Type *IntTy = ConvertType(E->getType()); 8263 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8264 llvm::PointerType::get(IntTy, 257)); 8265 LoadInst *Load = Builder.CreateAlignedLoad( 8266 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8267 Load->setVolatile(true); 8268 return Load; 8269 } 8270 case X86::BI__readgsbyte: 8271 case X86::BI__readgsword: 8272 case X86::BI__readgsdword: 8273 case X86::BI__readgsqword: { 8274 llvm::Type *IntTy = ConvertType(E->getType()); 8275 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8276 llvm::PointerType::get(IntTy, 256)); 8277 LoadInst *Load = Builder.CreateAlignedLoad( 8278 IntTy, Ptr, 
getContext().getTypeAlignInChars(E->getType())); 8279 Load->setVolatile(true); 8280 return Load; 8281 } 8282 } 8283 } 8284 8285 8286 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 8287 const CallExpr *E) { 8288 SmallVector<Value*, 4> Ops; 8289 8290 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 8291 Ops.push_back(EmitScalarExpr(E->getArg(i))); 8292 8293 Intrinsic::ID ID = Intrinsic::not_intrinsic; 8294 8295 switch (BuiltinID) { 8296 default: return nullptr; 8297 8298 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 8299 // call __builtin_readcyclecounter. 8300 case PPC::BI__builtin_ppc_get_timebase: 8301 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 8302 8303 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 8304 case PPC::BI__builtin_altivec_lvx: 8305 case PPC::BI__builtin_altivec_lvxl: 8306 case PPC::BI__builtin_altivec_lvebx: 8307 case PPC::BI__builtin_altivec_lvehx: 8308 case PPC::BI__builtin_altivec_lvewx: 8309 case PPC::BI__builtin_altivec_lvsl: 8310 case PPC::BI__builtin_altivec_lvsr: 8311 case PPC::BI__builtin_vsx_lxvd2x: 8312 case PPC::BI__builtin_vsx_lxvw4x: 8313 case PPC::BI__builtin_vsx_lxvd2x_be: 8314 case PPC::BI__builtin_vsx_lxvw4x_be: 8315 case PPC::BI__builtin_vsx_lxvl: 8316 case PPC::BI__builtin_vsx_lxvll: 8317 { 8318 if(BuiltinID == PPC::BI__builtin_vsx_lxvl || 8319 BuiltinID == PPC::BI__builtin_vsx_lxvll){ 8320 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); 8321 }else { 8322 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8323 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 8324 Ops.pop_back(); 8325 } 8326 8327 switch (BuiltinID) { 8328 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 8329 case PPC::BI__builtin_altivec_lvx: 8330 ID = Intrinsic::ppc_altivec_lvx; 8331 break; 8332 case PPC::BI__builtin_altivec_lvxl: 8333 ID = Intrinsic::ppc_altivec_lvxl; 8334 break; 8335 case PPC::BI__builtin_altivec_lvebx: 8336 ID = Intrinsic::ppc_altivec_lvebx; 8337 break; 
8338 case PPC::BI__builtin_altivec_lvehx: 8339 ID = Intrinsic::ppc_altivec_lvehx; 8340 break; 8341 case PPC::BI__builtin_altivec_lvewx: 8342 ID = Intrinsic::ppc_altivec_lvewx; 8343 break; 8344 case PPC::BI__builtin_altivec_lvsl: 8345 ID = Intrinsic::ppc_altivec_lvsl; 8346 break; 8347 case PPC::BI__builtin_altivec_lvsr: 8348 ID = Intrinsic::ppc_altivec_lvsr; 8349 break; 8350 case PPC::BI__builtin_vsx_lxvd2x: 8351 ID = Intrinsic::ppc_vsx_lxvd2x; 8352 break; 8353 case PPC::BI__builtin_vsx_lxvw4x: 8354 ID = Intrinsic::ppc_vsx_lxvw4x; 8355 break; 8356 case PPC::BI__builtin_vsx_lxvd2x_be: 8357 ID = Intrinsic::ppc_vsx_lxvd2x_be; 8358 break; 8359 case PPC::BI__builtin_vsx_lxvw4x_be: 8360 ID = Intrinsic::ppc_vsx_lxvw4x_be; 8361 break; 8362 case PPC::BI__builtin_vsx_lxvl: 8363 ID = Intrinsic::ppc_vsx_lxvl; 8364 break; 8365 case PPC::BI__builtin_vsx_lxvll: 8366 ID = Intrinsic::ppc_vsx_lxvll; 8367 break; 8368 } 8369 llvm::Function *F = CGM.getIntrinsic(ID); 8370 return Builder.CreateCall(F, Ops, ""); 8371 } 8372 8373 // vec_st, vec_xst_be 8374 case PPC::BI__builtin_altivec_stvx: 8375 case PPC::BI__builtin_altivec_stvxl: 8376 case PPC::BI__builtin_altivec_stvebx: 8377 case PPC::BI__builtin_altivec_stvehx: 8378 case PPC::BI__builtin_altivec_stvewx: 8379 case PPC::BI__builtin_vsx_stxvd2x: 8380 case PPC::BI__builtin_vsx_stxvw4x: 8381 case PPC::BI__builtin_vsx_stxvd2x_be: 8382 case PPC::BI__builtin_vsx_stxvw4x_be: 8383 case PPC::BI__builtin_vsx_stxvl: 8384 case PPC::BI__builtin_vsx_stxvll: 8385 { 8386 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 8387 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 8388 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8389 }else { 8390 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 8391 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 8392 Ops.pop_back(); 8393 } 8394 8395 switch (BuiltinID) { 8396 default: llvm_unreachable("Unsupported st intrinsic!"); 8397 case PPC::BI__builtin_altivec_stvx: 8398 ID = Intrinsic::ppc_altivec_stvx; 8399 break; 8400 
case PPC::BI__builtin_altivec_stvxl: 8401 ID = Intrinsic::ppc_altivec_stvxl; 8402 break; 8403 case PPC::BI__builtin_altivec_stvebx: 8404 ID = Intrinsic::ppc_altivec_stvebx; 8405 break; 8406 case PPC::BI__builtin_altivec_stvehx: 8407 ID = Intrinsic::ppc_altivec_stvehx; 8408 break; 8409 case PPC::BI__builtin_altivec_stvewx: 8410 ID = Intrinsic::ppc_altivec_stvewx; 8411 break; 8412 case PPC::BI__builtin_vsx_stxvd2x: 8413 ID = Intrinsic::ppc_vsx_stxvd2x; 8414 break; 8415 case PPC::BI__builtin_vsx_stxvw4x: 8416 ID = Intrinsic::ppc_vsx_stxvw4x; 8417 break; 8418 case PPC::BI__builtin_vsx_stxvd2x_be: 8419 ID = Intrinsic::ppc_vsx_stxvd2x_be; 8420 break; 8421 case PPC::BI__builtin_vsx_stxvw4x_be: 8422 ID = Intrinsic::ppc_vsx_stxvw4x_be; 8423 break; 8424 case PPC::BI__builtin_vsx_stxvl: 8425 ID = Intrinsic::ppc_vsx_stxvl; 8426 break; 8427 case PPC::BI__builtin_vsx_stxvll: 8428 ID = Intrinsic::ppc_vsx_stxvll; 8429 break; 8430 } 8431 llvm::Function *F = CGM.getIntrinsic(ID); 8432 return Builder.CreateCall(F, Ops, ""); 8433 } 8434 // Square root 8435 case PPC::BI__builtin_vsx_xvsqrtsp: 8436 case PPC::BI__builtin_vsx_xvsqrtdp: { 8437 llvm::Type *ResultType = ConvertType(E->getType()); 8438 Value *X = EmitScalarExpr(E->getArg(0)); 8439 ID = Intrinsic::sqrt; 8440 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8441 return Builder.CreateCall(F, X); 8442 } 8443 // Count leading zeros 8444 case PPC::BI__builtin_altivec_vclzb: 8445 case PPC::BI__builtin_altivec_vclzh: 8446 case PPC::BI__builtin_altivec_vclzw: 8447 case PPC::BI__builtin_altivec_vclzd: { 8448 llvm::Type *ResultType = ConvertType(E->getType()); 8449 Value *X = EmitScalarExpr(E->getArg(0)); 8450 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8451 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8452 return Builder.CreateCall(F, {X, Undef}); 8453 } 8454 case PPC::BI__builtin_altivec_vctzb: 8455 case PPC::BI__builtin_altivec_vctzh: 8456 case PPC::BI__builtin_altivec_vctzw: 8457 case 
PPC::BI__builtin_altivec_vctzd: { 8458 llvm::Type *ResultType = ConvertType(E->getType()); 8459 Value *X = EmitScalarExpr(E->getArg(0)); 8460 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8461 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8462 return Builder.CreateCall(F, {X, Undef}); 8463 } 8464 case PPC::BI__builtin_altivec_vpopcntb: 8465 case PPC::BI__builtin_altivec_vpopcnth: 8466 case PPC::BI__builtin_altivec_vpopcntw: 8467 case PPC::BI__builtin_altivec_vpopcntd: { 8468 llvm::Type *ResultType = ConvertType(E->getType()); 8469 Value *X = EmitScalarExpr(E->getArg(0)); 8470 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8471 return Builder.CreateCall(F, X); 8472 } 8473 // Copy sign 8474 case PPC::BI__builtin_vsx_xvcpsgnsp: 8475 case PPC::BI__builtin_vsx_xvcpsgndp: { 8476 llvm::Type *ResultType = ConvertType(E->getType()); 8477 Value *X = EmitScalarExpr(E->getArg(0)); 8478 Value *Y = EmitScalarExpr(E->getArg(1)); 8479 ID = Intrinsic::copysign; 8480 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8481 return Builder.CreateCall(F, {X, Y}); 8482 } 8483 // Rounding/truncation 8484 case PPC::BI__builtin_vsx_xvrspip: 8485 case PPC::BI__builtin_vsx_xvrdpip: 8486 case PPC::BI__builtin_vsx_xvrdpim: 8487 case PPC::BI__builtin_vsx_xvrspim: 8488 case PPC::BI__builtin_vsx_xvrdpi: 8489 case PPC::BI__builtin_vsx_xvrspi: 8490 case PPC::BI__builtin_vsx_xvrdpic: 8491 case PPC::BI__builtin_vsx_xvrspic: 8492 case PPC::BI__builtin_vsx_xvrdpiz: 8493 case PPC::BI__builtin_vsx_xvrspiz: { 8494 llvm::Type *ResultType = ConvertType(E->getType()); 8495 Value *X = EmitScalarExpr(E->getArg(0)); 8496 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 8497 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 8498 ID = Intrinsic::floor; 8499 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 8500 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 8501 ID = Intrinsic::round; 8502 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 8503 BuiltinID == 
PPC::BI__builtin_vsx_xvrspic) 8504 ID = Intrinsic::nearbyint; 8505 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 8506 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 8507 ID = Intrinsic::ceil; 8508 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 8509 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 8510 ID = Intrinsic::trunc; 8511 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8512 return Builder.CreateCall(F, X); 8513 } 8514 8515 // Absolute value 8516 case PPC::BI__builtin_vsx_xvabsdp: 8517 case PPC::BI__builtin_vsx_xvabssp: { 8518 llvm::Type *ResultType = ConvertType(E->getType()); 8519 Value *X = EmitScalarExpr(E->getArg(0)); 8520 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8521 return Builder.CreateCall(F, X); 8522 } 8523 8524 // FMA variations 8525 case PPC::BI__builtin_vsx_xvmaddadp: 8526 case PPC::BI__builtin_vsx_xvmaddasp: 8527 case PPC::BI__builtin_vsx_xvnmaddadp: 8528 case PPC::BI__builtin_vsx_xvnmaddasp: 8529 case PPC::BI__builtin_vsx_xvmsubadp: 8530 case PPC::BI__builtin_vsx_xvmsubasp: 8531 case PPC::BI__builtin_vsx_xvnmsubadp: 8532 case PPC::BI__builtin_vsx_xvnmsubasp: { 8533 llvm::Type *ResultType = ConvertType(E->getType()); 8534 Value *X = EmitScalarExpr(E->getArg(0)); 8535 Value *Y = EmitScalarExpr(E->getArg(1)); 8536 Value *Z = EmitScalarExpr(E->getArg(2)); 8537 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8538 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8539 switch (BuiltinID) { 8540 case PPC::BI__builtin_vsx_xvmaddadp: 8541 case PPC::BI__builtin_vsx_xvmaddasp: 8542 return Builder.CreateCall(F, {X, Y, Z}); 8543 case PPC::BI__builtin_vsx_xvnmaddadp: 8544 case PPC::BI__builtin_vsx_xvnmaddasp: 8545 return Builder.CreateFSub(Zero, 8546 Builder.CreateCall(F, {X, Y, Z}), "sub"); 8547 case PPC::BI__builtin_vsx_xvmsubadp: 8548 case PPC::BI__builtin_vsx_xvmsubasp: 8549 return Builder.CreateCall(F, 8550 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8551 case 
PPC::BI__builtin_vsx_xvnmsubadp: 8552 case PPC::BI__builtin_vsx_xvnmsubasp: 8553 Value *FsubRes = 8554 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8555 return Builder.CreateFSub(Zero, FsubRes, "sub"); 8556 } 8557 llvm_unreachable("Unknown FMA operation"); 8558 return nullptr; // Suppress no-return warning 8559 } 8560 8561 case PPC::BI__builtin_vsx_insertword: { 8562 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 8563 8564 // Third argument is a compile time constant int. It must be clamped to 8565 // to the range [0, 12]. 8566 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8567 assert(ArgCI && 8568 "Third arg to xxinsertw intrinsic must be constant integer"); 8569 const int64_t MaxIndex = 12; 8570 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8571 8572 // The builtin semantics don't exactly match the xxinsertw instructions 8573 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 8574 // word from the first argument, and inserts it in the second argument. The 8575 // instruction extracts the word from its second input register and inserts 8576 // it into its first input register, so swap the first and second arguments. 8577 std::swap(Ops[0], Ops[1]); 8578 8579 // Need to cast the second argument from a vector of unsigned int to a 8580 // vector of long long. 8581 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8582 8583 if (getTarget().isLittleEndian()) { 8584 // Create a shuffle mask of (1, 0) 8585 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8586 ConstantInt::get(Int32Ty, 0) 8587 }; 8588 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8589 8590 // Reverse the double words in the vector we will extract from. 8591 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8592 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 8593 8594 // Reverse the index. 
8595 Index = MaxIndex - Index; 8596 } 8597 8598 // Intrinsic expects the first arg to be a vector of int. 8599 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8600 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 8601 return Builder.CreateCall(F, Ops); 8602 } 8603 8604 case PPC::BI__builtin_vsx_extractuword: { 8605 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 8606 8607 // Intrinsic expects the first argument to be a vector of doublewords. 8608 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8609 8610 // The second argument is a compile time constant int that needs to 8611 // be clamped to the range [0, 12]. 8612 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 8613 assert(ArgCI && 8614 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 8615 const int64_t MaxIndex = 12; 8616 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8617 8618 if (getTarget().isLittleEndian()) { 8619 // Reverse the index. 8620 Index = MaxIndex - Index; 8621 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8622 8623 // Emit the call, then reverse the double words of the results vector. 
8624 Value *Call = Builder.CreateCall(F, Ops); 8625 8626 // Create a shuffle mask of (1, 0) 8627 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8628 ConstantInt::get(Int32Ty, 0) 8629 }; 8630 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8631 8632 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 8633 return ShuffleCall; 8634 } else { 8635 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8636 return Builder.CreateCall(F, Ops); 8637 } 8638 } 8639 8640 case PPC::BI__builtin_vsx_xxpermdi: { 8641 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8642 assert(ArgCI && "Third arg must be constant integer!"); 8643 8644 unsigned Index = ArgCI->getZExtValue(); 8645 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8646 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8647 8648 // Element zero comes from the first input vector and element one comes from 8649 // the second. The element indices within each vector are numbered in big 8650 // endian order so the shuffle mask must be adjusted for this on little 8651 // endian platforms (i.e. index is complemented and source vector reversed). 
8652 unsigned ElemIdx0; 8653 unsigned ElemIdx1; 8654 if (getTarget().isLittleEndian()) { 8655 ElemIdx0 = (~Index & 1) + 2; 8656 ElemIdx1 = (~Index & 2) >> 1; 8657 } else { // BigEndian 8658 ElemIdx0 = (Index & 2) >> 1; 8659 ElemIdx1 = 2 + (Index & 1); 8660 } 8661 8662 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 8663 ConstantInt::get(Int32Ty, ElemIdx1)}; 8664 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8665 8666 Value *ShuffleCall = 8667 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8668 QualType BIRetType = E->getType(); 8669 auto RetTy = ConvertType(BIRetType); 8670 return Builder.CreateBitCast(ShuffleCall, RetTy); 8671 } 8672 8673 case PPC::BI__builtin_vsx_xxsldwi: { 8674 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8675 assert(ArgCI && "Third argument must be a compile time constant"); 8676 unsigned Index = ArgCI->getZExtValue() & 0x3; 8677 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8678 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 8679 8680 // Create a shuffle mask 8681 unsigned ElemIdx0; 8682 unsigned ElemIdx1; 8683 unsigned ElemIdx2; 8684 unsigned ElemIdx3; 8685 if (getTarget().isLittleEndian()) { 8686 // Little endian element N comes from element 8+N-Index of the 8687 // concatenated wide vector (of course, using modulo arithmetic on 8688 // the total number of elements). 
ElemIdx0 = (8 - Index) % 8;
      ElemIdx1 = (9 - Index) % 8;
      ElemIdx2 = (10 - Index) % 8;
      ElemIdx3 = (11 - Index) % 8;
    } else {
      // Big endian ElemIdx<N> = Index + N
      ElemIdx0 = Index;
      ElemIdx1 = Index + 1;
      ElemIdx2 = Index + 2;
      ElemIdx3 = Index + 3;
    }

    Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
                                ConstantInt::get(Int32Ty, ElemIdx1),
                                ConstantInt::get(Int32Ty, ElemIdx2),
                                ConstantInt::get(Int32Ty, ElemIdx3)};

    Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
    Value *ShuffleCall =
        Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
    QualType BIRetType = E->getType();
    auto RetTy = ConvertType(BIRetType);
    return Builder.CreateBitCast(ShuffleCall, RetTy);
  }
  }
}

/// Emit LLVM IR for a call to an AMDGPU (amdgcn / r600) target builtin.
///
/// \param BuiltinID the AMDGPU::BI* identifier selecting the builtin.
/// \param E the call expression whose arguments are emitted.
/// \returns the value produced for the call, or nullptr when \p BuiltinID has
/// no case in the switch below.
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgcn_div_scale:
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.

    // The fourth call argument is the pointer the flag is written through.
    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    // The intrinsic is overloaded on the type of the first operand.
    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                           X->getType());

    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

    // Element 0 of the returned aggregate is the builtin's result, element 1
    // is the flag.
    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    // Zero-extend the flag to the pointee type of the out pointer before
    // storing it.
    llvm::Type *RealFlagType
      = FlagOutPtr.getPointer()->getType()->getPointerElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                      Src0->getType());
    // The fourth argument is passed to the intrinsic as the result of an
    // is-not-null test rather than as the raw value.
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }

  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
  case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
    // Forward the first five call arguments unchanged; the intrinsic is
    // overloaded on the type of the first one.
    llvm::SmallVector<llvm::Value *, 5> Args;
    for (unsigned I = 0; I != 5; ++I)
      Args.push_back(EmitScalarExpr(E->getArg(I)));
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
                                Args[0]->getType());
    return Builder.CreateCall(F, Args);
  }
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  case AMDGPU::BI__builtin_amdgcn_rcp:
  case AMDGPU::BI__builtin_amdgcn_rcpf:
  case AMDGPU::BI__builtin_amdgcn_rcph:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
  case AMDGPU::BI__builtin_amdgcn_rsq:
  case AMDGPU::BI__builtin_amdgcn_rsqf:
  case AMDGPU::BI__builtin_amdgcn_rsqh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
  case AMDGPU::BI__builtin_amdgcn_sinf:
  case AMDGPU::BI__builtin_amdgcn_sinh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
  case AMDGPU::BI__builtin_amdgcn_cosf:
  case AMDGPU::BI__builtin_amdgcn_cosh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf:
  case AMDGPU::BI__builtin_amdgcn_ldexph:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
    // The intrinsic is requested with two overload types: an i32 and the
    // type of the source operand.
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                { Builder.getInt32Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
    // Same as the case above, but the first overload type is i16.
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                { Builder.getInt16Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
  case AMDGPU::BI__builtin_amdgcn_fracth:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_lerp:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
  case AMDGPU::BI__builtin_amdgcn_uicmp:
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
  case AMDGPU::BI__builtin_amdgcn_sicmp:
  case AMDGPU::BI__builtin_amdgcn_sicmpl:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
  case AMDGPU::BI__builtin_amdgcn_fcmp:
  case AMDGPU::BI__builtin_amdgcn_fcmpf:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
  case AMDGPU::BI__builtin_amdgcn_classh:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
  case AMDGPU::BI__builtin_amdgcn_read_exec: {
    // Emitted through EmitSpecialRegisterBuiltin for the register named
    // "exec"; the resulting call is then marked convergent.
    CallInst *CI = cast<CallInst>(
      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
    CI->setConvergent();
    return CI;
  }

  // amdgcn workitem
  // NOTE(review): the trailing 0 and 1024 are presumably the bounds of the
  // value range emitRangedBuiltin attaches to the call; confirm against that
  // helper.
  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);

  // r600 intrinsics
  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
    return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
  case AMDGPU::BI__builtin_r600_read_tidig_x:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_y:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_z:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
  default:
    // Not handled by this function.
    return nullptr;
  }
}

/// Handle a SystemZ function in which the final argument is a pointer
/// to an int that receives the post-instruction CC value.  At the LLVM level
/// this is represented as a function that returns a {result, cc} pair.
///
/// \param CGF the function being emitted into.
/// \param IntrinsicID the (non-overloaded) LLVM intrinsic to call.
/// \param E the builtin call expression; every argument except the last is
/// forwarded to the intrinsic.
/// \returns element 0 of the intrinsic's returned pair.
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
                                         unsigned IntrinsicID,
                                         const CallExpr *E) {
  // NOTE(review): assumes the call has at least one argument (the CC
  // pointer); with zero arguments this unsigned subtraction would wrap.
  unsigned NumArgs = E->getNumArgs() - 1;
  SmallVector<Value *, 8> Args(NumArgs);
  for (unsigned I = 0; I < NumArgs; ++I)
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  // The last argument is the destination for the CC value.
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
  Value *Call = CGF.Builder.CreateCall(F, Args);
  // Element 1 of the returned pair is stored through the CC pointer; element
  // 0 becomes the value of the builtin call.
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  CGF.Builder.CreateStore(CC, CCPtr);
  return CGF.Builder.CreateExtractValue(Call, 0);
}

/// Emit LLVM IR for a call to a SystemZ target builtin.
///
/// \param BuiltinID the SystemZ::BI* identifier selecting the builtin.
/// \param E the call expression whose arguments are emitted.
/// \returns the value produced for the call, or nullptr when \p BuiltinID has
/// no case in the switch below.
Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  switch (BuiltinID) {
  case SystemZ::BI__builtin_tbegin: {
    // The builtin's single argument is passed together with a fixed 32-bit
    // control constant.
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbegin_nofloat: {
    // Same operands as the case above, different intrinsic.
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbeginc: {
    // No call argument is read here: a null pointer of type Int8PtrTy is
    // passed as the first operand, with a different control constant.
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tabort: {
    // The argument is sign-extended to Int64Ty before being passed on.
    Value *Data = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  }
  case SystemZ::BI__builtin_non_tx_store: {
    // Note the operand order: the builtin takes (address, data) while the
    // intrinsic is called with (data, address).
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Data = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
    return Builder.CreateCall(F, {Data, Address});
  }

  // Vector builtins.  Note that most vector builtins are mapped automatically
  // to target-specific LLVM intrinsics.  The ones handled specially here can
  // be represented via standard LLVM IR, which is preferable to enable common
  // LLVM optimizations.

  case SystemZ::BI__builtin_s390_vpopctb:
  case SystemZ::BI__builtin_s390_vpopcth:
  case SystemZ::BI__builtin_s390_vpopctf:
  case SystemZ::BI__builtin_s390_vpopctg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }

  case SystemZ::BI__builtin_s390_vclzb:
  case SystemZ::BI__builtin_s390_vclzh:
  case SystemZ::BI__builtin_s390_vclzf:
  case SystemZ::BI__builtin_s390_vclzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    // Despite its name, Undef is the i1 constant false; it is passed as the
    // second operand of ctlz (presumably the intrinsic's is_zero_undef flag;
    // confirm against the LLVM LangRef).
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vctzb:
  case SystemZ::BI__builtin_s390_vctzh:
  case SystemZ::BI__builtin_s390_vctzf:
  case SystemZ::BI__builtin_s390_vctzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    // As for ctlz above: an i1 false constant as the second operand of cttz.
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vfsqsb:
  case SystemZ::BI__builtin_s390_vfsqdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vfmasb:
  case SystemZ::BI__builtin_s390_vfmadb: {
    // fma(X, Y, Z)
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Z});
  }
  case SystemZ::BI__builtin_s390_vfmssb:
  case SystemZ::BI__builtin_s390_vfmsdb: {
    // fma(X, Y, -Z); the negation is emitted as an fsub from the
    // zero-for-negation constant of the result type.
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
  }
  case SystemZ::BI__builtin_s390_vfnmasb:
  case SystemZ::BI__builtin_s390_vfnmadb: {
    // -fma(X, Y, Z)
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
  }
  case SystemZ::BI__builtin_s390_vfnmssb:
  case SystemZ::BI__builtin_s390_vfnmsdb: {
    // -fma(X, Y, -Z).  Unlike the sibling cases, the outer fsub is created
    // without an explicit value name.
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
  }
  case SystemZ::BI__builtin_s390_vflpsb:
  case SystemZ::BI__builtin_s390_vflpdb: {
    // fabs(X)
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vflnsb:
  case SystemZ::BI__builtin_s390_vflndb: {
    // -fabs(X)
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
  }
  case SystemZ::BI__builtin_s390_vfisb:
  case SystemZ::BI__builtin_s390_vfidb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    // Constant-fold the M4 and M5 mask arguments.
    llvm::APSInt M4, M5;
    bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
    bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
    assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
    // The flags are only read by the assert; the casts keep builds without
    // assertions free of unused-variable warnings.
    (void)IsConstM4; (void)IsConstM5;
    // Check whether this instance can be represented via a LLVM standard
    // intrinsic.  We only support some combinations of M4 and M5.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (M4.getZExtValue()) {
    default: break;
    case 0:  // IEEE-inexact exception allowed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::rint; break;
      }
      break;
    case 4:  // IEEE-inexact exception suppressed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::nearbyint; break;
      case 1: ID = Intrinsic::round; break;
      case 5: ID = Intrinsic::trunc; break;
      case 6: ID = Intrinsic::ceil; break;
      case 7: ID = Intrinsic::floor; break;
      }
      break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      Function *F = CGM.getIntrinsic(ID, ResultType);
      return Builder.CreateCall(F, X);
    }
    // No standard intrinsic matched: fall back to the target-specific
    // intrinsic and pass M4 and M5 through as constants.
    switch (BuiltinID) {
    case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
    case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
    default: llvm_unreachable("Unknown BuiltinID");
    }
    Function *F = CGM.getIntrinsic(ID);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
    return Builder.CreateCall(F, {X, M4Value, M5Value});
  }
  case SystemZ::BI__builtin_s390_vfmaxsb:
  case SystemZ::BI__builtin_s390_vfmaxdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    // Constant-fold the M4 mask argument.
    llvm::APSInt M4;
    bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
    assert(IsConstM4 && "Constant arg isn't actually constant?");
    (void)IsConstM4;
    // Check whether this instance can be represented via a LLVM standard
    // intrinsic.  We only support some values of M4.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (M4.getZExtValue()) {
    default: break;
    case 4: ID = Intrinsic::maxnum; break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      Function *F = CGM.getIntrinsic(ID, ResultType);
      return Builder.CreateCall(F, {X, Y});
    }
    // Otherwise use the target-specific intrinsic, passing M4 as a constant.
    switch (BuiltinID) {
    case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
    case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
    default: llvm_unreachable("Unknown BuiltinID");
    }
    Function *F = CGM.getIntrinsic(ID);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    return Builder.CreateCall(F, {X, Y, M4Value});
  }
  case SystemZ::BI__builtin_s390_vfminsb:
  case SystemZ::BI__builtin_s390_vfmindb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    // Constant-fold the M4 mask argument.
    llvm::APSInt M4;
    bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
    assert(IsConstM4 && "Constant arg isn't actually constant?");
    (void)IsConstM4;
    // Check whether this instance can be represented via a LLVM standard
    // intrinsic.  We only support some values of M4.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (M4.getZExtValue()) {
    default: break;
    case 4: ID = Intrinsic::minnum; break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      Function *F = CGM.getIntrinsic(ID, ResultType);
      return Builder.CreateCall(F, {X, Y});
    }
    // Otherwise use the target-specific intrinsic, passing M4 as a constant.
    switch (BuiltinID) {
    case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
    case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
    default: llvm_unreachable("Unknown BuiltinID");
    }
    Function *F = CGM.getIntrinsic(ID);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    return Builder.CreateCall(F, {X, Y, M4Value});
  }

  // Vector intrinsics that output the post-instruction CC value.
  // Each INTRINSIC_WITH_CC(NAME) expands to a case label for
  // SystemZ::BI__builtin_NAME that forwards to EmitSystemZIntrinsicWithCC with
  // the identically named Intrinsic::NAME.

#define INTRINSIC_WITH_CC(NAME) \
    case SystemZ::BI__builtin_##NAME: \
      return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)

  INTRINSIC_WITH_CC(s390_vpkshs);
  INTRINSIC_WITH_CC(s390_vpksfs);
  INTRINSIC_WITH_CC(s390_vpksgs);

  INTRINSIC_WITH_CC(s390_vpklshs);
  INTRINSIC_WITH_CC(s390_vpklsfs);
  INTRINSIC_WITH_CC(s390_vpklsgs);

  INTRINSIC_WITH_CC(s390_vceqbs);
  INTRINSIC_WITH_CC(s390_vceqhs);
  INTRINSIC_WITH_CC(s390_vceqfs);
  INTRINSIC_WITH_CC(s390_vceqgs);

  INTRINSIC_WITH_CC(s390_vchbs);
  INTRINSIC_WITH_CC(s390_vchhs);
  INTRINSIC_WITH_CC(s390_vchfs);
  INTRINSIC_WITH_CC(s390_vchgs);

  INTRINSIC_WITH_CC(s390_vchlbs);
  INTRINSIC_WITH_CC(s390_vchlhs);
  INTRINSIC_WITH_CC(s390_vchlfs);
  INTRINSIC_WITH_CC(s390_vchlgs);

  INTRINSIC_WITH_CC(s390_vfaebs);
  INTRINSIC_WITH_CC(s390_vfaehs);
  INTRINSIC_WITH_CC(s390_vfaefs);

  INTRINSIC_WITH_CC(s390_vfaezbs);
  INTRINSIC_WITH_CC(s390_vfaezhs);
  INTRINSIC_WITH_CC(s390_vfaezfs);

  INTRINSIC_WITH_CC(s390_vfeebs);
  INTRINSIC_WITH_CC(s390_vfeehs);
  INTRINSIC_WITH_CC(s390_vfeefs);

  INTRINSIC_WITH_CC(s390_vfeezbs);
  INTRINSIC_WITH_CC(s390_vfeezhs);
  INTRINSIC_WITH_CC(s390_vfeezfs);

  INTRINSIC_WITH_CC(s390_vfenebs);
  INTRINSIC_WITH_CC(s390_vfenehs);
  INTRINSIC_WITH_CC(s390_vfenefs);

  INTRINSIC_WITH_CC(s390_vfenezbs);
  INTRINSIC_WITH_CC(s390_vfenezhs);
  INTRINSIC_WITH_CC(s390_vfenezfs);

  INTRINSIC_WITH_CC(s390_vistrbs);
  INTRINSIC_WITH_CC(s390_vistrhs);
  INTRINSIC_WITH_CC(s390_vistrfs);

  INTRINSIC_WITH_CC(s390_vstrcbs);
  INTRINSIC_WITH_CC(s390_vstrchs);
  INTRINSIC_WITH_CC(s390_vstrcfs);

  INTRINSIC_WITH_CC(s390_vstrczbs);
  INTRINSIC_WITH_CC(s390_vstrczhs);
  INTRINSIC_WITH_CC(s390_vstrczfs);

  INTRINSIC_WITH_CC(s390_vfcesbs);
  INTRINSIC_WITH_CC(s390_vfcedbs);
  INTRINSIC_WITH_CC(s390_vfchsbs);
  INTRINSIC_WITH_CC(s390_vfchdbs);
  INTRINSIC_WITH_CC(s390_vfchesbs);
  INTRINSIC_WITH_CC(s390_vfchedbs);

  INTRINSIC_WITH_CC(s390_vftcisb);
  INTRINSIC_WITH_CC(s390_vftcidb);

#undef INTRINSIC_WITH_CC

  default:
    // Not handled by this function.
    return nullptr;
  }
}

Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E) {
  auto MakeLdg = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    clang::CharUnits Align =
        getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  };
  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1))});
  };
  switch (BuiltinID) {
  case NVPTX::BI__nvvm_atom_add_gen_i:
  case
NVPTX::BI__nvvm_atom_add_gen_l: 9226 case NVPTX::BI__nvvm_atom_add_gen_ll: 9227 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 9228 9229 case NVPTX::BI__nvvm_atom_sub_gen_i: 9230 case NVPTX::BI__nvvm_atom_sub_gen_l: 9231 case NVPTX::BI__nvvm_atom_sub_gen_ll: 9232 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 9233 9234 case NVPTX::BI__nvvm_atom_and_gen_i: 9235 case NVPTX::BI__nvvm_atom_and_gen_l: 9236 case NVPTX::BI__nvvm_atom_and_gen_ll: 9237 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 9238 9239 case NVPTX::BI__nvvm_atom_or_gen_i: 9240 case NVPTX::BI__nvvm_atom_or_gen_l: 9241 case NVPTX::BI__nvvm_atom_or_gen_ll: 9242 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 9243 9244 case NVPTX::BI__nvvm_atom_xor_gen_i: 9245 case NVPTX::BI__nvvm_atom_xor_gen_l: 9246 case NVPTX::BI__nvvm_atom_xor_gen_ll: 9247 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 9248 9249 case NVPTX::BI__nvvm_atom_xchg_gen_i: 9250 case NVPTX::BI__nvvm_atom_xchg_gen_l: 9251 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 9252 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 9253 9254 case NVPTX::BI__nvvm_atom_max_gen_i: 9255 case NVPTX::BI__nvvm_atom_max_gen_l: 9256 case NVPTX::BI__nvvm_atom_max_gen_ll: 9257 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 9258 9259 case NVPTX::BI__nvvm_atom_max_gen_ui: 9260 case NVPTX::BI__nvvm_atom_max_gen_ul: 9261 case NVPTX::BI__nvvm_atom_max_gen_ull: 9262 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 9263 9264 case NVPTX::BI__nvvm_atom_min_gen_i: 9265 case NVPTX::BI__nvvm_atom_min_gen_l: 9266 case NVPTX::BI__nvvm_atom_min_gen_ll: 9267 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 9268 9269 case NVPTX::BI__nvvm_atom_min_gen_ui: 9270 case NVPTX::BI__nvvm_atom_min_gen_ul: 9271 case NVPTX::BI__nvvm_atom_min_gen_ull: 9272 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 9273 9274 case 
NVPTX::BI__nvvm_atom_cas_gen_i: 9275 case NVPTX::BI__nvvm_atom_cas_gen_l: 9276 case NVPTX::BI__nvvm_atom_cas_gen_ll: 9277 // __nvvm_atom_cas_gen_* should return the old value rather than the 9278 // success flag. 9279 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 9280 9281 case NVPTX::BI__nvvm_atom_add_gen_f: { 9282 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9283 Value *Val = EmitScalarExpr(E->getArg(1)); 9284 // atomicrmw only deals with integer arguments so we need to use 9285 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 9286 Value *FnALAF32 = 9287 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 9288 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 9289 } 9290 9291 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 9292 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9293 Value *Val = EmitScalarExpr(E->getArg(1)); 9294 Value *FnALI32 = 9295 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 9296 return Builder.CreateCall(FnALI32, {Ptr, Val}); 9297 } 9298 9299 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 9300 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9301 Value *Val = EmitScalarExpr(E->getArg(1)); 9302 Value *FnALD32 = 9303 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 9304 return Builder.CreateCall(FnALD32, {Ptr, Val}); 9305 } 9306 9307 case NVPTX::BI__nvvm_ldg_c: 9308 case NVPTX::BI__nvvm_ldg_c2: 9309 case NVPTX::BI__nvvm_ldg_c4: 9310 case NVPTX::BI__nvvm_ldg_s: 9311 case NVPTX::BI__nvvm_ldg_s2: 9312 case NVPTX::BI__nvvm_ldg_s4: 9313 case NVPTX::BI__nvvm_ldg_i: 9314 case NVPTX::BI__nvvm_ldg_i2: 9315 case NVPTX::BI__nvvm_ldg_i4: 9316 case NVPTX::BI__nvvm_ldg_l: 9317 case NVPTX::BI__nvvm_ldg_ll: 9318 case NVPTX::BI__nvvm_ldg_ll2: 9319 case NVPTX::BI__nvvm_ldg_uc: 9320 case NVPTX::BI__nvvm_ldg_uc2: 9321 case NVPTX::BI__nvvm_ldg_uc4: 9322 case NVPTX::BI__nvvm_ldg_us: 9323 case NVPTX::BI__nvvm_ldg_us2: 9324 case NVPTX::BI__nvvm_ldg_us4: 9325 case NVPTX::BI__nvvm_ldg_ui: 9326 case 
NVPTX::BI__nvvm_ldg_ui2: 9327 case NVPTX::BI__nvvm_ldg_ui4: 9328 case NVPTX::BI__nvvm_ldg_ul: 9329 case NVPTX::BI__nvvm_ldg_ull: 9330 case NVPTX::BI__nvvm_ldg_ull2: 9331 // PTX Interoperability section 2.2: "For a vector with an even number of 9332 // elements, its alignment is set to number of elements times the alignment 9333 // of its member: n*alignof(t)." 9334 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 9335 case NVPTX::BI__nvvm_ldg_f: 9336 case NVPTX::BI__nvvm_ldg_f2: 9337 case NVPTX::BI__nvvm_ldg_f4: 9338 case NVPTX::BI__nvvm_ldg_d: 9339 case NVPTX::BI__nvvm_ldg_d2: 9340 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 9341 9342 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 9343 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 9344 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 9345 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 9346 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 9347 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 9348 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 9349 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 9350 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 9351 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 9352 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 9353 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 9354 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 9355 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 9356 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 9357 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 9358 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 9359 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 9360 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 9361 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 9362 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 9363 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 9364 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 9365 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 9366 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 9367 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 9368 case 
NVPTX::BI__nvvm_atom_cta_max_gen_ll: 9369 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 9370 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 9371 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 9372 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 9373 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 9374 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 9375 case NVPTX::BI__nvvm_atom_sys_max_gen_ll: 9376 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 9377 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 9378 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 9379 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 9380 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 9381 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 9382 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 9383 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 9384 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 9385 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 9386 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 9387 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 9388 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 9389 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 9390 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 9391 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 9392 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 9393 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 9394 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 9395 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 9396 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 9397 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 9398 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 9399 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 9400 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 9401 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 9402 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 9403 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 9404 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 9405 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 9406 case 
NVPTX::BI__nvvm_atom_sys_and_gen_ll: 9407 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 9408 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 9409 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 9410 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 9411 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 9412 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 9413 case NVPTX::BI__nvvm_atom_sys_or_gen_l: 9414 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 9415 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 9416 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 9417 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 9418 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 9419 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 9420 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 9421 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 9422 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 9423 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 9424 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 9425 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 9426 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 9427 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9428 return Builder.CreateCall( 9429 CGM.getIntrinsic( 9430 Intrinsic::nvvm_atomic_cas_gen_i_cta, 9431 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9432 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9433 } 9434 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 9435 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 9436 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 9437 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9438 return Builder.CreateCall( 9439 CGM.getIntrinsic( 9440 Intrinsic::nvvm_atomic_cas_gen_i_sys, 9441 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9442 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9443 } 9444 default: 9445 return nullptr; 9446 } 9447 } 9448 9449 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 9450 const CallExpr *E) { 9451 switch (BuiltinID) { 9452 case 
WebAssembly::BI__builtin_wasm_current_memory: { 9453 llvm::Type *ResultType = ConvertType(E->getType()); 9454 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 9455 return Builder.CreateCall(Callee); 9456 } 9457 case WebAssembly::BI__builtin_wasm_grow_memory: { 9458 Value *X = EmitScalarExpr(E->getArg(0)); 9459 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 9460 return Builder.CreateCall(Callee, X); 9461 } 9462 case WebAssembly::BI__builtin_wasm_throw: { 9463 Value *Tag = EmitScalarExpr(E->getArg(0)); 9464 Value *Obj = EmitScalarExpr(E->getArg(1)); 9465 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); 9466 return Builder.CreateCall(Callee, {Tag, Obj}); 9467 } 9468 case WebAssembly::BI__builtin_wasm_rethrow: { 9469 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); 9470 return Builder.CreateCall(Callee); 9471 } 9472 9473 default: 9474 return nullptr; 9475 } 9476 } 9477