1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGObjCRuntime.h" 16 #include "CGOpenCLRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "CodeGenModule.h" 19 #include "ConstantEmitter.h" 20 #include "TargetInfo.h" 21 #include "clang/AST/ASTContext.h" 22 #include "clang/AST/Decl.h" 23 #include "clang/Analysis/Analyses/OSLog.h" 24 #include "clang/Basic/TargetBuiltins.h" 25 #include "clang/Basic/TargetInfo.h" 26 #include "clang/CodeGen/CGFunctionInfo.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/IR/CallSite.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/InlineAsm.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/MDBuilder.h" 33 #include "llvm/Support/ConvertUTF.h" 34 #include <sstream> 35 36 using namespace clang; 37 using namespace CodeGen; 38 using namespace llvm; 39 40 static 41 int64_t clamp(int64_t Value, int64_t Low, int64_t High) { 42 return std::min(High, std::max(Low, Value)); 43 } 44 45 /// getBuiltinLibFunction - Given a builtin id for a function like 46 /// "__builtin_fabsf", return a Function* for "fabsf". 47 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 48 unsigned BuiltinID) { 49 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 50 51 // Get the name, skip over the __builtin_ prefix (if necessary). 52 StringRef Name; 53 GlobalDecl D(FD); 54 55 // If the builtin has been declared explicitly with an assembler label, 56 // use the mangled name. 
This differs from the plain label on platforms 57 // that prefix labels. 58 if (FD->hasAttr<AsmLabelAttr>()) 59 Name = getMangledName(D); 60 else 61 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 62 63 llvm::FunctionType *Ty = 64 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 65 66 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 67 } 68 69 /// Emit the conversions required to turn the given value into an 70 /// integer of the given size. 71 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 72 QualType T, llvm::IntegerType *IntType) { 73 V = CGF.EmitToMemory(V, T); 74 75 if (V->getType()->isPointerTy()) 76 return CGF.Builder.CreatePtrToInt(V, IntType); 77 78 assert(V->getType() == IntType); 79 return V; 80 } 81 82 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 83 QualType T, llvm::Type *ResultType) { 84 V = CGF.EmitFromMemory(V, T); 85 86 if (ResultType->isPointerTy()) 87 return CGF.Builder.CreateIntToPtr(V, ResultType); 88 89 assert(V->getType() == ResultType); 90 return V; 91 } 92 93 /// Utility to insert an atomic instruction based on Instrinsic::ID 94 /// and the expression node. 
95 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 96 llvm::AtomicRMWInst::BinOp Kind, 97 const CallExpr *E) { 98 QualType T = E->getType(); 99 assert(E->getArg(0)->getType()->isPointerType()); 100 assert(CGF.getContext().hasSameUnqualifiedType(T, 101 E->getArg(0)->getType()->getPointeeType())); 102 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 103 104 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 105 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 106 107 llvm::IntegerType *IntType = 108 llvm::IntegerType::get(CGF.getLLVMContext(), 109 CGF.getContext().getTypeSize(T)); 110 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 111 112 llvm::Value *Args[2]; 113 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 114 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 115 llvm::Type *ValueType = Args[1]->getType(); 116 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 117 118 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 119 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 120 return EmitFromInt(CGF, Result, T, ValueType); 121 } 122 123 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 124 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 125 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 126 127 // Convert the type of the pointer to a pointer to the stored type. 
128 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 129 Value *BC = CGF.Builder.CreateBitCast( 130 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 131 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 132 LV.setNontemporal(true); 133 CGF.EmitStoreOfScalar(Val, LV, false); 134 return nullptr; 135 } 136 137 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 138 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 139 140 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 141 LV.setNontemporal(true); 142 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 143 } 144 145 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 146 llvm::AtomicRMWInst::BinOp Kind, 147 const CallExpr *E) { 148 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 149 } 150 151 /// Utility to insert an atomic instruction based Instrinsic::ID and 152 /// the expression node, where the return value is the result of the 153 /// operation. 
154 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 155 llvm::AtomicRMWInst::BinOp Kind, 156 const CallExpr *E, 157 Instruction::BinaryOps Op, 158 bool Invert = false) { 159 QualType T = E->getType(); 160 assert(E->getArg(0)->getType()->isPointerType()); 161 assert(CGF.getContext().hasSameUnqualifiedType(T, 162 E->getArg(0)->getType()->getPointeeType())); 163 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 164 165 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 166 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 167 168 llvm::IntegerType *IntType = 169 llvm::IntegerType::get(CGF.getLLVMContext(), 170 CGF.getContext().getTypeSize(T)); 171 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 172 173 llvm::Value *Args[2]; 174 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 175 llvm::Type *ValueType = Args[1]->getType(); 176 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 177 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 178 179 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 180 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 181 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 182 if (Invert) 183 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 184 llvm::ConstantInt::get(IntType, -1)); 185 Result = EmitFromInt(CGF, Result, T, ValueType); 186 return RValue::get(Result); 187 } 188 189 /// @brief Utility to insert an atomic cmpxchg instruction. 190 /// 191 /// @param CGF The current codegen function. 192 /// @param E Builtin call expression to convert to cmpxchg. 193 /// arg0 - address to operate on 194 /// arg1 - value to compare with 195 /// arg2 - new value 196 /// @param ReturnBool Specifies whether to return success flag of 197 /// cmpxchg result or the old value. 
198 /// 199 /// @returns result of cmpxchg, according to ReturnBool 200 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 201 bool ReturnBool) { 202 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 203 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 204 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 205 206 llvm::IntegerType *IntType = llvm::IntegerType::get( 207 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 208 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 209 210 Value *Args[3]; 211 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 212 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 213 llvm::Type *ValueType = Args[1]->getType(); 214 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 215 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 216 217 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 218 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 219 llvm::AtomicOrdering::SequentiallyConsistent); 220 if (ReturnBool) 221 // Extract boolean success flag and zext it to int. 222 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 223 CGF.ConvertType(E->getType())); 224 else 225 // Extract old value and emit it using the same type as compare value. 226 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 227 ValueType); 228 } 229 230 // Emit a simple mangled intrinsic that has 1 argument and a return type 231 // matching the argument type. 232 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 233 const CallExpr *E, 234 unsigned IntrinsicID) { 235 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 236 237 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 238 return CGF.Builder.CreateCall(F, Src0); 239 } 240 241 // Emit an intrinsic that has 2 operands of the same type as its result. 
242 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 243 const CallExpr *E, 244 unsigned IntrinsicID) { 245 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 246 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 247 248 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 249 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 250 } 251 252 // Emit an intrinsic that has 3 operands of the same type as its result. 253 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 254 const CallExpr *E, 255 unsigned IntrinsicID) { 256 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 257 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 258 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 259 260 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 261 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 262 } 263 264 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 265 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 266 const CallExpr *E, 267 unsigned IntrinsicID) { 268 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 269 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 270 271 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 272 return CGF.Builder.CreateCall(F, {Src0, Src1}); 273 } 274 275 /// EmitFAbs - Emit a call to @llvm.fabs(). 276 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 277 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 278 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 279 Call->setDoesNotAccessMemory(); 280 return Call; 281 } 282 283 /// Emit the computation of the sign bit for a floating point value. Returns 284 /// the i1 sign bit value. 
285 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 286 LLVMContext &C = CGF.CGM.getLLVMContext(); 287 288 llvm::Type *Ty = V->getType(); 289 int Width = Ty->getPrimitiveSizeInBits(); 290 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 291 V = CGF.Builder.CreateBitCast(V, IntTy); 292 if (Ty->isPPC_FP128Ty()) { 293 // We want the sign bit of the higher-order double. The bitcast we just 294 // did works as if the double-double was stored to memory and then 295 // read as an i128. The "store" will put the higher-order double in the 296 // lower address in both little- and big-Endian modes, but the "load" 297 // will treat those bits as a different part of the i128: the low bits in 298 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 299 // we need to shift the high bits down to the low before truncating. 300 Width >>= 1; 301 if (CGF.getTarget().isBigEndian()) { 302 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 303 V = CGF.Builder.CreateLShr(V, ShiftCst); 304 } 305 // We are truncating value in order to extract the higher-order 306 // double, which we will be using to extract the sign from. 307 IntTy = llvm::IntegerType::get(C, Width); 308 V = CGF.Builder.CreateTrunc(V, IntTy); 309 } 310 Value *Zero = llvm::Constant::getNullValue(IntTy); 311 return CGF.Builder.CreateICmpSLT(V, Zero); 312 } 313 314 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 315 const CallExpr *E, llvm::Constant *calleeValue) { 316 CGCallee callee = CGCallee::forDirect(calleeValue, FD); 317 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 318 } 319 320 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 321 /// depending on IntrinsicID. 322 /// 323 /// \arg CGF The current codegen function. 324 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 325 /// \arg X The first argument to the llvm.*.with.overflow.*. 
326 /// \arg Y The second argument to the llvm.*.with.overflow.*. 327 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 328 /// \returns The result (i.e. sum/product) returned by the intrinsic. 329 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 330 const llvm::Intrinsic::ID IntrinsicID, 331 llvm::Value *X, llvm::Value *Y, 332 llvm::Value *&Carry) { 333 // Make sure we have integers of the same width. 334 assert(X->getType() == Y->getType() && 335 "Arguments must be the same type. (Did you forget to make sure both " 336 "arguments have the same integer width?)"); 337 338 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 339 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 340 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 341 return CGF.Builder.CreateExtractValue(Tmp, 0); 342 } 343 344 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 345 unsigned IntrinsicID, 346 int low, int high) { 347 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 348 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 349 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 350 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 351 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 352 return Call; 353 } 354 355 namespace { 356 struct WidthAndSignedness { 357 unsigned Width; 358 bool Signed; 359 }; 360 } 361 362 static WidthAndSignedness 363 getIntegerWidthAndSignedness(const clang::ASTContext &context, 364 const clang::QualType Type) { 365 assert(Type->isIntegerType() && "Given type is not an integer."); 366 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 367 bool Signed = Type->isSignedIntegerType(); 368 return {Width, Signed}; 369 } 370 371 // Given one or more integer types, this function produces an integer type that 372 // encompasses them: any value in one of the given types could be expressed in 373 // the encompassing type. 
374 static struct WidthAndSignedness 375 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 376 assert(Types.size() > 0 && "Empty list of types."); 377 378 // If any of the given types is signed, we must return a signed type. 379 bool Signed = false; 380 for (const auto &Type : Types) { 381 Signed |= Type.Signed; 382 } 383 384 // The encompassing type must have a width greater than or equal to the width 385 // of the specified types. Aditionally, if the encompassing type is signed, 386 // its width must be strictly greater than the width of any unsigned types 387 // given. 388 unsigned Width = 0; 389 for (const auto &Type : Types) { 390 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 391 if (Width < MinWidth) { 392 Width = MinWidth; 393 } 394 } 395 396 return {Width, Signed}; 397 } 398 399 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 400 llvm::Type *DestType = Int8PtrTy; 401 if (ArgValue->getType() != DestType) 402 ArgValue = 403 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 404 405 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 406 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 407 } 408 409 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 410 /// __builtin_object_size(p, @p To) is correct 411 static bool areBOSTypesCompatible(int From, int To) { 412 // Note: Our __builtin_object_size implementation currently treats Type=0 and 413 // Type=2 identically. Encoding this implementation detail here may make 414 // improving __builtin_object_size difficult in the future, so it's omitted. 415 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 416 } 417 418 static llvm::Value * 419 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 420 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 421 } 422 423 llvm::Value * 424 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 425 llvm::IntegerType *ResType, 426 llvm::Value *EmittedE) { 427 uint64_t ObjectSize; 428 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 429 return emitBuiltinObjectSize(E, Type, ResType, EmittedE); 430 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 431 } 432 433 /// Returns a Value corresponding to the size of the given expression. 434 /// This Value may be either of the following: 435 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 436 /// it) 437 /// - A call to the @llvm.objectsize intrinsic 438 /// 439 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null 440 /// and we wouldn't otherwise try to reference a pass_object_size parameter, 441 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. 442 llvm::Value * 443 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 444 llvm::IntegerType *ResType, 445 llvm::Value *EmittedE) { 446 // We need to reference an argument if the pointer is a parameter with the 447 // pass_object_size attribute. 448 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 449 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 450 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 451 if (Param != nullptr && PS != nullptr && 452 areBOSTypesCompatible(PS->getType(), Type)) { 453 auto Iter = SizeArguments.find(Param); 454 assert(Iter != SizeArguments.end()); 455 456 const ImplicitParamDecl *D = Iter->second; 457 auto DIter = LocalDeclMap.find(D); 458 assert(DIter != LocalDeclMap.end()); 459 460 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 461 getContext().getSizeType(), E->getLocStart()); 462 } 463 } 464 465 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 466 // evaluate E for side-effects. 
In either case, we shouldn't lower to 467 // @llvm.objectsize. 468 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) 469 return getDefaultBuiltinObjectSizeResult(Type, ResType); 470 471 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); 472 assert(Ptr->getType()->isPointerTy() && 473 "Non-pointer passed to __builtin_object_size?"); 474 475 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); 476 477 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. 478 Value *Min = Builder.getInt1((Type & 2) != 0); 479 // For GCC compatability, __builtin_object_size treat NULL as unknown size. 480 Value *NullIsUnknown = Builder.getTrue(); 481 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); 482 } 483 484 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we 485 // handle them here. 486 enum class CodeGenFunction::MSVCIntrin { 487 _BitScanForward, 488 _BitScanReverse, 489 _InterlockedAnd, 490 _InterlockedDecrement, 491 _InterlockedExchange, 492 _InterlockedExchangeAdd, 493 _InterlockedExchangeSub, 494 _InterlockedIncrement, 495 _InterlockedOr, 496 _InterlockedXor, 497 _interlockedbittestandset, 498 __fastfail, 499 }; 500 501 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 502 const CallExpr *E) { 503 switch (BuiltinID) { 504 case MSVCIntrin::_BitScanForward: 505 case MSVCIntrin::_BitScanReverse: { 506 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 507 508 llvm::Type *ArgType = ArgValue->getType(); 509 llvm::Type *IndexType = 510 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 511 llvm::Type *ResultType = ConvertType(E->getType()); 512 513 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 514 Value *ResZero = llvm::Constant::getNullValue(ResultType); 515 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 516 517 BasicBlock *Begin = Builder.GetInsertBlock(); 518 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 519 
Builder.SetInsertPoint(End); 520 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 521 522 Builder.SetInsertPoint(Begin); 523 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 524 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 525 Builder.CreateCondBr(IsZero, End, NotZero); 526 Result->addIncoming(ResZero, Begin); 527 528 Builder.SetInsertPoint(NotZero); 529 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 530 531 if (BuiltinID == MSVCIntrin::_BitScanForward) { 532 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 533 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 534 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 535 Builder.CreateStore(ZeroCount, IndexAddress, false); 536 } else { 537 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 538 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 539 540 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 541 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 542 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 543 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 544 Builder.CreateStore(Index, IndexAddress, false); 545 } 546 Builder.CreateBr(End); 547 Result->addIncoming(ResOne, NotZero); 548 549 Builder.SetInsertPoint(End); 550 return Result; 551 } 552 case MSVCIntrin::_InterlockedAnd: 553 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 554 case MSVCIntrin::_InterlockedExchange: 555 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 556 case MSVCIntrin::_InterlockedExchangeAdd: 557 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 558 case MSVCIntrin::_InterlockedExchangeSub: 559 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 560 case MSVCIntrin::_InterlockedOr: 561 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 562 case MSVCIntrin::_InterlockedXor: 563 
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 564 565 case MSVCIntrin::_interlockedbittestandset: { 566 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 567 llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); 568 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 569 AtomicRMWInst::Or, Addr, 570 Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), 571 llvm::AtomicOrdering::SequentiallyConsistent); 572 // Shift the relevant bit to the least significant position, truncate to 573 // the result type, and test the low bit. 574 llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); 575 llvm::Value *Truncated = 576 Builder.CreateTrunc(Shifted, ConvertType(E->getType())); 577 return Builder.CreateAnd(Truncated, 578 ConstantInt::get(Truncated->getType(), 1)); 579 } 580 581 case MSVCIntrin::_InterlockedDecrement: { 582 llvm::Type *IntTy = ConvertType(E->getType()); 583 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 584 AtomicRMWInst::Sub, 585 EmitScalarExpr(E->getArg(0)), 586 ConstantInt::get(IntTy, 1), 587 llvm::AtomicOrdering::SequentiallyConsistent); 588 return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); 589 } 590 case MSVCIntrin::_InterlockedIncrement: { 591 llvm::Type *IntTy = ConvertType(E->getType()); 592 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 593 AtomicRMWInst::Add, 594 EmitScalarExpr(E->getArg(0)), 595 ConstantInt::get(IntTy, 1), 596 llvm::AtomicOrdering::SequentiallyConsistent); 597 return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); 598 } 599 600 case MSVCIntrin::__fastfail: { 601 // Request immediate process termination from the kernel. 
The instruction 602 // sequences to do this are documented on MSDN: 603 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 604 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 605 StringRef Asm, Constraints; 606 switch (ISA) { 607 default: 608 ErrorUnsupported(E, "__fastfail call for this architecture"); 609 break; 610 case llvm::Triple::x86: 611 case llvm::Triple::x86_64: 612 Asm = "int $$0x29"; 613 Constraints = "{cx}"; 614 break; 615 case llvm::Triple::thumb: 616 Asm = "udf #251"; 617 Constraints = "{r0}"; 618 break; 619 } 620 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 621 llvm::InlineAsm *IA = 622 llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); 623 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 624 getLLVMContext(), llvm::AttributeList::FunctionIndex, 625 llvm::Attribute::NoReturn); 626 CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 627 CS.setAttributes(NoReturnAttr); 628 return CS.getInstruction(); 629 } 630 } 631 llvm_unreachable("Incorrect MSVC intrinsic!"); 632 } 633 634 namespace { 635 // ARC cleanup for __builtin_os_log_format 636 struct CallObjCArcUse final : EHScopeStack::Cleanup { 637 CallObjCArcUse(llvm::Value *object) : object(object) {} 638 llvm::Value *object; 639 640 void Emit(CodeGenFunction &CGF, Flags flags) override { 641 CGF.EmitARCIntrinsicUse(object); 642 } 643 }; 644 } 645 646 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, 647 BuiltinCheckKind Kind) { 648 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) 649 && "Unsupported builtin check kind"); 650 651 Value *ArgValue = EmitScalarExpr(E); 652 if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) 653 return ArgValue; 654 655 SanitizerScope SanScope(this); 656 Value *Cond = Builder.CreateICmpNE( 657 ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); 658 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), 659 
SanitizerHandler::InvalidBuiltin, 660 {EmitCheckSourceLocation(E->getExprLoc()), 661 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, 662 None); 663 return ArgValue; 664 } 665 666 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 667 unsigned BuiltinID, const CallExpr *E, 668 ReturnValueSlot ReturnValue) { 669 // See if we can constant fold this builtin. If so, don't emit it at all. 670 Expr::EvalResult Result; 671 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 672 !Result.hasSideEffects()) { 673 if (Result.Val.isInt()) 674 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 675 Result.Val.getInt())); 676 if (Result.Val.isFloat()) 677 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 678 Result.Val.getFloat())); 679 } 680 681 switch (BuiltinID) { 682 default: break; // Handle intrinsics and libm functions below. 683 case Builtin::BI__builtin___CFStringMakeConstantString: 684 case Builtin::BI__builtin___NSStringMakeConstantString: 685 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); 686 case Builtin::BI__builtin_stdarg_start: 687 case Builtin::BI__builtin_va_start: 688 case Builtin::BI__va_start: 689 case Builtin::BI__builtin_va_end: 690 return RValue::get( 691 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 692 ? 
EmitScalarExpr(E->getArg(0)) 693 : EmitVAListRef(E->getArg(0)).getPointer(), 694 BuiltinID != Builtin::BI__builtin_va_end)); 695 case Builtin::BI__builtin_va_copy: { 696 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 697 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 698 699 llvm::Type *Type = Int8PtrTy; 700 701 DstPtr = Builder.CreateBitCast(DstPtr, Type); 702 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 703 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 704 {DstPtr, SrcPtr})); 705 } 706 case Builtin::BI__builtin_abs: 707 case Builtin::BI__builtin_labs: 708 case Builtin::BI__builtin_llabs: { 709 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 710 711 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 712 Value *CmpResult = 713 Builder.CreateICmpSGE(ArgValue, 714 llvm::Constant::getNullValue(ArgValue->getType()), 715 "abscond"); 716 Value *Result = 717 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 718 719 return RValue::get(Result); 720 } 721 case Builtin::BI__builtin_fabs: 722 case Builtin::BI__builtin_fabsf: 723 case Builtin::BI__builtin_fabsl: { 724 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 725 } 726 case Builtin::BI__builtin_fmod: 727 case Builtin::BI__builtin_fmodf: 728 case Builtin::BI__builtin_fmodl: { 729 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 730 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 731 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 732 return RValue::get(Result); 733 } 734 case Builtin::BI__builtin_copysign: 735 case Builtin::BI__builtin_copysignf: 736 case Builtin::BI__builtin_copysignl: { 737 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 738 } 739 case Builtin::BI__builtin_ceil: 740 case Builtin::BI__builtin_ceilf: 741 case Builtin::BI__builtin_ceill: { 742 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 743 } 744 case Builtin::BI__builtin_floor: 745 case Builtin::BI__builtin_floorf: 746 case 
Builtin::BI__builtin_floorl: { 747 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 748 } 749 case Builtin::BI__builtin_trunc: 750 case Builtin::BI__builtin_truncf: 751 case Builtin::BI__builtin_truncl: { 752 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 753 } 754 case Builtin::BI__builtin_rint: 755 case Builtin::BI__builtin_rintf: 756 case Builtin::BI__builtin_rintl: { 757 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 758 } 759 case Builtin::BI__builtin_nearbyint: 760 case Builtin::BI__builtin_nearbyintf: 761 case Builtin::BI__builtin_nearbyintl: { 762 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 763 } 764 case Builtin::BI__builtin_round: 765 case Builtin::BI__builtin_roundf: 766 case Builtin::BI__builtin_roundl: { 767 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 768 } 769 case Builtin::BI__builtin_fmin: 770 case Builtin::BI__builtin_fminf: 771 case Builtin::BI__builtin_fminl: { 772 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 773 } 774 case Builtin::BI__builtin_fmax: 775 case Builtin::BI__builtin_fmaxf: 776 case Builtin::BI__builtin_fmaxl: { 777 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 778 } 779 case Builtin::BI__builtin_conj: 780 case Builtin::BI__builtin_conjf: 781 case Builtin::BI__builtin_conjl: { 782 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 783 Value *Real = ComplexVal.first; 784 Value *Imag = ComplexVal.second; 785 Value *Zero = 786 Imag->getType()->isFPOrFPVectorTy() 787 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 788 : llvm::Constant::getNullValue(Imag->getType()); 789 790 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 791 return RValue::getComplex(std::make_pair(Real, Imag)); 792 } 793 case Builtin::BI__builtin_creal: 794 case Builtin::BI__builtin_crealf: 795 case Builtin::BI__builtin_creall: 796 case Builtin::BIcreal: 797 case Builtin::BIcrealf: 798 case Builtin::BIcreall: { 799 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 800 return RValue::get(ComplexVal.first); 801 } 802 803 case Builtin::BI__builtin_cimag: 804 case Builtin::BI__builtin_cimagf: 805 case Builtin::BI__builtin_cimagl: 806 case Builtin::BIcimag: 807 case Builtin::BIcimagf: 808 case Builtin::BIcimagl: { 809 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 810 return RValue::get(ComplexVal.second); 811 } 812 813 case Builtin::BI__builtin_ctzs: 814 case Builtin::BI__builtin_ctz: 815 case Builtin::BI__builtin_ctzl: 816 case Builtin::BI__builtin_ctzll: { 817 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); 818 819 llvm::Type *ArgType = ArgValue->getType(); 820 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 821 822 llvm::Type *ResultType = ConvertType(E->getType()); 823 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 824 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 825 if (Result->getType() != ResultType) 826 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 827 "cast"); 828 return RValue::get(Result); 829 } 830 case Builtin::BI__builtin_clzs: 831 case Builtin::BI__builtin_clz: 832 case Builtin::BI__builtin_clzl: 833 case Builtin::BI__builtin_clzll: { 834 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); 835 836 llvm::Type *ArgType = ArgValue->getType(); 837 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 838 839 llvm::Type *ResultType = ConvertType(E->getType()); 840 Value *ZeroUndef = 
Builder.getInt1(getTarget().isCLZForZeroUndef()); 841 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 842 if (Result->getType() != ResultType) 843 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 844 "cast"); 845 return RValue::get(Result); 846 } 847 case Builtin::BI__builtin_ffs: 848 case Builtin::BI__builtin_ffsl: 849 case Builtin::BI__builtin_ffsll: { 850 // ffs(x) -> x ? cttz(x) + 1 : 0 851 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 852 853 llvm::Type *ArgType = ArgValue->getType(); 854 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 855 856 llvm::Type *ResultType = ConvertType(E->getType()); 857 Value *Tmp = 858 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 859 llvm::ConstantInt::get(ArgType, 1)); 860 Value *Zero = llvm::Constant::getNullValue(ArgType); 861 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 862 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 863 if (Result->getType() != ResultType) 864 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 865 "cast"); 866 return RValue::get(Result); 867 } 868 case Builtin::BI__builtin_parity: 869 case Builtin::BI__builtin_parityl: 870 case Builtin::BI__builtin_parityll: { 871 // parity(x) -> ctpop(x) & 1 872 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 873 874 llvm::Type *ArgType = ArgValue->getType(); 875 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 876 877 llvm::Type *ResultType = ConvertType(E->getType()); 878 Value *Tmp = Builder.CreateCall(F, ArgValue); 879 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 880 if (Result->getType() != ResultType) 881 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 882 "cast"); 883 return RValue::get(Result); 884 } 885 case Builtin::BI__popcnt16: 886 case Builtin::BI__popcnt: 887 case Builtin::BI__popcnt64: 888 case Builtin::BI__builtin_popcount: 889 case Builtin::BI__builtin_popcountl: 890 case 
Builtin::BI__builtin_popcountll: { 891 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 892 893 llvm::Type *ArgType = ArgValue->getType(); 894 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 895 896 llvm::Type *ResultType = ConvertType(E->getType()); 897 Value *Result = Builder.CreateCall(F, ArgValue); 898 if (Result->getType() != ResultType) 899 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 900 "cast"); 901 return RValue::get(Result); 902 } 903 case Builtin::BI_rotr8: 904 case Builtin::BI_rotr16: 905 case Builtin::BI_rotr: 906 case Builtin::BI_lrotr: 907 case Builtin::BI_rotr64: { 908 Value *Val = EmitScalarExpr(E->getArg(0)); 909 Value *Shift = EmitScalarExpr(E->getArg(1)); 910 911 llvm::Type *ArgType = Val->getType(); 912 Shift = Builder.CreateIntCast(Shift, ArgType, false); 913 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 914 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 915 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 916 917 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 918 Shift = Builder.CreateAnd(Shift, Mask); 919 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 920 921 Value *RightShifted = Builder.CreateLShr(Val, Shift); 922 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 923 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 924 925 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 926 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 927 return RValue::get(Result); 928 } 929 case Builtin::BI_rotl8: 930 case Builtin::BI_rotl16: 931 case Builtin::BI_rotl: 932 case Builtin::BI_lrotl: 933 case Builtin::BI_rotl64: { 934 Value *Val = EmitScalarExpr(E->getArg(0)); 935 Value *Shift = EmitScalarExpr(E->getArg(1)); 936 937 llvm::Type *ArgType = Val->getType(); 938 Shift = Builder.CreateIntCast(Shift, ArgType, false); 939 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 940 Value *ArgTypeSize = 
llvm::ConstantInt::get(ArgType, ArgWidth); 941 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 942 943 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 944 Shift = Builder.CreateAnd(Shift, Mask); 945 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 946 947 Value *LeftShifted = Builder.CreateShl(Val, Shift); 948 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 949 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 950 951 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 952 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 953 return RValue::get(Result); 954 } 955 case Builtin::BI__builtin_unpredictable: { 956 // Always return the argument of __builtin_unpredictable. LLVM does not 957 // handle this builtin. Metadata for this builtin should be added directly 958 // to instructions such as branches or switches that use it. 959 return RValue::get(EmitScalarExpr(E->getArg(0))); 960 } 961 case Builtin::BI__builtin_expect: { 962 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 963 llvm::Type *ArgType = ArgValue->getType(); 964 965 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 966 // Don't generate llvm.expect on -O0 as the backend won't use it for 967 // anything. 968 // Note, we still IRGen ExpectedValue because it could have side-effects. 969 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 970 return RValue::get(ArgValue); 971 972 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 973 Value *Result = 974 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 975 return RValue::get(Result); 976 } 977 case Builtin::BI__builtin_assume_aligned: { 978 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 979 Value *OffsetValue = 980 (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; 981 982 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 983 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 984 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 985 986 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 987 return RValue::get(PtrValue); 988 } 989 case Builtin::BI__assume: 990 case Builtin::BI__builtin_assume: { 991 if (E->getArg(0)->HasSideEffects(getContext())) 992 return RValue::get(nullptr); 993 994 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 995 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 996 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 997 } 998 case Builtin::BI__builtin_bswap16: 999 case Builtin::BI__builtin_bswap32: 1000 case Builtin::BI__builtin_bswap64: { 1001 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 1002 } 1003 case Builtin::BI__builtin_bitreverse8: 1004 case Builtin::BI__builtin_bitreverse16: 1005 case Builtin::BI__builtin_bitreverse32: 1006 case Builtin::BI__builtin_bitreverse64: { 1007 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 1008 } 1009 case Builtin::BI__builtin_object_size: { 1010 unsigned Type = 1011 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 1012 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 1013 1014 // We pass this builtin onto the optimizer so that it can figure out the 1015 // object size in more complex cases. 1016 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 1017 /*EmittedE=*/nullptr)); 1018 } 1019 case Builtin::BI__builtin_prefetch: { 1020 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 1021 // FIXME: Technically these constants should of type 'int', yes? 1022 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 1023 llvm::ConstantInt::get(Int32Ty, 0); 1024 Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : 1025 llvm::ConstantInt::get(Int32Ty, 3); 1026 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 1027 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 1028 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 1029 } 1030 case Builtin::BI__builtin_readcyclecounter: { 1031 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 1032 return RValue::get(Builder.CreateCall(F)); 1033 } 1034 case Builtin::BI__builtin___clear_cache: { 1035 Value *Begin = EmitScalarExpr(E->getArg(0)); 1036 Value *End = EmitScalarExpr(E->getArg(1)); 1037 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 1038 return RValue::get(Builder.CreateCall(F, {Begin, End})); 1039 } 1040 case Builtin::BI__builtin_trap: 1041 return RValue::get(EmitTrapCall(Intrinsic::trap)); 1042 case Builtin::BI__debugbreak: 1043 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 1044 case Builtin::BI__builtin_unreachable: { 1045 if (SanOpts.has(SanitizerKind::Unreachable)) { 1046 SanitizerScope SanScope(this); 1047 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 1048 SanitizerKind::Unreachable), 1049 SanitizerHandler::BuiltinUnreachable, 1050 EmitCheckSourceLocation(E->getExprLoc()), None); 1051 } else 1052 Builder.CreateUnreachable(); 1053 1054 // We do need to preserve an insertion point. 
1055 EmitBlock(createBasicBlock("unreachable.cont")); 1056 1057 return RValue::get(nullptr); 1058 } 1059 1060 case Builtin::BI__builtin_powi: 1061 case Builtin::BI__builtin_powif: 1062 case Builtin::BI__builtin_powil: { 1063 Value *Base = EmitScalarExpr(E->getArg(0)); 1064 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1065 llvm::Type *ArgType = Base->getType(); 1066 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 1067 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1068 } 1069 1070 case Builtin::BI__builtin_isgreater: 1071 case Builtin::BI__builtin_isgreaterequal: 1072 case Builtin::BI__builtin_isless: 1073 case Builtin::BI__builtin_islessequal: 1074 case Builtin::BI__builtin_islessgreater: 1075 case Builtin::BI__builtin_isunordered: { 1076 // Ordered comparisons: we know the arguments to these are matching scalar 1077 // floating point values. 1078 Value *LHS = EmitScalarExpr(E->getArg(0)); 1079 Value *RHS = EmitScalarExpr(E->getArg(1)); 1080 1081 switch (BuiltinID) { 1082 default: llvm_unreachable("Unknown ordered comparison"); 1083 case Builtin::BI__builtin_isgreater: 1084 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 1085 break; 1086 case Builtin::BI__builtin_isgreaterequal: 1087 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 1088 break; 1089 case Builtin::BI__builtin_isless: 1090 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 1091 break; 1092 case Builtin::BI__builtin_islessequal: 1093 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 1094 break; 1095 case Builtin::BI__builtin_islessgreater: 1096 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 1097 break; 1098 case Builtin::BI__builtin_isunordered: 1099 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1100 break; 1101 } 1102 // ZExt bool to int type. 
1103 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1104 } 1105 case Builtin::BI__builtin_isnan: { 1106 Value *V = EmitScalarExpr(E->getArg(0)); 1107 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1108 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1109 } 1110 1111 case Builtin::BIfinite: 1112 case Builtin::BI__finite: 1113 case Builtin::BIfinitef: 1114 case Builtin::BI__finitef: 1115 case Builtin::BIfinitel: 1116 case Builtin::BI__finitel: 1117 case Builtin::BI__builtin_isinf: 1118 case Builtin::BI__builtin_isfinite: { 1119 // isinf(x) --> fabs(x) == infinity 1120 // isfinite(x) --> fabs(x) != infinity 1121 // x != NaN via the ordered compare in either case. 1122 Value *V = EmitScalarExpr(E->getArg(0)); 1123 Value *Fabs = EmitFAbs(*this, V); 1124 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1125 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1126 ? CmpInst::FCMP_OEQ 1127 : CmpInst::FCMP_ONE; 1128 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1129 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1130 } 1131 1132 case Builtin::BI__builtin_isinf_sign: { 1133 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 1134 Value *Arg = EmitScalarExpr(E->getArg(0)); 1135 Value *AbsArg = EmitFAbs(*this, Arg); 1136 Value *IsInf = Builder.CreateFCmpOEQ( 1137 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1138 Value *IsNeg = EmitSignBit(*this, Arg); 1139 1140 llvm::Type *IntTy = ConvertType(E->getType()); 1141 Value *Zero = Constant::getNullValue(IntTy); 1142 Value *One = ConstantInt::get(IntTy, 1); 1143 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1144 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1145 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1146 return RValue::get(Result); 1147 } 1148 1149 case Builtin::BI__builtin_isnormal: { 1150 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1151 Value *V = EmitScalarExpr(E->getArg(0)); 1152 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1153 1154 Value *Abs = EmitFAbs(*this, V); 1155 Value *IsLessThanInf = 1156 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1157 APFloat Smallest = APFloat::getSmallestNormalized( 1158 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1159 Value *IsNormal = 1160 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1161 "isnormal"); 1162 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1163 V = Builder.CreateAnd(V, IsNormal, "and"); 1164 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1165 } 1166 1167 case Builtin::BI__builtin_fpclassify: { 1168 Value *V = EmitScalarExpr(E->getArg(5)); 1169 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1170 1171 // Create Result 1172 BasicBlock *Begin = Builder.GetInsertBlock(); 1173 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1174 Builder.SetInsertPoint(End); 1175 PHINode *Result = 1176 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1177 "fpclassify_result"); 1178 1179 // if (V==0) return FP_ZERO 1180 Builder.SetInsertPoint(Begin); 1181 Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1182 "iszero"); 1183 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1184 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1185 Builder.CreateCondBr(IsZero, End, NotZero); 1186 Result->addIncoming(ZeroLiteral, Begin); 1187 1188 // if (V != V) return FP_NAN 1189 Builder.SetInsertPoint(NotZero); 1190 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1191 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1192 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1193 Builder.CreateCondBr(IsNan, End, NotNan); 1194 Result->addIncoming(NanLiteral, NotZero); 1195 1196 // if (fabs(V) == infinity) return FP_INFINITY 1197 Builder.SetInsertPoint(NotNan); 1198 Value *VAbs = EmitFAbs(*this, V); 1199 Value *IsInf = 1200 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1201 "isinf"); 1202 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1203 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1204 Builder.CreateCondBr(IsInf, End, NotInf); 1205 Result->addIncoming(InfLiteral, NotNan); 1206 1207 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1208 Builder.SetInsertPoint(NotInf); 1209 APFloat Smallest = APFloat::getSmallestNormalized( 1210 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1211 Value *IsNormal = 1212 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1213 "isnormal"); 1214 Value *NormalResult = 1215 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1216 EmitScalarExpr(E->getArg(3))); 1217 Builder.CreateBr(End); 1218 Result->addIncoming(NormalResult, NotInf); 1219 1220 // return Result 1221 Builder.SetInsertPoint(End); 1222 return RValue::get(Result); 1223 } 1224 1225 case Builtin::BIalloca: 1226 case Builtin::BI_alloca: 1227 case Builtin::BI__builtin_alloca: { 1228 Value *Size = EmitScalarExpr(E->getArg(0)); 1229 const TargetInfo &TI = getContext().getTargetInfo(); 
1230 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 1231 unsigned SuitableAlignmentInBytes = 1232 CGM.getContext() 1233 .toCharUnitsFromBits(TI.getSuitableAlign()) 1234 .getQuantity(); 1235 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1236 AI->setAlignment(SuitableAlignmentInBytes); 1237 return RValue::get(AI); 1238 } 1239 1240 case Builtin::BI__builtin_alloca_with_align: { 1241 Value *Size = EmitScalarExpr(E->getArg(0)); 1242 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 1243 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 1244 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 1245 unsigned AlignmentInBytes = 1246 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); 1247 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1248 AI->setAlignment(AlignmentInBytes); 1249 return RValue::get(AI); 1250 } 1251 1252 case Builtin::BIbzero: 1253 case Builtin::BI__builtin_bzero: { 1254 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1255 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1256 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1257 E->getArg(0)->getExprLoc(), FD, 0); 1258 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1259 return RValue::get(Dest.getPointer()); 1260 } 1261 case Builtin::BImemcpy: 1262 case Builtin::BI__builtin_memcpy: { 1263 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1264 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1265 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1266 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1267 E->getArg(0)->getExprLoc(), FD, 0); 1268 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1269 E->getArg(1)->getExprLoc(), FD, 1); 1270 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1271 return RValue::get(Dest.getPointer()); 1272 } 1273 1274 case 
Builtin::BI__builtin_char_memchr: 1275 BuiltinID = Builtin::BI__builtin_memchr; 1276 break; 1277 1278 case Builtin::BI__builtin___memcpy_chk: { 1279 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 1280 llvm::APSInt Size, DstSize; 1281 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1282 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1283 break; 1284 if (Size.ugt(DstSize)) 1285 break; 1286 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1287 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1288 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1289 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1290 return RValue::get(Dest.getPointer()); 1291 } 1292 1293 case Builtin::BI__builtin_objc_memmove_collectable: { 1294 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1295 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1296 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1297 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1298 DestAddr, SrcAddr, SizeVal); 1299 return RValue::get(DestAddr.getPointer()); 1300 } 1301 1302 case Builtin::BI__builtin___memmove_chk: { 1303 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1304 llvm::APSInt Size, DstSize; 1305 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1306 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1307 break; 1308 if (Size.ugt(DstSize)) 1309 break; 1310 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1311 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1312 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1313 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1314 return RValue::get(Dest.getPointer()); 1315 } 1316 1317 case Builtin::BImemmove: 1318 case Builtin::BI__builtin_memmove: { 1319 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1320 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1321 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1322 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1323 E->getArg(0)->getExprLoc(), FD, 0); 1324 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1325 E->getArg(1)->getExprLoc(), FD, 1); 1326 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1327 return RValue::get(Dest.getPointer()); 1328 } 1329 case Builtin::BImemset: 1330 case Builtin::BI__builtin_memset: { 1331 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1332 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1333 Builder.getInt8Ty()); 1334 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1335 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1336 E->getArg(0)->getExprLoc(), FD, 0); 1337 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1338 return RValue::get(Dest.getPointer()); 1339 } 1340 case Builtin::BI__builtin___memset_chk: { 1341 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1342 llvm::APSInt Size, DstSize; 1343 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1344 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1345 break; 1346 if (Size.ugt(DstSize)) 1347 break; 1348 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1349 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1350 Builder.getInt8Ty()); 1351 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1352 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1353 return RValue::get(Dest.getPointer()); 1354 } 1355 case Builtin::BI__builtin_dwarf_cfa: { 1356 // The offset in bytes from the first argument to the CFA. 1357 // 1358 // Why on earth is this in the frontend? Is there any reason at 1359 // all that the backend can't reasonably determine this while 1360 // lowering llvm.eh.dwarf.cfa()? 1361 // 1362 // TODO: If there's a satisfactory reason, add a target hook for 1363 // this instead of hard-coding 0, which is correct for most targets. 1364 int32_t Offset = 0; 1365 1366 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1367 return RValue::get(Builder.CreateCall(F, 1368 llvm::ConstantInt::get(Int32Ty, Offset))); 1369 } 1370 case Builtin::BI__builtin_return_address: { 1371 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1372 getContext().UnsignedIntTy); 1373 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1374 return RValue::get(Builder.CreateCall(F, Depth)); 1375 } 1376 case Builtin::BI_ReturnAddress: { 1377 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1378 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1379 } 1380 case Builtin::BI__builtin_frame_address: { 1381 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1382 getContext().UnsignedIntTy); 1383 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1384 return RValue::get(Builder.CreateCall(F, Depth)); 1385 } 1386 case Builtin::BI__builtin_extract_return_addr: { 1387 Value *Address = 
EmitScalarExpr(E->getArg(0)); 1388 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1389 return RValue::get(Result); 1390 } 1391 case Builtin::BI__builtin_frob_return_addr: { 1392 Value *Address = EmitScalarExpr(E->getArg(0)); 1393 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1394 return RValue::get(Result); 1395 } 1396 case Builtin::BI__builtin_dwarf_sp_column: { 1397 llvm::IntegerType *Ty 1398 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1399 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1400 if (Column == -1) { 1401 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1402 return RValue::get(llvm::UndefValue::get(Ty)); 1403 } 1404 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1405 } 1406 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1407 Value *Address = EmitScalarExpr(E->getArg(0)); 1408 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1409 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1410 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1411 } 1412 case Builtin::BI__builtin_eh_return: { 1413 Value *Int = EmitScalarExpr(E->getArg(0)); 1414 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1415 1416 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1417 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1418 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1419 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1420 ? Intrinsic::eh_return_i32 1421 : Intrinsic::eh_return_i64); 1422 Builder.CreateCall(F, {Int, Ptr}); 1423 Builder.CreateUnreachable(); 1424 1425 // We do need to preserve an insertion point. 
1426 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1427 1428 return RValue::get(nullptr); 1429 } 1430 case Builtin::BI__builtin_unwind_init: { 1431 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1432 return RValue::get(Builder.CreateCall(F)); 1433 } 1434 case Builtin::BI__builtin_extend_pointer: { 1435 // Extends a pointer to the size of an _Unwind_Word, which is 1436 // uint64_t on all platforms. Generally this gets poked into a 1437 // register and eventually used as an address, so if the 1438 // addressing registers are wider than pointers and the platform 1439 // doesn't implicitly ignore high-order bits when doing 1440 // addressing, we need to make sure we zext / sext based on 1441 // the platform's expectations. 1442 // 1443 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1444 1445 // Cast the pointer to intptr_t. 1446 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1447 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1448 1449 // If that's 64 bits, we're done. 1450 if (IntPtrTy->getBitWidth() == 64) 1451 return RValue::get(Result); 1452 1453 // Otherwise, ask the codegen data what to do. 1454 if (getTargetHooks().extendPointerWithSExt()) 1455 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1456 else 1457 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1458 } 1459 case Builtin::BI__builtin_setjmp: { 1460 // Buffer is a void**. 1461 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1462 1463 // Store the frame pointer to the setjmp buffer. 1464 Value *FrameAddr = 1465 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1466 ConstantInt::get(Int32Ty, 0)); 1467 Builder.CreateStore(FrameAddr, Buf); 1468 1469 // Store the stack pointer to the setjmp buffer. 
1470 Value *StackAddr = 1471 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1472 Address StackSaveSlot = 1473 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1474 Builder.CreateStore(StackAddr, StackSaveSlot); 1475 1476 // Call LLVM's EH setjmp, which is lightweight. 1477 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1478 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1479 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1480 } 1481 case Builtin::BI__builtin_longjmp: { 1482 Value *Buf = EmitScalarExpr(E->getArg(0)); 1483 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1484 1485 // Call LLVM's EH longjmp, which is lightweight. 1486 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1487 1488 // longjmp doesn't return; mark this as unreachable. 1489 Builder.CreateUnreachable(); 1490 1491 // We do need to preserve an insertion point. 1492 EmitBlock(createBasicBlock("longjmp.cont")); 1493 1494 return RValue::get(nullptr); 1495 } 1496 case Builtin::BI__sync_fetch_and_add: 1497 case Builtin::BI__sync_fetch_and_sub: 1498 case Builtin::BI__sync_fetch_and_or: 1499 case Builtin::BI__sync_fetch_and_and: 1500 case Builtin::BI__sync_fetch_and_xor: 1501 case Builtin::BI__sync_fetch_and_nand: 1502 case Builtin::BI__sync_add_and_fetch: 1503 case Builtin::BI__sync_sub_and_fetch: 1504 case Builtin::BI__sync_and_and_fetch: 1505 case Builtin::BI__sync_or_and_fetch: 1506 case Builtin::BI__sync_xor_and_fetch: 1507 case Builtin::BI__sync_nand_and_fetch: 1508 case Builtin::BI__sync_val_compare_and_swap: 1509 case Builtin::BI__sync_bool_compare_and_swap: 1510 case Builtin::BI__sync_lock_test_and_set: 1511 case Builtin::BI__sync_lock_release: 1512 case Builtin::BI__sync_swap: 1513 llvm_unreachable("Shouldn't make it through sema"); 1514 case Builtin::BI__sync_fetch_and_add_1: 1515 case Builtin::BI__sync_fetch_and_add_2: 1516 case Builtin::BI__sync_fetch_and_add_4: 1517 case Builtin::BI__sync_fetch_and_add_8: 1518 case 
Builtin::BI__sync_fetch_and_add_16: 1519 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1520 case Builtin::BI__sync_fetch_and_sub_1: 1521 case Builtin::BI__sync_fetch_and_sub_2: 1522 case Builtin::BI__sync_fetch_and_sub_4: 1523 case Builtin::BI__sync_fetch_and_sub_8: 1524 case Builtin::BI__sync_fetch_and_sub_16: 1525 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1526 case Builtin::BI__sync_fetch_and_or_1: 1527 case Builtin::BI__sync_fetch_and_or_2: 1528 case Builtin::BI__sync_fetch_and_or_4: 1529 case Builtin::BI__sync_fetch_and_or_8: 1530 case Builtin::BI__sync_fetch_and_or_16: 1531 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1532 case Builtin::BI__sync_fetch_and_and_1: 1533 case Builtin::BI__sync_fetch_and_and_2: 1534 case Builtin::BI__sync_fetch_and_and_4: 1535 case Builtin::BI__sync_fetch_and_and_8: 1536 case Builtin::BI__sync_fetch_and_and_16: 1537 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1538 case Builtin::BI__sync_fetch_and_xor_1: 1539 case Builtin::BI__sync_fetch_and_xor_2: 1540 case Builtin::BI__sync_fetch_and_xor_4: 1541 case Builtin::BI__sync_fetch_and_xor_8: 1542 case Builtin::BI__sync_fetch_and_xor_16: 1543 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1544 case Builtin::BI__sync_fetch_and_nand_1: 1545 case Builtin::BI__sync_fetch_and_nand_2: 1546 case Builtin::BI__sync_fetch_and_nand_4: 1547 case Builtin::BI__sync_fetch_and_nand_8: 1548 case Builtin::BI__sync_fetch_and_nand_16: 1549 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1550 1551 // Clang extensions: not overloaded yet. 
1552 case Builtin::BI__sync_fetch_and_min: 1553 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1554 case Builtin::BI__sync_fetch_and_max: 1555 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1556 case Builtin::BI__sync_fetch_and_umin: 1557 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1558 case Builtin::BI__sync_fetch_and_umax: 1559 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1560 1561 case Builtin::BI__sync_add_and_fetch_1: 1562 case Builtin::BI__sync_add_and_fetch_2: 1563 case Builtin::BI__sync_add_and_fetch_4: 1564 case Builtin::BI__sync_add_and_fetch_8: 1565 case Builtin::BI__sync_add_and_fetch_16: 1566 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1567 llvm::Instruction::Add); 1568 case Builtin::BI__sync_sub_and_fetch_1: 1569 case Builtin::BI__sync_sub_and_fetch_2: 1570 case Builtin::BI__sync_sub_and_fetch_4: 1571 case Builtin::BI__sync_sub_and_fetch_8: 1572 case Builtin::BI__sync_sub_and_fetch_16: 1573 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1574 llvm::Instruction::Sub); 1575 case Builtin::BI__sync_and_and_fetch_1: 1576 case Builtin::BI__sync_and_and_fetch_2: 1577 case Builtin::BI__sync_and_and_fetch_4: 1578 case Builtin::BI__sync_and_and_fetch_8: 1579 case Builtin::BI__sync_and_and_fetch_16: 1580 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1581 llvm::Instruction::And); 1582 case Builtin::BI__sync_or_and_fetch_1: 1583 case Builtin::BI__sync_or_and_fetch_2: 1584 case Builtin::BI__sync_or_and_fetch_4: 1585 case Builtin::BI__sync_or_and_fetch_8: 1586 case Builtin::BI__sync_or_and_fetch_16: 1587 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1588 llvm::Instruction::Or); 1589 case Builtin::BI__sync_xor_and_fetch_1: 1590 case Builtin::BI__sync_xor_and_fetch_2: 1591 case Builtin::BI__sync_xor_and_fetch_4: 1592 case Builtin::BI__sync_xor_and_fetch_8: 1593 case Builtin::BI__sync_xor_and_fetch_16: 1594 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1595 llvm::Instruction::Xor); 1596 case Builtin::BI__sync_nand_and_fetch_1: 1597 case Builtin::BI__sync_nand_and_fetch_2: 1598 case Builtin::BI__sync_nand_and_fetch_4: 1599 case Builtin::BI__sync_nand_and_fetch_8: 1600 case Builtin::BI__sync_nand_and_fetch_16: 1601 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1602 llvm::Instruction::And, true); 1603 1604 case Builtin::BI__sync_val_compare_and_swap_1: 1605 case Builtin::BI__sync_val_compare_and_swap_2: 1606 case Builtin::BI__sync_val_compare_and_swap_4: 1607 case Builtin::BI__sync_val_compare_and_swap_8: 1608 case Builtin::BI__sync_val_compare_and_swap_16: 1609 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1610 1611 case Builtin::BI__sync_bool_compare_and_swap_1: 1612 case Builtin::BI__sync_bool_compare_and_swap_2: 1613 case Builtin::BI__sync_bool_compare_and_swap_4: 1614 case Builtin::BI__sync_bool_compare_and_swap_8: 1615 case Builtin::BI__sync_bool_compare_and_swap_16: 1616 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1617 1618 case Builtin::BI__sync_swap_1: 1619 case Builtin::BI__sync_swap_2: 1620 case Builtin::BI__sync_swap_4: 1621 case Builtin::BI__sync_swap_8: 1622 case Builtin::BI__sync_swap_16: 1623 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1624 1625 case Builtin::BI__sync_lock_test_and_set_1: 1626 case Builtin::BI__sync_lock_test_and_set_2: 1627 case Builtin::BI__sync_lock_test_and_set_4: 1628 case Builtin::BI__sync_lock_test_and_set_8: 1629 case Builtin::BI__sync_lock_test_and_set_16: 1630 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1631 1632 case Builtin::BI__sync_lock_release_1: 1633 case Builtin::BI__sync_lock_release_2: 1634 case Builtin::BI__sync_lock_release_4: 1635 case Builtin::BI__sync_lock_release_8: 1636 case Builtin::BI__sync_lock_release_16: { 1637 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1638 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 1639 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1640 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1641 StoreSize.getQuantity() * 8); 1642 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1643 llvm::StoreInst *Store = 1644 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1645 StoreSize); 1646 Store->setAtomic(llvm::AtomicOrdering::Release); 1647 return RValue::get(nullptr); 1648 } 1649 1650 case Builtin::BI__sync_synchronize: { 1651 // We assume this is supposed to correspond to a C++0x-style 1652 // sequentially-consistent fence (i.e. this is only usable for 1653 // synchonization, not device I/O or anything like that). This intrinsic 1654 // is really badly designed in the sense that in theory, there isn't 1655 // any way to safely use it... but in practice, it mostly works 1656 // to use it with non-atomic loads and stores to get acquire/release 1657 // semantics. 1658 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1659 return RValue::get(nullptr); 1660 } 1661 1662 case Builtin::BI__builtin_nontemporal_load: 1663 return RValue::get(EmitNontemporalLoad(*this, E)); 1664 case Builtin::BI__builtin_nontemporal_store: 1665 return RValue::get(EmitNontemporalStore(*this, E)); 1666 case Builtin::BI__c11_atomic_is_lock_free: 1667 case Builtin::BI__atomic_is_lock_free: { 1668 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1669 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1670 // _Atomic(T) is always properly-aligned. 
1671 const char *LibCallName = "__atomic_is_lock_free"; 1672 CallArgList Args; 1673 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1674 getContext().getSizeType()); 1675 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1676 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1677 getContext().VoidPtrTy); 1678 else 1679 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1680 getContext().VoidPtrTy); 1681 const CGFunctionInfo &FuncInfo = 1682 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1683 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1684 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1685 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 1686 ReturnValueSlot(), Args); 1687 } 1688 1689 case Builtin::BI__atomic_test_and_set: { 1690 // Look at the argument type to determine whether this is a volatile 1691 // operation. The parameter type is always volatile. 1692 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1693 bool Volatile = 1694 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1695 1696 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1697 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1698 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1699 Value *NewVal = Builder.getInt8(1); 1700 Value *Order = EmitScalarExpr(E->getArg(1)); 1701 if (isa<llvm::ConstantInt>(Order)) { 1702 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1703 AtomicRMWInst *Result = nullptr; 1704 switch (ord) { 1705 case 0: // memory_order_relaxed 1706 default: // invalid order 1707 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1708 llvm::AtomicOrdering::Monotonic); 1709 break; 1710 case 1: // memory_order_consume 1711 case 2: // memory_order_acquire 1712 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1713 llvm::AtomicOrdering::Acquire); 1714 break; 1715 case 3: // memory_order_release 1716 
Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1717 llvm::AtomicOrdering::Release); 1718 break; 1719 case 4: // memory_order_acq_rel 1720 1721 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1722 llvm::AtomicOrdering::AcquireRelease); 1723 break; 1724 case 5: // memory_order_seq_cst 1725 Result = Builder.CreateAtomicRMW( 1726 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1727 llvm::AtomicOrdering::SequentiallyConsistent); 1728 break; 1729 } 1730 Result->setVolatile(Volatile); 1731 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1732 } 1733 1734 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1735 1736 llvm::BasicBlock *BBs[5] = { 1737 createBasicBlock("monotonic", CurFn), 1738 createBasicBlock("acquire", CurFn), 1739 createBasicBlock("release", CurFn), 1740 createBasicBlock("acqrel", CurFn), 1741 createBasicBlock("seqcst", CurFn) 1742 }; 1743 llvm::AtomicOrdering Orders[5] = { 1744 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1745 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1746 llvm::AtomicOrdering::SequentiallyConsistent}; 1747 1748 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1749 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1750 1751 Builder.SetInsertPoint(ContBB); 1752 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1753 1754 for (unsigned i = 0; i < 5; ++i) { 1755 Builder.SetInsertPoint(BBs[i]); 1756 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1757 Ptr, NewVal, Orders[i]); 1758 RMW->setVolatile(Volatile); 1759 Result->addIncoming(RMW, BBs[i]); 1760 Builder.CreateBr(ContBB); 1761 } 1762 1763 SI->addCase(Builder.getInt32(0), BBs[0]); 1764 SI->addCase(Builder.getInt32(1), BBs[1]); 1765 SI->addCase(Builder.getInt32(2), BBs[1]); 1766 SI->addCase(Builder.getInt32(3), BBs[2]); 1767 SI->addCase(Builder.getInt32(4), BBs[3]); 1768 SI->addCase(Builder.getInt32(5), 
BBs[4]); 1769 1770 Builder.SetInsertPoint(ContBB); 1771 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1772 } 1773 1774 case Builtin::BI__atomic_clear: { 1775 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1776 bool Volatile = 1777 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1778 1779 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1780 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1781 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1782 Value *NewVal = Builder.getInt8(0); 1783 Value *Order = EmitScalarExpr(E->getArg(1)); 1784 if (isa<llvm::ConstantInt>(Order)) { 1785 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1786 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1787 switch (ord) { 1788 case 0: // memory_order_relaxed 1789 default: // invalid order 1790 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1791 break; 1792 case 3: // memory_order_release 1793 Store->setOrdering(llvm::AtomicOrdering::Release); 1794 break; 1795 case 5: // memory_order_seq_cst 1796 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1797 break; 1798 } 1799 return RValue::get(nullptr); 1800 } 1801 1802 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1803 1804 llvm::BasicBlock *BBs[3] = { 1805 createBasicBlock("monotonic", CurFn), 1806 createBasicBlock("release", CurFn), 1807 createBasicBlock("seqcst", CurFn) 1808 }; 1809 llvm::AtomicOrdering Orders[3] = { 1810 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1811 llvm::AtomicOrdering::SequentiallyConsistent}; 1812 1813 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1814 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1815 1816 for (unsigned i = 0; i < 3; ++i) { 1817 Builder.SetInsertPoint(BBs[i]); 1818 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1819 Store->setOrdering(Orders[i]); 1820 
Builder.CreateBr(ContBB); 1821 } 1822 1823 SI->addCase(Builder.getInt32(0), BBs[0]); 1824 SI->addCase(Builder.getInt32(3), BBs[1]); 1825 SI->addCase(Builder.getInt32(5), BBs[2]); 1826 1827 Builder.SetInsertPoint(ContBB); 1828 return RValue::get(nullptr); 1829 } 1830 1831 case Builtin::BI__atomic_thread_fence: 1832 case Builtin::BI__atomic_signal_fence: 1833 case Builtin::BI__c11_atomic_thread_fence: 1834 case Builtin::BI__c11_atomic_signal_fence: { 1835 llvm::SyncScope::ID SSID; 1836 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1837 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1838 SSID = llvm::SyncScope::SingleThread; 1839 else 1840 SSID = llvm::SyncScope::System; 1841 Value *Order = EmitScalarExpr(E->getArg(0)); 1842 if (isa<llvm::ConstantInt>(Order)) { 1843 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1844 switch (ord) { 1845 case 0: // memory_order_relaxed 1846 default: // invalid order 1847 break; 1848 case 1: // memory_order_consume 1849 case 2: // memory_order_acquire 1850 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 1851 break; 1852 case 3: // memory_order_release 1853 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 1854 break; 1855 case 4: // memory_order_acq_rel 1856 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 1857 break; 1858 case 5: // memory_order_seq_cst 1859 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 1860 break; 1861 } 1862 return RValue::get(nullptr); 1863 } 1864 1865 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1866 AcquireBB = createBasicBlock("acquire", CurFn); 1867 ReleaseBB = createBasicBlock("release", CurFn); 1868 AcqRelBB = createBasicBlock("acqrel", CurFn); 1869 SeqCstBB = createBasicBlock("seqcst", CurFn); 1870 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1871 1872 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1873 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 
1874 1875 Builder.SetInsertPoint(AcquireBB); 1876 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 1877 Builder.CreateBr(ContBB); 1878 SI->addCase(Builder.getInt32(1), AcquireBB); 1879 SI->addCase(Builder.getInt32(2), AcquireBB); 1880 1881 Builder.SetInsertPoint(ReleaseBB); 1882 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 1883 Builder.CreateBr(ContBB); 1884 SI->addCase(Builder.getInt32(3), ReleaseBB); 1885 1886 Builder.SetInsertPoint(AcqRelBB); 1887 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 1888 Builder.CreateBr(ContBB); 1889 SI->addCase(Builder.getInt32(4), AcqRelBB); 1890 1891 Builder.SetInsertPoint(SeqCstBB); 1892 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 1893 Builder.CreateBr(ContBB); 1894 SI->addCase(Builder.getInt32(5), SeqCstBB); 1895 1896 Builder.SetInsertPoint(ContBB); 1897 return RValue::get(nullptr); 1898 } 1899 1900 // Library functions with special handling. 1901 case Builtin::BIsqrt: 1902 case Builtin::BIsqrtf: 1903 case Builtin::BIsqrtl: { 1904 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1905 // in finite- or unsafe-math mode (the intrinsic has different semantics 1906 // for handling negative numbers compared to the library function, so 1907 // -fmath-errno=0 is not enough). 1908 if (!FD->hasAttr<ConstAttr>()) 1909 break; 1910 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1911 CGM.getCodeGenOpts().NoNaNsFPMath)) 1912 break; 1913 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1914 llvm::Type *ArgType = Arg0->getType(); 1915 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1916 return RValue::get(Builder.CreateCall(F, Arg0)); 1917 } 1918 1919 case Builtin::BI__builtin_pow: 1920 case Builtin::BI__builtin_powf: 1921 case Builtin::BI__builtin_powl: 1922 case Builtin::BIpow: 1923 case Builtin::BIpowf: 1924 case Builtin::BIpowl: { 1925 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
1926 if (!FD->hasAttr<ConstAttr>()) 1927 break; 1928 Value *Base = EmitScalarExpr(E->getArg(0)); 1929 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1930 llvm::Type *ArgType = Base->getType(); 1931 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1932 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1933 } 1934 1935 case Builtin::BIfma: 1936 case Builtin::BIfmaf: 1937 case Builtin::BIfmal: 1938 case Builtin::BI__builtin_fma: 1939 case Builtin::BI__builtin_fmaf: 1940 case Builtin::BI__builtin_fmal: { 1941 // Rewrite fma to intrinsic. 1942 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1943 llvm::Type *ArgType = FirstArg->getType(); 1944 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1945 return RValue::get( 1946 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1947 EmitScalarExpr(E->getArg(2))})); 1948 } 1949 1950 case Builtin::BI__builtin_signbit: 1951 case Builtin::BI__builtin_signbitf: 1952 case Builtin::BI__builtin_signbitl: { 1953 return RValue::get( 1954 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1955 ConvertType(E->getType()))); 1956 } 1957 case Builtin::BI__annotation: { 1958 // Re-encode each wide string to UTF8 and make an MDString. 1959 SmallVector<Metadata *, 1> Strings; 1960 for (const Expr *Arg : E->arguments()) { 1961 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts()); 1962 assert(Str->getCharByteWidth() == 2); 1963 StringRef WideBytes = Str->getBytes(); 1964 std::string StrUtf8; 1965 if (!convertUTF16ToUTF8String( 1966 makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { 1967 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); 1968 continue; 1969 } 1970 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); 1971 } 1972 1973 // Build and MDTuple of MDStrings and emit the intrinsic call. 
1974 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); 1975 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); 1976 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); 1977 return RValue::getIgnored(); 1978 } 1979 case Builtin::BI__builtin_annotation: { 1980 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1981 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1982 AnnVal->getType()); 1983 1984 // Get the annotation string, go through casts. Sema requires this to be a 1985 // non-wide string literal, potentially casted, so the cast<> is safe. 1986 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1987 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1988 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1989 } 1990 case Builtin::BI__builtin_addcb: 1991 case Builtin::BI__builtin_addcs: 1992 case Builtin::BI__builtin_addc: 1993 case Builtin::BI__builtin_addcl: 1994 case Builtin::BI__builtin_addcll: 1995 case Builtin::BI__builtin_subcb: 1996 case Builtin::BI__builtin_subcs: 1997 case Builtin::BI__builtin_subc: 1998 case Builtin::BI__builtin_subcl: 1999 case Builtin::BI__builtin_subcll: { 2000 2001 // We translate all of these builtins from expressions of the form: 2002 // int x = ..., y = ..., carryin = ..., carryout, result; 2003 // result = __builtin_addc(x, y, carryin, &carryout); 2004 // 2005 // to LLVM IR of the form: 2006 // 2007 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 2008 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 2009 // %carry1 = extractvalue {i32, i1} %tmp1, 1 2010 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 2011 // i32 %carryin) 2012 // %result = extractvalue {i32, i1} %tmp2, 0 2013 // %carry2 = extractvalue {i32, i1} %tmp2, 1 2014 // %tmp3 = or i1 %carry1, %carry2 2015 // %tmp4 = zext i1 %tmp3 to i32 2016 // store i32 %tmp4, i32* %carryout 2017 2018 // Scalarize our 
inputs. 2019 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2020 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2021 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 2022 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 2023 2024 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 2025 llvm::Intrinsic::ID IntrinsicId; 2026 switch (BuiltinID) { 2027 default: llvm_unreachable("Unknown multiprecision builtin id."); 2028 case Builtin::BI__builtin_addcb: 2029 case Builtin::BI__builtin_addcs: 2030 case Builtin::BI__builtin_addc: 2031 case Builtin::BI__builtin_addcl: 2032 case Builtin::BI__builtin_addcll: 2033 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2034 break; 2035 case Builtin::BI__builtin_subcb: 2036 case Builtin::BI__builtin_subcs: 2037 case Builtin::BI__builtin_subc: 2038 case Builtin::BI__builtin_subcl: 2039 case Builtin::BI__builtin_subcll: 2040 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2041 break; 2042 } 2043 2044 // Construct our resulting LLVM IR expression. 
2045 llvm::Value *Carry1; 2046 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 2047 X, Y, Carry1); 2048 llvm::Value *Carry2; 2049 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 2050 Sum1, Carryin, Carry2); 2051 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 2052 X->getType()); 2053 Builder.CreateStore(CarryOut, CarryOutPtr); 2054 return RValue::get(Sum2); 2055 } 2056 2057 case Builtin::BI__builtin_add_overflow: 2058 case Builtin::BI__builtin_sub_overflow: 2059 case Builtin::BI__builtin_mul_overflow: { 2060 const clang::Expr *LeftArg = E->getArg(0); 2061 const clang::Expr *RightArg = E->getArg(1); 2062 const clang::Expr *ResultArg = E->getArg(2); 2063 2064 clang::QualType ResultQTy = 2065 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 2066 2067 WidthAndSignedness LeftInfo = 2068 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 2069 WidthAndSignedness RightInfo = 2070 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 2071 WidthAndSignedness ResultInfo = 2072 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 2073 WidthAndSignedness EncompassingInfo = 2074 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 2075 2076 llvm::Type *EncompassingLLVMTy = 2077 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 2078 2079 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 2080 2081 llvm::Intrinsic::ID IntrinsicId; 2082 switch (BuiltinID) { 2083 default: 2084 llvm_unreachable("Unknown overflow builtin id."); 2085 case Builtin::BI__builtin_add_overflow: 2086 IntrinsicId = EncompassingInfo.Signed 2087 ? llvm::Intrinsic::sadd_with_overflow 2088 : llvm::Intrinsic::uadd_with_overflow; 2089 break; 2090 case Builtin::BI__builtin_sub_overflow: 2091 IntrinsicId = EncompassingInfo.Signed 2092 ? 
llvm::Intrinsic::ssub_with_overflow 2093 : llvm::Intrinsic::usub_with_overflow; 2094 break; 2095 case Builtin::BI__builtin_mul_overflow: 2096 IntrinsicId = EncompassingInfo.Signed 2097 ? llvm::Intrinsic::smul_with_overflow 2098 : llvm::Intrinsic::umul_with_overflow; 2099 break; 2100 } 2101 2102 llvm::Value *Left = EmitScalarExpr(LeftArg); 2103 llvm::Value *Right = EmitScalarExpr(RightArg); 2104 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 2105 2106 // Extend each operand to the encompassing type. 2107 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 2108 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 2109 2110 // Perform the operation on the extended values. 2111 llvm::Value *Overflow, *Result; 2112 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 2113 2114 if (EncompassingInfo.Width > ResultInfo.Width) { 2115 // The encompassing type is wider than the result type, so we need to 2116 // truncate it. 2117 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 2118 2119 // To see if the truncation caused an overflow, we will extend 2120 // the result and then compare it to the original result. 2121 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 2122 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 2123 llvm::Value *TruncationOverflow = 2124 Builder.CreateICmpNE(Result, ResultTruncExt); 2125 2126 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 2127 Result = ResultTrunc; 2128 } 2129 2130 // Finally, store the result using the pointer. 
2131 bool isVolatile = 2132 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2133 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 2134 2135 return RValue::get(Overflow); 2136 } 2137 2138 case Builtin::BI__builtin_uadd_overflow: 2139 case Builtin::BI__builtin_uaddl_overflow: 2140 case Builtin::BI__builtin_uaddll_overflow: 2141 case Builtin::BI__builtin_usub_overflow: 2142 case Builtin::BI__builtin_usubl_overflow: 2143 case Builtin::BI__builtin_usubll_overflow: 2144 case Builtin::BI__builtin_umul_overflow: 2145 case Builtin::BI__builtin_umull_overflow: 2146 case Builtin::BI__builtin_umulll_overflow: 2147 case Builtin::BI__builtin_sadd_overflow: 2148 case Builtin::BI__builtin_saddl_overflow: 2149 case Builtin::BI__builtin_saddll_overflow: 2150 case Builtin::BI__builtin_ssub_overflow: 2151 case Builtin::BI__builtin_ssubl_overflow: 2152 case Builtin::BI__builtin_ssubll_overflow: 2153 case Builtin::BI__builtin_smul_overflow: 2154 case Builtin::BI__builtin_smull_overflow: 2155 case Builtin::BI__builtin_smulll_overflow: { 2156 2157 // We translate all of these builtins directly to the relevant llvm IR node. 2158 2159 // Scalarize our inputs. 
2160 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2161 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2162 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2163 2164 // Decide which of the overflow intrinsics we are lowering to: 2165 llvm::Intrinsic::ID IntrinsicId; 2166 switch (BuiltinID) { 2167 default: llvm_unreachable("Unknown overflow builtin id."); 2168 case Builtin::BI__builtin_uadd_overflow: 2169 case Builtin::BI__builtin_uaddl_overflow: 2170 case Builtin::BI__builtin_uaddll_overflow: 2171 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2172 break; 2173 case Builtin::BI__builtin_usub_overflow: 2174 case Builtin::BI__builtin_usubl_overflow: 2175 case Builtin::BI__builtin_usubll_overflow: 2176 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2177 break; 2178 case Builtin::BI__builtin_umul_overflow: 2179 case Builtin::BI__builtin_umull_overflow: 2180 case Builtin::BI__builtin_umulll_overflow: 2181 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2182 break; 2183 case Builtin::BI__builtin_sadd_overflow: 2184 case Builtin::BI__builtin_saddl_overflow: 2185 case Builtin::BI__builtin_saddll_overflow: 2186 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2187 break; 2188 case Builtin::BI__builtin_ssub_overflow: 2189 case Builtin::BI__builtin_ssubl_overflow: 2190 case Builtin::BI__builtin_ssubll_overflow: 2191 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2192 break; 2193 case Builtin::BI__builtin_smul_overflow: 2194 case Builtin::BI__builtin_smull_overflow: 2195 case Builtin::BI__builtin_smulll_overflow: 2196 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2197 break; 2198 } 2199 2200 2201 llvm::Value *Carry; 2202 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2203 Builder.CreateStore(Sum, SumOutPtr); 2204 2205 return RValue::get(Carry); 2206 } 2207 case Builtin::BI__builtin_addressof: 2208 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2209 case Builtin::BI__builtin_operator_new: 2210 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2211 E->getArg(0), false); 2212 case Builtin::BI__builtin_operator_delete: 2213 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2214 E->getArg(0), true); 2215 case Builtin::BI__noop: 2216 // __noop always evaluates to an integer literal zero. 2217 return RValue::get(ConstantInt::get(IntTy, 0)); 2218 case Builtin::BI__builtin_call_with_static_chain: { 2219 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2220 const Expr *Chain = E->getArg(1); 2221 return EmitCall(Call->getCallee()->getType(), 2222 EmitCallee(Call->getCallee()), Call, ReturnValue, 2223 EmitScalarExpr(Chain)); 2224 } 2225 case Builtin::BI_InterlockedExchange8: 2226 case Builtin::BI_InterlockedExchange16: 2227 case Builtin::BI_InterlockedExchange: 2228 case Builtin::BI_InterlockedExchangePointer: 2229 return RValue::get( 2230 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2231 case Builtin::BI_InterlockedCompareExchangePointer: { 2232 llvm::Type *RTy; 2233 llvm::IntegerType *IntType = 2234 IntegerType::get(getLLVMContext(), 2235 getContext().getTypeSize(E->getType())); 2236 llvm::Type *IntPtrType = IntType->getPointerTo(); 2237 2238 llvm::Value *Destination = 2239 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2240 2241 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2242 RTy = Exchange->getType(); 2243 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2244 2245 llvm::Value *Comparand = 2246 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2247 2248 auto Result = 2249 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2250 AtomicOrdering::SequentiallyConsistent, 2251 AtomicOrdering::SequentiallyConsistent); 2252 Result->setVolatile(true); 2253 2254 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2255 0), 2256 RTy)); 2257 } 2258 case Builtin::BI_InterlockedCompareExchange8: 2259 case 
Builtin::BI_InterlockedCompareExchange16: 2260 case Builtin::BI_InterlockedCompareExchange: 2261 case Builtin::BI_InterlockedCompareExchange64: { 2262 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2263 EmitScalarExpr(E->getArg(0)), 2264 EmitScalarExpr(E->getArg(2)), 2265 EmitScalarExpr(E->getArg(1)), 2266 AtomicOrdering::SequentiallyConsistent, 2267 AtomicOrdering::SequentiallyConsistent); 2268 CXI->setVolatile(true); 2269 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2270 } 2271 case Builtin::BI_InterlockedIncrement16: 2272 case Builtin::BI_InterlockedIncrement: 2273 return RValue::get( 2274 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2275 case Builtin::BI_InterlockedDecrement16: 2276 case Builtin::BI_InterlockedDecrement: 2277 return RValue::get( 2278 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2279 case Builtin::BI_InterlockedAnd8: 2280 case Builtin::BI_InterlockedAnd16: 2281 case Builtin::BI_InterlockedAnd: 2282 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2283 case Builtin::BI_InterlockedExchangeAdd8: 2284 case Builtin::BI_InterlockedExchangeAdd16: 2285 case Builtin::BI_InterlockedExchangeAdd: 2286 return RValue::get( 2287 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2288 case Builtin::BI_InterlockedExchangeSub8: 2289 case Builtin::BI_InterlockedExchangeSub16: 2290 case Builtin::BI_InterlockedExchangeSub: 2291 return RValue::get( 2292 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2293 case Builtin::BI_InterlockedOr8: 2294 case Builtin::BI_InterlockedOr16: 2295 case Builtin::BI_InterlockedOr: 2296 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2297 case Builtin::BI_InterlockedXor8: 2298 case Builtin::BI_InterlockedXor16: 2299 case Builtin::BI_InterlockedXor: 2300 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2301 case Builtin::BI_interlockedbittestandset: 2302 return RValue::get( 2303 
EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); 2304 2305 case Builtin::BI__exception_code: 2306 case Builtin::BI_exception_code: 2307 return RValue::get(EmitSEHExceptionCode()); 2308 case Builtin::BI__exception_info: 2309 case Builtin::BI_exception_info: 2310 return RValue::get(EmitSEHExceptionInfo()); 2311 case Builtin::BI__abnormal_termination: 2312 case Builtin::BI_abnormal_termination: 2313 return RValue::get(EmitSEHAbnormalTermination()); 2314 case Builtin::BI_setjmpex: { 2315 if (getTarget().getTriple().isOSMSVCRT()) { 2316 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2317 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2318 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2319 llvm::Attribute::ReturnsTwice); 2320 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2321 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2322 "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); 2323 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2324 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2325 llvm::Value *FrameAddr = 2326 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2327 ConstantInt::get(Int32Ty, 0)); 2328 llvm::Value *Args[] = {Buf, FrameAddr}; 2329 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2330 CS.setAttributes(ReturnsTwiceAttr); 2331 return RValue::get(CS.getInstruction()); 2332 } 2333 break; 2334 } 2335 case Builtin::BI_setjmp: { 2336 if (getTarget().getTriple().isOSMSVCRT()) { 2337 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2338 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2339 llvm::Attribute::ReturnsTwice); 2340 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2341 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2342 llvm::CallSite CS; 2343 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2344 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2345 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2346 llvm::FunctionType::get(IntTy, 
ArgTypes, /*isVarArg=*/true), 2347 "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); 2348 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2349 llvm::Value *Args[] = {Buf, Count}; 2350 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2351 } else { 2352 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2353 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2354 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2355 "_setjmp", ReturnsTwiceAttr, /*Local=*/true); 2356 llvm::Value *FrameAddr = 2357 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2358 ConstantInt::get(Int32Ty, 0)); 2359 llvm::Value *Args[] = {Buf, FrameAddr}; 2360 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2361 } 2362 CS.setAttributes(ReturnsTwiceAttr); 2363 return RValue::get(CS.getInstruction()); 2364 } 2365 break; 2366 } 2367 2368 case Builtin::BI__GetExceptionInfo: { 2369 if (llvm::GlobalVariable *GV = 2370 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2371 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2372 break; 2373 } 2374 2375 case Builtin::BI__fastfail: 2376 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 2377 2378 case Builtin::BI__builtin_coro_size: { 2379 auto & Context = getContext(); 2380 auto SizeTy = Context.getSizeType(); 2381 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 2382 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2383 return RValue::get(Builder.CreateCall(F)); 2384 } 2385 2386 case Builtin::BI__builtin_coro_id: 2387 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2388 case Builtin::BI__builtin_coro_promise: 2389 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2390 case Builtin::BI__builtin_coro_resume: 2391 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2392 case Builtin::BI__builtin_coro_frame: 2393 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2394 case Builtin::BI__builtin_coro_free: 2395 return EmitCoroutineIntrinsic(E, 
Intrinsic::coro_free); 2396 case Builtin::BI__builtin_coro_destroy: 2397 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2398 case Builtin::BI__builtin_coro_done: 2399 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2400 case Builtin::BI__builtin_coro_alloc: 2401 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2402 case Builtin::BI__builtin_coro_begin: 2403 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2404 case Builtin::BI__builtin_coro_end: 2405 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2406 case Builtin::BI__builtin_coro_suspend: 2407 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2408 case Builtin::BI__builtin_coro_param: 2409 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2410 2411 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2412 case Builtin::BIread_pipe: 2413 case Builtin::BIwrite_pipe: { 2414 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2415 *Arg1 = EmitScalarExpr(E->getArg(1)); 2416 CGOpenCLRuntime OpenCLRT(CGM); 2417 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2418 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2419 2420 // Type of the generic packet parameter. 2421 unsigned GenericAS = 2422 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2423 llvm::Type *I8PTy = llvm::PointerType::get( 2424 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2425 2426 // Testing which overloaded version we should generate the call for. 2427 if (2U == E->getNumArgs()) { 2428 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2429 : "__write_pipe_2"; 2430 // Creating a generic function type to be able to call with any builtin or 2431 // user defined type. 
2432 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2433 llvm::FunctionType *FTy = llvm::FunctionType::get( 2434 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2435 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2436 return RValue::get( 2437 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2438 {Arg0, BCast, PacketSize, PacketAlign})); 2439 } else { 2440 assert(4 == E->getNumArgs() && 2441 "Illegal number of parameters to pipe function"); 2442 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2443 : "__write_pipe_4"; 2444 2445 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2446 Int32Ty, Int32Ty}; 2447 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2448 *Arg3 = EmitScalarExpr(E->getArg(3)); 2449 llvm::FunctionType *FTy = llvm::FunctionType::get( 2450 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2451 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2452 // We know the third argument is an integer type, but we may need to cast 2453 // it to i32. 2454 if (Arg2->getType() != Int32Ty) 2455 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2456 return RValue::get(Builder.CreateCall( 2457 CGM.CreateRuntimeFunction(FTy, Name), 2458 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2459 } 2460 } 2461 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2462 // functions 2463 case Builtin::BIreserve_read_pipe: 2464 case Builtin::BIreserve_write_pipe: 2465 case Builtin::BIwork_group_reserve_read_pipe: 2466 case Builtin::BIwork_group_reserve_write_pipe: 2467 case Builtin::BIsub_group_reserve_read_pipe: 2468 case Builtin::BIsub_group_reserve_write_pipe: { 2469 // Composing the mangled name for the function. 
2470 const char *Name; 2471 if (BuiltinID == Builtin::BIreserve_read_pipe) 2472 Name = "__reserve_read_pipe"; 2473 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2474 Name = "__reserve_write_pipe"; 2475 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2476 Name = "__work_group_reserve_read_pipe"; 2477 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2478 Name = "__work_group_reserve_write_pipe"; 2479 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2480 Name = "__sub_group_reserve_read_pipe"; 2481 else 2482 Name = "__sub_group_reserve_write_pipe"; 2483 2484 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2485 *Arg1 = EmitScalarExpr(E->getArg(1)); 2486 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2487 CGOpenCLRuntime OpenCLRT(CGM); 2488 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2489 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2490 2491 // Building the generic function prototype. 2492 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2493 llvm::FunctionType *FTy = llvm::FunctionType::get( 2494 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2495 // We know the second argument is an integer type, but we may need to cast 2496 // it to i32. 
2497 if (Arg1->getType() != Int32Ty) 2498 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2499 return RValue::get( 2500 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2501 {Arg0, Arg1, PacketSize, PacketAlign})); 2502 } 2503 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2504 // functions 2505 case Builtin::BIcommit_read_pipe: 2506 case Builtin::BIcommit_write_pipe: 2507 case Builtin::BIwork_group_commit_read_pipe: 2508 case Builtin::BIwork_group_commit_write_pipe: 2509 case Builtin::BIsub_group_commit_read_pipe: 2510 case Builtin::BIsub_group_commit_write_pipe: { 2511 const char *Name; 2512 if (BuiltinID == Builtin::BIcommit_read_pipe) 2513 Name = "__commit_read_pipe"; 2514 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2515 Name = "__commit_write_pipe"; 2516 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2517 Name = "__work_group_commit_read_pipe"; 2518 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2519 Name = "__work_group_commit_write_pipe"; 2520 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2521 Name = "__sub_group_commit_read_pipe"; 2522 else 2523 Name = "__sub_group_commit_write_pipe"; 2524 2525 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2526 *Arg1 = EmitScalarExpr(E->getArg(1)); 2527 CGOpenCLRuntime OpenCLRT(CGM); 2528 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2529 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2530 2531 // Building the generic function prototype. 
2532 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2533 llvm::FunctionType *FTy = 2534 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2535 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2536 2537 return RValue::get( 2538 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2539 {Arg0, Arg1, PacketSize, PacketAlign})); 2540 } 2541 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2542 case Builtin::BIget_pipe_num_packets: 2543 case Builtin::BIget_pipe_max_packets: { 2544 const char *Name; 2545 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2546 Name = "__get_pipe_num_packets"; 2547 else 2548 Name = "__get_pipe_max_packets"; 2549 2550 // Building the generic function prototype. 2551 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2552 CGOpenCLRuntime OpenCLRT(CGM); 2553 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2554 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2555 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2556 llvm::FunctionType *FTy = llvm::FunctionType::get( 2557 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2558 2559 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2560 {Arg0, PacketSize, PacketAlign})); 2561 } 2562 2563 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2564 case Builtin::BIto_global: 2565 case Builtin::BIto_local: 2566 case Builtin::BIto_private: { 2567 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2568 auto NewArgT = llvm::PointerType::get(Int8Ty, 2569 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2570 auto NewRetT = llvm::PointerType::get(Int8Ty, 2571 CGM.getContext().getTargetAddressSpace( 2572 E->getType()->getPointeeType().getAddressSpace())); 2573 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2574 llvm::Value *NewArg; 2575 if (Arg0->getType()->getPointerAddressSpace() != 2576 NewArgT->getPointerAddressSpace()) 2577 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2578 else 2579 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2580 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2581 auto NewCall = 2582 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2583 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2584 ConvertType(E->getType()))); 2585 } 2586 2587 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2588 // It contains four different overload formats specified in Table 6.13.17.1. 
2589 case Builtin::BIenqueue_kernel: { 2590 StringRef Name; // Generated function call name 2591 unsigned NumArgs = E->getNumArgs(); 2592 2593 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2594 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2595 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2596 2597 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2598 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2599 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 2600 llvm::Value *Range = NDRangeL.getAddress().getPointer(); 2601 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 2602 2603 if (NumArgs == 4) { 2604 // The most basic form of the call with parameters: 2605 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2606 Name = "__enqueue_kernel_basic"; 2607 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; 2608 llvm::FunctionType *FTy = llvm::FunctionType::get( 2609 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2610 2611 llvm::Value *Block = Builder.CreatePointerCast( 2612 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2613 2614 AttrBuilder B; 2615 B.addAttribute(Attribute::ByVal); 2616 llvm::AttributeList ByValAttrSet = 2617 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 2618 2619 auto RTCall = 2620 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 2621 {Queue, Flags, Range, Block}); 2622 RTCall->setAttributes(ByValAttrSet); 2623 return RValue::get(RTCall); 2624 } 2625 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2626 2627 // Create a temporary array to hold the sizes of local pointer arguments 2628 // for the block. \p First is the position of the first size argument. 
2629 auto CreateArrayForSizeVar = [=](unsigned First) { 2630 auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); 2631 auto *Arr = Builder.CreateAlloca(AT); 2632 llvm::Value *Ptr; 2633 // Each of the following arguments specifies the size of the corresponding 2634 // argument passed to the enqueued block. 2635 auto *Zero = llvm::ConstantInt::get(IntTy, 0); 2636 for (unsigned I = First; I < NumArgs; ++I) { 2637 auto *Index = llvm::ConstantInt::get(IntTy, I - First); 2638 auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); 2639 if (I == First) 2640 Ptr = GEP; 2641 auto *V = 2642 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); 2643 Builder.CreateAlignedStore( 2644 V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); 2645 } 2646 return Ptr; 2647 }; 2648 2649 // Could have events and/or vaargs. 2650 if (E->getArg(3)->getType()->isBlockPointerType()) { 2651 // No events passed, but has variadic arguments. 2652 Name = "__enqueue_kernel_vaargs"; 2653 auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)), 2654 GenericVoidPtrTy); 2655 auto *PtrToSizeArray = CreateArrayForSizeVar(4); 2656 2657 // Create a vector of the arguments, as well as a constant value to 2658 // express to the runtime the number of variadic arguments. 2659 std::vector<llvm::Value *> Args = {Queue, 2660 Flags, 2661 Range, 2662 Block, 2663 ConstantInt::get(IntTy, NumArgs - 4), 2664 PtrToSizeArray}; 2665 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, 2666 RangeTy, GenericVoidPtrTy, 2667 IntTy, PtrToSizeArray->getType()}; 2668 2669 llvm::FunctionType *FTy = llvm::FunctionType::get( 2670 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2671 return RValue::get( 2672 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2673 llvm::ArrayRef<llvm::Value *>(Args))); 2674 } 2675 // Any calls now have event arguments passed. 
2676 if (NumArgs >= 7) { 2677 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2678 llvm::Type *EventPtrTy = EventTy->getPointerTo( 2679 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2680 2681 llvm::Value *NumEvents = 2682 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 2683 llvm::Value *EventList = 2684 E->getArg(4)->getType()->isArrayType() 2685 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2686 : EmitScalarExpr(E->getArg(4)); 2687 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2688 // Convert to generic address space. 2689 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 2690 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 2691 llvm::Value *Block = Builder.CreatePointerCast( 2692 EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); 2693 2694 std::vector<llvm::Type *> ArgTys = { 2695 QueueTy, Int32Ty, RangeTy, Int32Ty, 2696 EventPtrTy, EventPtrTy, GenericVoidPtrTy}; 2697 2698 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2699 EventList, ClkEvent, Block}; 2700 2701 if (NumArgs == 7) { 2702 // Has events but no variadics. 2703 Name = "__enqueue_kernel_basic_events"; 2704 llvm::FunctionType *FTy = llvm::FunctionType::get( 2705 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2706 return RValue::get( 2707 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2708 llvm::ArrayRef<llvm::Value *>(Args))); 2709 } 2710 // Has event info and variadics 2711 // Pass the number of variadics to the runtime function too. 
2712 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2713 ArgTys.push_back(Int32Ty); 2714 Name = "__enqueue_kernel_events_vaargs"; 2715 2716 auto *PtrToSizeArray = CreateArrayForSizeVar(7); 2717 Args.push_back(PtrToSizeArray); 2718 ArgTys.push_back(PtrToSizeArray->getType()); 2719 2720 llvm::FunctionType *FTy = llvm::FunctionType::get( 2721 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2722 return RValue::get( 2723 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2724 llvm::ArrayRef<llvm::Value *>(Args))); 2725 } 2726 LLVM_FALLTHROUGH; 2727 } 2728 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2729 // parameter. 2730 case Builtin::BIget_kernel_work_group_size: { 2731 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2732 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2733 Value *Arg = EmitScalarExpr(E->getArg(0)); 2734 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2735 return RValue::get(Builder.CreateCall( 2736 CGM.CreateRuntimeFunction( 2737 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2738 "__get_kernel_work_group_size_impl"), 2739 Arg)); 2740 } 2741 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2742 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2743 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2744 Value *Arg = EmitScalarExpr(E->getArg(0)); 2745 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2746 return RValue::get(Builder.CreateCall( 2747 CGM.CreateRuntimeFunction( 2748 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2749 "__get_kernel_preferred_work_group_multiple_impl"), 2750 Arg)); 2751 } 2752 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: 2753 case Builtin::BIget_kernel_sub_group_count_for_ndrange: { 2754 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2755 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2756 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); 2757 llvm::Value 
*NDRange = NDRangeL.getAddress().getPointer(); 2758 Value *Block = EmitScalarExpr(E->getArg(1)); 2759 Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy); 2760 const char *Name = 2761 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange 2762 ? "__get_kernel_max_sub_group_size_for_ndrange_impl" 2763 : "__get_kernel_sub_group_count_for_ndrange_impl"; 2764 return RValue::get(Builder.CreateCall( 2765 CGM.CreateRuntimeFunction( 2766 llvm::FunctionType::get( 2767 IntTy, {NDRange->getType(), GenericVoidPtrTy}, false), 2768 Name), 2769 {NDRange, Block})); 2770 } 2771 2772 case Builtin::BI__builtin_store_half: 2773 case Builtin::BI__builtin_store_halff: { 2774 Value *Val = EmitScalarExpr(E->getArg(0)); 2775 Address Address = EmitPointerWithAlignment(E->getArg(1)); 2776 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); 2777 return RValue::get(Builder.CreateStore(HalfVal, Address)); 2778 } 2779 case Builtin::BI__builtin_load_half: { 2780 Address Address = EmitPointerWithAlignment(E->getArg(0)); 2781 Value *HalfVal = Builder.CreateLoad(Address); 2782 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); 2783 } 2784 case Builtin::BI__builtin_load_halff: { 2785 Address Address = EmitPointerWithAlignment(E->getArg(0)); 2786 Value *HalfVal = Builder.CreateLoad(Address); 2787 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); 2788 } 2789 case Builtin::BIprintf: 2790 if (getTarget().getTriple().isNVPTX()) 2791 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 2792 break; 2793 case Builtin::BI__builtin_canonicalize: 2794 case Builtin::BI__builtin_canonicalizef: 2795 case Builtin::BI__builtin_canonicalizel: 2796 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2797 2798 case Builtin::BI__builtin_thread_pointer: { 2799 if (!getContext().getTargetInfo().isTLSSupported()) 2800 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2801 // Fall through - it's already mapped to the 
intrinsic by GCCBuiltin. 2802 break; 2803 } 2804 case Builtin::BI__builtin_os_log_format: { 2805 assert(E->getNumArgs() >= 2 && 2806 "__builtin_os_log_format takes at least 2 arguments"); 2807 analyze_os_log::OSLogBufferLayout Layout; 2808 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2809 Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); 2810 // Ignore argument 1, the format string. It is not currently used. 2811 CharUnits Offset; 2812 Builder.CreateStore( 2813 Builder.getInt8(Layout.getSummaryByte()), 2814 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 2815 Builder.CreateStore( 2816 Builder.getInt8(Layout.getNumArgsByte()), 2817 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 2818 2819 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 2820 for (const auto &Item : Layout.Items) { 2821 Builder.CreateStore( 2822 Builder.getInt8(Item.getDescriptorByte()), 2823 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 2824 Builder.CreateStore( 2825 Builder.getInt8(Item.getSizeByte()), 2826 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 2827 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); 2828 if (const Expr *TheExpr = Item.getExpr()) { 2829 Addr = Builder.CreateElementBitCast( 2830 Addr, ConvertTypeForMem(TheExpr->getType())); 2831 // Check if this is a retainable type. 2832 if (TheExpr->getType()->isObjCRetainableType()) { 2833 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 2834 "Only scalar can be a ObjC retainable type"); 2835 llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); 2836 RValue RV = RValue::get(SV); 2837 LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); 2838 EmitStoreThroughLValue(RV, LV); 2839 // Check if the object is constant, if not, save it in 2840 // RetainableOperands. 
2841 if (!isa<Constant>(SV)) 2842 RetainableOperands.push_back(SV); 2843 } else { 2844 EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); 2845 } 2846 } else { 2847 Addr = Builder.CreateElementBitCast(Addr, Int32Ty); 2848 Builder.CreateStore( 2849 Builder.getInt32(Item.getConstValue().getQuantity()), Addr); 2850 } 2851 Offset += Item.size(); 2852 } 2853 2854 // Push a clang.arc.use cleanup for each object in RetainableOperands. The 2855 // cleanup will cause the use to appear after the final log call, keeping 2856 // the object valid while it's held in the log buffer. Note that if there's 2857 // a release cleanup on the object, it will already be active; since 2858 // cleanups are emitted in reverse order, the use will occur before the 2859 // object is released. 2860 if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && 2861 CGM.getCodeGenOpts().OptimizationLevel != 0) 2862 for (llvm::Value *object : RetainableOperands) 2863 pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); 2864 2865 return RValue::get(BufAddr.getPointer()); 2866 } 2867 2868 case Builtin::BI__builtin_os_log_format_buffer_size: { 2869 analyze_os_log::OSLogBufferLayout Layout; 2870 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2871 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 2872 Layout.size().getQuantity())); 2873 } 2874 2875 case Builtin::BI__xray_customevent: { 2876 if (!ShouldXRayInstrumentFunction()) 2877 return RValue::getIgnored(); 2878 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { 2879 if (XRayAttr->neverXRayInstrument()) 2880 return RValue::getIgnored(); 2881 } 2882 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 2883 auto FTy = F->getFunctionType(); 2884 auto Arg0 = E->getArg(0); 2885 auto Arg0Val = EmitScalarExpr(Arg0); 2886 auto Arg0Ty = Arg0->getType(); 2887 auto PTy0 = FTy->getParamType(0); 2888 if (PTy0 != Arg0Val->getType()) { 2889 if (Arg0Ty->isArrayType()) 
2890 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 2891 else 2892 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 2893 } 2894 auto Arg1 = EmitScalarExpr(E->getArg(1)); 2895 auto PTy1 = FTy->getParamType(1); 2896 if (PTy1 != Arg1->getType()) 2897 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 2898 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 2899 } 2900 2901 case Builtin::BI__builtin_ms_va_start: 2902 case Builtin::BI__builtin_ms_va_end: 2903 return RValue::get( 2904 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 2905 BuiltinID == Builtin::BI__builtin_ms_va_start)); 2906 2907 case Builtin::BI__builtin_ms_va_copy: { 2908 // Lower this manually. We can't reliably determine whether or not any 2909 // given va_copy() is for a Win64 va_list from the calling convention 2910 // alone, because it's legal to do this from a System V ABI function. 2911 // With opaque pointer types, we won't have enough information in LLVM 2912 // IR to determine this from the argument types, either. Best to do it 2913 // now, while we have enough information. 2914 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 2915 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 2916 2917 llvm::Type *BPP = Int8PtrPtrTy; 2918 2919 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 2920 DestAddr.getAlignment()); 2921 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 2922 SrcAddr.getAlignment()); 2923 2924 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 2925 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); 2926 } 2927 } 2928 2929 // If this is an alias for a lib function (e.g. __builtin_sin), emit 2930 // the call using the normal call path, but using the unmangled 2931 // version of the function name. 2932 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2933 return emitLibraryCall(*this, FD, E, 2934 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2935 2936 // If this is a predefined lib function (e.g. 
malloc), emit the call 2937 // using exactly the normal call path. 2938 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2939 return emitLibraryCall(*this, FD, E, 2940 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 2941 2942 // Check that a call to a target specific builtin has the correct target 2943 // features. 2944 // This is down here to avoid non-target specific builtins, however, if 2945 // generic builtins start to require generic target features then we 2946 // can move this up to the beginning of the function. 2947 checkTargetFeatures(E, FD); 2948 2949 // See if we have a target specific intrinsic. 2950 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2951 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2952 StringRef Prefix = 2953 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 2954 if (!Prefix.empty()) { 2955 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 2956 // NOTE we dont need to perform a compatibility flag check here since the 2957 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 2958 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 2959 if (IntrinsicID == Intrinsic::not_intrinsic) 2960 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 2961 } 2962 2963 if (IntrinsicID != Intrinsic::not_intrinsic) { 2964 SmallVector<Value*, 16> Args; 2965 2966 // Find out if any arguments are required to be integer constant 2967 // expressions. 
2968 unsigned ICEArguments = 0; 2969 ASTContext::GetBuiltinTypeError Error; 2970 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2971 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2972 2973 Function *F = CGM.getIntrinsic(IntrinsicID); 2974 llvm::FunctionType *FTy = F->getFunctionType(); 2975 2976 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2977 Value *ArgValue; 2978 // If this is a normal argument, just emit it as a scalar. 2979 if ((ICEArguments & (1 << i)) == 0) { 2980 ArgValue = EmitScalarExpr(E->getArg(i)); 2981 } else { 2982 // If this is required to be a constant, constant fold it so that we 2983 // know that the generated intrinsic gets a ConstantInt. 2984 llvm::APSInt Result; 2985 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 2986 assert(IsConst && "Constant arg isn't actually constant?"); 2987 (void)IsConst; 2988 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2989 } 2990 2991 // If the intrinsic arg type is different from the builtin arg type 2992 // we need to do a bit cast. 2993 llvm::Type *PTy = FTy->getParamType(i); 2994 if (PTy != ArgValue->getType()) { 2995 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 2996 "Must be able to losslessly bit cast to param"); 2997 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2998 } 2999 3000 Args.push_back(ArgValue); 3001 } 3002 3003 Value *V = Builder.CreateCall(F, Args); 3004 QualType BuiltinRetType = E->getType(); 3005 3006 llvm::Type *RetTy = VoidTy; 3007 if (!BuiltinRetType->isVoidType()) 3008 RetTy = ConvertType(BuiltinRetType); 3009 3010 if (RetTy != V->getType()) { 3011 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 3012 "Must be able to losslessly bit cast result type"); 3013 V = Builder.CreateBitCast(V, RetTy); 3014 } 3015 3016 return RValue::get(V); 3017 } 3018 3019 // See if we have a target specific builtin that needs to be lowered. 
3020 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 3021 return RValue::get(V); 3022 3023 ErrorUnsupported(E, "builtin function"); 3024 3025 // Unknown builtin, for now just dump it out and return undef. 3026 return GetUndefRValue(E->getType()); 3027 } 3028 3029 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 3030 unsigned BuiltinID, const CallExpr *E, 3031 llvm::Triple::ArchType Arch) { 3032 switch (Arch) { 3033 case llvm::Triple::arm: 3034 case llvm::Triple::armeb: 3035 case llvm::Triple::thumb: 3036 case llvm::Triple::thumbeb: 3037 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 3038 case llvm::Triple::aarch64: 3039 case llvm::Triple::aarch64_be: 3040 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 3041 case llvm::Triple::x86: 3042 case llvm::Triple::x86_64: 3043 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 3044 case llvm::Triple::ppc: 3045 case llvm::Triple::ppc64: 3046 case llvm::Triple::ppc64le: 3047 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 3048 case llvm::Triple::r600: 3049 case llvm::Triple::amdgcn: 3050 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 3051 case llvm::Triple::systemz: 3052 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 3053 case llvm::Triple::nvptx: 3054 case llvm::Triple::nvptx64: 3055 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 3056 case llvm::Triple::wasm32: 3057 case llvm::Triple::wasm64: 3058 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 3059 default: 3060 return nullptr; 3061 } 3062 } 3063 3064 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 3065 const CallExpr *E) { 3066 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 3067 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 3068 return EmitTargetArchBuiltinExpr( 3069 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 3070 getContext().getAuxTargetInfo()->getTriple().getArch()); 3071 } 3072 3073 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 3074 getTarget().getTriple().getArch()); 
3075 } 3076 3077 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 3078 NeonTypeFlags TypeFlags, 3079 bool V1Ty=false) { 3080 int IsQuad = TypeFlags.isQuad(); 3081 switch (TypeFlags.getEltType()) { 3082 case NeonTypeFlags::Int8: 3083 case NeonTypeFlags::Poly8: 3084 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 3085 case NeonTypeFlags::Int16: 3086 case NeonTypeFlags::Poly16: 3087 case NeonTypeFlags::Float16: 3088 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 3089 case NeonTypeFlags::Int32: 3090 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 3091 case NeonTypeFlags::Int64: 3092 case NeonTypeFlags::Poly64: 3093 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 3094 case NeonTypeFlags::Poly128: 3095 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 3096 // There is a lot of i128 and f128 API missing. 3097 // so we use v16i8 to represent poly128 and get pattern matched. 3098 return llvm::VectorType::get(CGF->Int8Ty, 16); 3099 case NeonTypeFlags::Float32: 3100 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 3101 case NeonTypeFlags::Float64: 3102 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 3103 } 3104 llvm_unreachable("Unknown vector element type!"); 3105 } 3106 3107 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 3108 NeonTypeFlags IntTypeFlags) { 3109 int IsQuad = IntTypeFlags.isQuad(); 3110 switch (IntTypeFlags.getEltType()) { 3111 case NeonTypeFlags::Int32: 3112 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 3113 case NeonTypeFlags::Int64: 3114 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 3115 default: 3116 llvm_unreachable("Type can't be converted to floating-point!"); 3117 } 3118 } 3119 3120 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 3121 unsigned nElts = V->getType()->getVectorNumElements(); 3122 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 3123 return Builder.CreateShuffleVector(V, V, SV, "lane"); 3124 } 3125 3126 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 3127 const char *name, 3128 unsigned shift, bool rightshift) { 3129 unsigned j = 0; 3130 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3131 ai != ae; ++ai, ++j) 3132 if (shift > 0 && shift == j) 3133 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 3134 else 3135 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 3136 3137 return Builder.CreateCall(F, Ops, name); 3138 } 3139 3140 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 3141 bool neg) { 3142 int SV = cast<ConstantInt>(V)->getSExtValue(); 3143 return ConstantInt::get(Ty, neg ? -SV : SV); 3144 } 3145 3146 // \brief Right-shift a vector by a constant. 
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  // Ty must be a vector type and Shift a ConstantInt; both are enforced by the
  // cast<> calls below. usgn selects a logical (lshr) rather than arithmetic
  // (ashr) shift, and name is given to the emitted shift instruction.
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  // Turn the (possibly adjusted) amount into a constant of type Ty.
  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}

// Bit flags stored in NeonIntrinsicInfo::TypeModifier. They tell
// LookupNeonLLVMIntrinsic which types to put, in order, into the overload
// type list of the LLVM intrinsic.
enum {
  // Append the call's return type.
  AddRetType = (1 << 0),
  // Append the argument type once.
  Add1ArgType = (1 << 1),
  // Append the argument type twice.
  Add2ArgTypes = (1 << 2),

  // Wrap the return type / the argument type in a vector before appending.
  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  // Append FloatTy as the last overload type.
  InventFloatType = (1 << 5),
  // Not consumed in this part of the file. NOTE(review): presumably selects
  // between LLVMIntrinsic and AltLLVMIntrinsic by signedness -- confirm
  // against the users of the tables.
  UnsignedAlts = (1 << 6),

  // Total bit width of the vectors built by the Vectorize* flags; when neither
  // is set a one-element vector is used.
  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  // Convenience combinations of the flags above.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

namespace {
// One row of a builtin -> LLVM intrinsic mapping table.
struct NeonIntrinsicInfo {
  const char *NameHint;      // Stringified builtin base name (see NEONMAP*).
  unsigned BuiltinID;        // NEON::BI__builtin_neon_* value; the sort key.
  unsigned LLVMIntrinsic;    // Intrinsic ID, or 0 when the row has none.
  unsigned AltLLVMIntrinsic; // Second intrinsic ID (NEONMAP2 rows), else 0.
  unsigned TypeModifier;     // Combination of the modifier flags above.

  // Ordering on BuiltinID only; the first overload lets std::lower_bound
  // compare a row directly against a bare builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

// Row builders for the tables below. NameBase is both stringified (NameHint)
// and pasted onto NEON::BI__builtin_neon_ to form the builtin ID.

// Row with no LLVM intrinsic and no type modifier.
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

// Row with a single LLVM intrinsic.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

// Row with a primary and an alternate LLVM intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }

// Mapping of NEON builtins to arm_neon_* (and generic) LLVM intrinsics.
// NOTE(review): presumably looked up through findNeonIntrinsicInMap, in which
// case rows must stay sorted by BuiltinID -- confirm at the call sites.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// Mapping of the vector NEON builtins to aarch64_neon_* / aarch64_crypto_*
// (and generic) LLVM intrinsics.
// NOTE(review): presumably looked up through findNeonIntrinsicInMap, in which
// case rows must stay sorted by BuiltinID -- confirm at the call sites.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};

// Mapping of the scalar (non "_v") AArch64 NEON builtins to LLVM intrinsics.
// NOTE(review): presumably looked up through findNeonIntrinsicInMap, in which
// case rows must stay sorted by BuiltinID -- confirm at the call sites.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

// "Already checked" flags handed to findNeonIntrinsicInMap as its
// MapProvenSorted argument, so that the sortedness assertion is evaluated at
// most once per flag. NOTE(review): the pairing of each flag with a table is
// implied by the names only -- the call sites are outside this section.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;


// Binary-search IntrinsicMap for the row whose BuiltinID equals BuiltinID.
// Returns a pointer to that row, or nullptr when there is none.
//
// The table has to be sorted by BuiltinID for the search to be valid. In
// builds with assertions enabled this is checked the first time a given
// MapProvenSorted flag is seen as false, after which the flag is set to true.
static const NeonIntrinsicInfo *
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
                       unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
    MapProvenSorted = true;
  }
#endif

  // Uses NeonIntrinsicInfo::operator<(unsigned) to compare rows with the ID.
  const NeonIntrinsicInfo *Builtin =
      std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);

  // lower_bound only finds the insertion point; confirm an exact match.
  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}

// Get the declaration of the LLVM intrinsic IntrinsicID, building its list of
// overload types from the Modifier flags:
//   - AddRetType: the converted return type of call E comes first, wrapped in
//     a vector if VectorizeRetType is set;
//   - Add1ArgType / Add2ArgTypes: ArgType is appended once / twice, wrapped in
//     a vector first if VectorizeArgTypes is set;
//   - InventFloatType: FloatTy is appended last.
// Vectors are sized to 64 or 128 bits in total when Use64BitVectors or
// Use128BitVectors is set, and have a single element otherwise.
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  // 0 means "no explicit width": vectorized types get exactly one element.
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::VectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::VectorType::get(ArgType, Elts);
  }

  // Add2ArgTypes passes both tests below, so it contributes ArgType twice.
  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
                                     const NeonIntrinsicInfo &SISDInfo,
                                     SmallVectorImpl<Value *> &Ops,
                                     const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exist, cmle is actually a cmge
    // with swapped operands. The table gives us the right intrinsic but we
    // still need to do the swap.
3850 std::swap(Ops[0], Ops[1]); 3851 break; 3852 } 3853 3854 assert(Int && "Generic code assumes a valid intrinsic"); 3855 3856 // Determine the type(s) of this overloaded AArch64 intrinsic. 3857 const Expr *Arg = E->getArg(0); 3858 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 3859 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 3860 3861 int j = 0; 3862 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 3863 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3864 ai != ae; ++ai, ++j) { 3865 llvm::Type *ArgTy = ai->getType(); 3866 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 3867 ArgTy->getPrimitiveSizeInBits()) 3868 continue; 3869 3870 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 3871 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 3872 // it before inserting. 3873 Ops[j] = 3874 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 3875 Ops[j] = 3876 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 3877 } 3878 3879 Value *Result = CGF.EmitNeonCall(F, Ops, s); 3880 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 3881 if (ResultType->getPrimitiveSizeInBits() < 3882 Result->getType()->getPrimitiveSizeInBits()) 3883 return CGF.Builder.CreateExtractElement(Result, C0); 3884 3885 return CGF.Builder.CreateBitCast(Result, ResultType, s); 3886 } 3887 3888 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 3889 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 3890 const char *NameHint, unsigned Modifier, const CallExpr *E, 3891 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) { 3892 // Get the last argument, which specifies the vector type. 3893 llvm::APSInt NeonTypeConst; 3894 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3895 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 3896 return nullptr; 3897 3898 // Determine the type of this overloaded NEON intrinsic. 
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  // VTy/Ty both name the vector type selected by the type flags; Ty is
  // reassigned by some cases below, VTy is not.
  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Materialize an Address's alignment as an i32 constant operand.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // With UnsignedAlts set, LLVMIntrinsic is used for unsigned element types
  // and AltLLVMIntrinsic for everything else.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  // Cases that `return` emit the builtin completely; cases that `break` (and
  // builtins with no case) continue to the generic call after the switch.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point element types use the generic fabs intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // vcale/vcalt share the vcage/vcagt emission below, with the two
    // operands exchanged.
    std::swap(Ops[0], Ops[1]);
    LLVM_FALLTHROUGH;
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on the result vector type and on a
    // float/double vector with the same lane count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer -> float conversion as a plain uitofp/sitofp; Ty is switched to
    // the float32 vector type of matching quad-ness for the result.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Overload types: { floating-point vector, Ty }. Unsigned sources use
    // LLVMIntrinsic, everything else AltLLVMIntrinsic.
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    // Overload types: { Ty, floating-point vector }.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float -> integer conversion as a plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // Overload types: { Ty, floating-point vector }; the table-provided
    // intrinsic and name hint are used as-is.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shufflevector of the two inputs selecting lanes
    // CV .. CV + NumElements - 1; Ops[2] must already be a ConstantInt.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    // The alignment of PtrOp0 is appended as an extra i32 operand.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the intrinsic on (Ops[1], alignment of PtrOp1), then store its
    // result through Ops[0] after casting Ops[0] to a pointer to the
    // result type.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load one element through PtrOp0, insert it at index 0 of an undef
    // vector, then hand that vector to EmitNeonSplat.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Bitcast Ops[2] up to, but not including, the last operand to Ty.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Everything from Ops[1] onwards is passed to the intrinsic; the result
    // is then stored through Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen: view the operand with truncated-element lanes, then zero- or
    // sign-extend to Ty.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow: view the operand with extended-element lanes, then truncate.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    // Polynomial types override the signed/unsigned choice.
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two calls: LLVMIntrinsic on the operands after the first, then
    // AltLLVMIntrinsic on (Ops[0], that result).
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point vectors use LLVMIntrinsic, all others AltLLVMIntrinsic.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Emitted as a plain shl by the vector built from Ops[1].
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend from the truncated-element type to VTy, then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the extended-element type (logical for unsigned,
    // arithmetic otherwise), then truncate to Ty.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    // All stores take the alignment of PtrOp0 as a trailing i32 operand.
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is treated as a pointer to Ty; the two shuffled results are
    // stored at element offsets 0 and 1 from it. The second store is what
    // gets returned.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per lane, sign-extended to Ty.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same store-through-Ops[0] scheme as vtrn; result vi takes lanes
    // vi, vi + 2, vi + 4, ... of the concatenated inputs.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same store-through-Ops[0] scheme as vtrn, with interleaving indices.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: everything that did not return from the switch above.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
4306 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4307 4308 Value *Result = EmitNeonCall(F, Ops, NameHint); 4309 llvm::Type *ResultType = ConvertType(E->getType()); 4310 // AArch64 intrinsic one-element vector type cast to 4311 // scalar type expected by the builtin 4312 return Builder.CreateBitCast(Result, ResultType, NameHint); 4313 } 4314 4315 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 4316 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 4317 const CmpInst::Predicate Ip, const Twine &Name) { 4318 llvm::Type *OTy = Op->getType(); 4319 4320 // FIXME: this is utterly horrific. We should not be looking at previous 4321 // codegen context to find out what needs doing. Unfortunately TableGen 4322 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 4323 // (etc). 4324 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 4325 OTy = BI->getOperand(0)->getType(); 4326 4327 Op = Builder.CreateBitCast(Op, OTy); 4328 if (OTy->getScalarType()->isFloatingPointTy()) { 4329 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 4330 } else { 4331 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 4332 } 4333 return Builder.CreateSExt(Op, Ty, Name); 4334 } 4335 4336 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 4337 Value *ExtOp, Value *IndexOp, 4338 llvm::Type *ResTy, unsigned IntID, 4339 const char *Name) { 4340 SmallVector<Value *, 2> TblOps; 4341 if (ExtOp) 4342 TblOps.push_back(ExtOp); 4343 4344 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 4345 SmallVector<uint32_t, 16> Indices; 4346 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 4347 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 4348 Indices.push_back(2*i); 4349 Indices.push_back(2*i+1); 4350 } 4351 4352 int PairPos = 0, End = Ops.size() - 1; 4353 while (PairPos < End) { 4354 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4355 
Ops[PairPos+1], Indices, 4356 Name)); 4357 PairPos += 2; 4358 } 4359 4360 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 4361 // of the 128-bit lookup table with zero. 4362 if (PairPos == End) { 4363 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 4364 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4365 ZeroTbl, Indices, Name)); 4366 } 4367 4368 Function *TblF; 4369 TblOps.push_back(IndexOp); 4370 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 4371 4372 return CGF.EmitNeonCall(TblF, TblOps, Name); 4373 } 4374 4375 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 4376 unsigned Value; 4377 switch (BuiltinID) { 4378 default: 4379 return nullptr; 4380 case ARM::BI__builtin_arm_nop: 4381 Value = 0; 4382 break; 4383 case ARM::BI__builtin_arm_yield: 4384 case ARM::BI__yield: 4385 Value = 1; 4386 break; 4387 case ARM::BI__builtin_arm_wfe: 4388 case ARM::BI__wfe: 4389 Value = 2; 4390 break; 4391 case ARM::BI__builtin_arm_wfi: 4392 case ARM::BI__wfi: 4393 Value = 3; 4394 break; 4395 case ARM::BI__builtin_arm_sev: 4396 case ARM::BI__sev: 4397 Value = 4; 4398 break; 4399 case ARM::BI__builtin_arm_sevl: 4400 case ARM::BI__sevl: 4401 Value = 5; 4402 break; 4403 } 4404 4405 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 4406 llvm::ConstantInt::get(Int32Ty, Value)); 4407 } 4408 4409 // Generates the IR for the read/write special register builtin, 4410 // ValueType is the type of the value that is to be written or read, 4411 // RegisterType is the type of the register being written to or read from. 4412 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 4413 const CallExpr *E, 4414 llvm::Type *RegisterType, 4415 llvm::Type *ValueType, 4416 bool IsRead, 4417 StringRef SysReg = "") { 4418 // write and register intrinsics only support 32 and 64 bit operations. 
4419 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4420 && "Unsupported size for register."); 4421 4422 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4423 CodeGen::CodeGenModule &CGM = CGF.CGM; 4424 LLVMContext &Context = CGM.getLLVMContext(); 4425 4426 if (SysReg.empty()) { 4427 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4428 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 4429 } 4430 4431 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4432 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4433 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4434 4435 llvm::Type *Types[] = { RegisterType }; 4436 4437 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4438 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4439 && "Can't fit 64-bit value in 32-bit register"); 4440 4441 if (IsRead) { 4442 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4443 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4444 4445 if (MixedTypes) 4446 // Read into 64 bit register and then truncate result to 32 bit. 4447 return Builder.CreateTrunc(Call, ValueType); 4448 4449 if (ValueType->isPointerTy()) 4450 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4451 return Builder.CreateIntToPtr(Call, ValueType); 4452 4453 return Call; 4454 } 4455 4456 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4457 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4458 if (MixedTypes) { 4459 // Extend 32 bit write value to 64 bit to pass to write. 4460 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4461 return Builder.CreateCall(F, { Metadata, ArgValue }); 4462 } 4463 4464 if (ValueType->isPointerTy()) { 4465 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
4466 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4467 return Builder.CreateCall(F, { Metadata, ArgValue }); 4468 } 4469 4470 return Builder.CreateCall(F, { Metadata, ArgValue }); 4471 } 4472 4473 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4474 /// argument that specifies the vector type. 4475 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4476 switch (BuiltinID) { 4477 default: break; 4478 case NEON::BI__builtin_neon_vget_lane_i8: 4479 case NEON::BI__builtin_neon_vget_lane_i16: 4480 case NEON::BI__builtin_neon_vget_lane_i32: 4481 case NEON::BI__builtin_neon_vget_lane_i64: 4482 case NEON::BI__builtin_neon_vget_lane_f32: 4483 case NEON::BI__builtin_neon_vgetq_lane_i8: 4484 case NEON::BI__builtin_neon_vgetq_lane_i16: 4485 case NEON::BI__builtin_neon_vgetq_lane_i32: 4486 case NEON::BI__builtin_neon_vgetq_lane_i64: 4487 case NEON::BI__builtin_neon_vgetq_lane_f32: 4488 case NEON::BI__builtin_neon_vset_lane_i8: 4489 case NEON::BI__builtin_neon_vset_lane_i16: 4490 case NEON::BI__builtin_neon_vset_lane_i32: 4491 case NEON::BI__builtin_neon_vset_lane_i64: 4492 case NEON::BI__builtin_neon_vset_lane_f32: 4493 case NEON::BI__builtin_neon_vsetq_lane_i8: 4494 case NEON::BI__builtin_neon_vsetq_lane_i16: 4495 case NEON::BI__builtin_neon_vsetq_lane_i32: 4496 case NEON::BI__builtin_neon_vsetq_lane_i64: 4497 case NEON::BI__builtin_neon_vsetq_lane_f32: 4498 case NEON::BI__builtin_neon_vsha1h_u32: 4499 case NEON::BI__builtin_neon_vsha1cq_u32: 4500 case NEON::BI__builtin_neon_vsha1pq_u32: 4501 case NEON::BI__builtin_neon_vsha1mq_u32: 4502 case ARM::BI_MoveToCoprocessor: 4503 case ARM::BI_MoveToCoprocessor2: 4504 return false; 4505 } 4506 return true; 4507 } 4508 4509 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4510 const CallExpr *E) { 4511 if (auto Hint = GetValueForARMHint(BuiltinID)) 4512 return Hint; 4513 4514 if (BuiltinID == ARM::BI__emit) { 4515 bool IsThumb = getTarget().getTriple().getArch() == 
/// Emit LLVM IR for a call to an ARM target builtin.
///
/// The body is a cascade. Hint builtins are tried first, then a series of
/// individually handled builtins (each returns as soon as it matches). What
/// remains has its operands collected into \c Ops and is dispatched through
/// the special-case switch, the shared NEON intrinsic table and finally the
/// ARM-only NEON switch.
///
/// \param BuiltinID  identifier of the builtin being called.
/// \param E          the call expression supplying the arguments.
/// \returns the emitted value, or nullptr when nothing here handles the
///          builtin (the trailing type argument is not an integer constant,
///          GetNeonType yields no type, or no case matches).
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (auto Hint = GetValueForARMHint(BuiltinID))
    return Hint;

  // __emit: place a raw instruction word in the output through inline asm.
  // The constant is narrowed to 16 bits and emitted with ".inst.n" on Thumb,
  // or to 32 bits and emitted with ".inst" otherwise.
  if (BuiltinID == ARM::BI__emit) {
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);

    APSInt Value;
    if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();

    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true)
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true);

    return Builder.CreateCall(Emit);
  }

  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
    Value *Option = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  }

  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW      = EmitScalarExpr(E->getArg(1));
    Value *IsData  = EmitScalarExpr(E->getArg(2));

    // Locality is not supported on ARM target, so a fixed value of 3 is
    // always passed to the generic prefetch intrinsic.
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);

    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  }

  // rbit maps onto the generic bitreverse intrinsic, overloaded on the
  // argument's type.
  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  // __clear_cache becomes a nounwind runtime call to a function with the
  // callee's own name and converted function type.
  if (BuiltinID == ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
      BuiltinID == ARM::BI__builtin_arm_mcrr2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_mcrr:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
      break;
    case ARM::BI__builtin_arm_mcrr2:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
      break;
    }

    // The builtin takes 4 arguments but the intrinsic is called with 5
    // operands: Rt and Rt2 arrive packed in a single 64-bit integer
    // (argument 2) and are split here into two 32-bit values, Rt from the
    // low half and Rt2 from the high half.

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
    Value *CRm = EmitScalarExpr(E->getArg(3));

    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);

    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  }

  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
      BuiltinID == ARM::BI__builtin_arm_mrrc2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_mrrc:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
      break;
    case ARM::BI__builtin_arm_mrrc2:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
      break;
    }

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *CRm  = EmitScalarExpr(E->getArg(2));
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});

    // Returns an unsigned 64 bit integer, represented
    // as two 32 bit integers. Element 1 of the intrinsic result becomes the
    // high 32 bits and element 0 the low 32 bits.

    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
    Rt = Builder.CreateZExt(Rt, Int64Ty);
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);

    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);

    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  }

  // 64-bit exclusive loads: ldrexd / __ldrexd, plus ldrex / ldaex when the
  // result type is 64 bits wide. This must run before the generic ldrex/ldaex
  // block below, which handles the remaining widths.
  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
        BuiltinID == ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == ARM::BI__ldrexd) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
      break;
    case ARM::BI__builtin_arm_ldrexd:
    case ARM::BI__builtin_arm_ldrex:
    case ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
      break;
    }

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldrexd");

    // Reassemble the 64-bit value: element 1 is shifted into the high half,
    // element 0 is OR'ed into the low half.
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }

  // Remaining exclusive loads: load through a pointer to an integer as wide
  // as the result type, then convert the loaded value to the real result
  // type.
  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
      BuiltinID == ARM::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *PtrTy = llvm::IntegerType::get(
        getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
    LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);

    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
                                       ? Intrinsic::arm_ldaex
                                       : Intrinsic::arm_ldrex,
                                   PtrTy);
    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    else {
      // Narrow to an integer of the result's bit width, then reinterpret it
      // as the result type.
      llvm::Type *IntResTy = llvm::IntegerType::get(
          getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
      return Builder.CreateBitCast(Val, RealResTy);
    }
  }

  // 64-bit exclusive stores: strexd, plus strex / stlex when the stored value
  // is 64 bits wide. This must run before the generic strex/stlex block below.
  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == ARM::BI__builtin_arm_stlex ||
        BuiltinID == ARM::BI__builtin_arm_strex) &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlexd
                                       : Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);

    // Split the value into two i32 halves by storing it to a temporary and
    // reloading that memory as a {i32, i32} struct.
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  }

  // Remaining exclusive stores: the value is passed to the intrinsic as an
  // i32, and the address as a pointer to an integer of the value's width.
  if (BuiltinID == ARM::BI__builtin_arm_strex ||
      BuiltinID == ARM::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
    else {
      // Reinterpret as an integer of the same bit width, then widen to i32.
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
    }

    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlex
                                       : Intrinsic::arm_strex,
                                   StoreAddr->getType());
    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
  }

  // __iso_volatile_{load,store}N: a volatile access to an integer whose width
  // is the size of the pointee type of argument 0, aligned to that same size.
  // Builtins not listed here fall out of the switch and continue below.
  switch (BuiltinID) {
  case ARM::BI__iso_volatile_load8:
  case ARM::BI__iso_volatile_load16:
  case ARM::BI__iso_volatile_load32:
  case ARM::BI__iso_volatile_load64: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             LoadSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::LoadInst *Load =
      Builder.CreateAlignedLoad(Ptr, LoadSize);
    Load->setVolatile(true);
    return Load;
  }
  case ARM::BI__iso_volatile_store8:
  case ARM::BI__iso_volatile_store16:
  case ARM::BI__iso_volatile_store32:
  case ARM::BI__iso_volatile_store64: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Value = EmitScalarExpr(E->getArg(1));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateAlignedStore(Value, Ptr,
                                 StoreSize);
    Store->setVolatile(true);
    return Store;
  }
  }

  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
    return Builder.CreateCall(F);
  }

  // CRC32: pick the intrinsic. The 64-bit "d" builtins reuse the 32-bit "w"
  // intrinsics and are expanded into two calls below.
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case ARM::BI__builtin_arm_crc32w:
  case ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case ARM::BI__builtin_arm_crc32cw:
  case ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));

    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases: the low
    // 32 bits of the data are fed in first, then the high 32 bits.
    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
      return Builder.CreateCall(F, {Res, Arg1b});
    } else {
      // The data operand is widened to i32 before the call.
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      return Builder.CreateCall(F, {Arg0, Arg1});
    }
  }

  // Special-register reads (rsr*) and writes (wsr*). The "p" variants use a
  // void pointer value with a 32-bit register, the "64" variants use 64-bit
  // value and register types, and the rest use 32-bit ones.
  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
      BuiltinID == ARM::BI__builtin_arm_rsr64 ||
      BuiltinID == ARM::BI__builtin_arm_rsrp ||
      BuiltinID == ARM::BI__builtin_arm_wsr ||
      BuiltinID == ARM::BI__builtin_arm_wsr64 ||
      BuiltinID == ARM::BI__builtin_arm_wsrp) {

    bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
                  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
                  BuiltinID == ARM::BI__builtin_arm_rsrp;

    bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
                            BuiltinID == ARM::BI__builtin_arm_wsrp;

    bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
                   BuiltinID == ARM::BI__builtin_arm_wsr64;

    llvm::Type *ValueType;
    llvm::Type *RegisterType;
    if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
      RegisterType = Int32Ty;
    } else if (Is64Bit) {
      ValueType = RegisterType = Int64Ty;
    } else {
      ValueType = RegisterType = Int32Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
  }

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  // Helper: the alignment recorded for an Address, as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Emit the call operands into Ops. When the builtin has a trailing
  // type-flag argument, that argument is left out here and read separately
  // further down.
  Address PtrOp0 = Address::invalid();
  Address PtrOp1 = Address::invalid();
  SmallVector<Value*, 4> Ops;
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst2_v:
      case NEON::BI__builtin_neon_vst2q_v:
      case NEON::BI__builtin_neon_vst2_lane_v:
      case NEON::BI__builtin_neon_vst2q_lane_v:
      case NEON::BI__builtin_neon_vst3_v:
      case NEON::BI__builtin_neon_vst3q_v:
      case NEON::BI__builtin_neon_vst3_lane_v:
      case NEON::BI__builtin_neon_vst3q_lane_v:
      case NEON::BI__builtin_neon_vst4_v:
      case NEON::BI__builtin_neon_vst4q_v:
      case NEON::BI__builtin_neon_vst4_lane_v:
      case NEON::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.getPointer());
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_v:
      case NEON::BI__builtin_neon_vld2q_v:
      case NEON::BI__builtin_neon_vld3_v:
      case NEON::BI__builtin_neon_vld3q_v:
      case NEON::BI__builtin_neon_vld4_v:
      case NEON::BI__builtin_neon_vld4q_v:
      case NEON::BI__builtin_neon_vld2_lane_v:
      case NEON::BI__builtin_neon_vld2q_lane_v:
      case NEON::BI__builtin_neon_vld3_lane_v:
      case NEON::BI__builtin_neon_vld3q_lane_v:
      case NEON::BI__builtin_neon_vld4_lane_v:
      case NEON::BI__builtin_neon_vld4q_lane_v:
      case NEON::BI__builtin_neon_vld2_dup_v:
      case NEON::BI__builtin_neon_vld3_dup_v:
      case NEON::BI__builtin_neon_vld4_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(PtrOp1.getPointer());
        continue;
      }
    }

    // Bit i of ICEArguments is set when argument i must be an integer
    // constant expression.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    } else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      llvm::APSInt Result;
      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
      assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
    }
  }

  // Builtins that are fully handled from Ops (or from E itself) without the
  // type-flag argument. Anything else breaks out and is treated as an
  // overloaded builtin below.
  switch (BuiltinID) {
  default: break;

  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");

  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    // Ops[0] is the scalar, Ops[1] the vector and Ops[2] the lane index.
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  // NOTE(review): all four SHA1 cases pass the name hint "vsha1h", including
  // the sha1c/sha1p/sha1m ones -- looks like a copy-paste; confirm whether
  // distinct hints were intended.
  case NEON::BI__builtin_neon_vsha1h_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1cq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1h");

  // The ARM _MoveToCoprocessor builtins put the input register value as
  // the first argument, but the LLVM intrinsic expects it as the third one.
  case ARM::BI_MoveToCoprocessor:
  case ARM::BI_MoveToCoprocessor2: {
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
                                   Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
                                  Ops[3], Ops[4], Ops[5]});
  }
  // The builtins below are forwarded to EmitMSVCBuiltinExpr together with
  // the original call expression; Ops is not used for them.
  case ARM::BI_BitScanForward:
  case ARM::BI_BitScanForward64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
  case ARM::BI_BitScanReverse:
  case ARM::BI_BitScanReverse64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);

  case ARM::BI_InterlockedAnd64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
  case ARM::BI_InterlockedExchange64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
  case ARM::BI_InterlockedExchangeAdd64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
  case ARM::BI_InterlockedExchangeSub64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
  case ARM::BI_InterlockedOr64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
  case ARM::BI_InterlockedXor64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
  case ARM::BI_InterlockedDecrement64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
  case ARM::BI_InterlockedIncrement64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
  }

  // Get the last argument, which specifies the vector type.
  assert(HasExtraArg);
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return nullptr;

  // vcvtr: here the trailing constant selects the unsigned (value 1) or
  // signed form, rather than encoding NEON type flags.
  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(Result.getZExtValue());
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Many NEON builtins have identical semantics and uses in ARM and
  // AArch64. Emit these in a single function.
  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);

  // NEON builtins not found in the shared table are handled here.
  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
      Value *Align = getAlignmentValue32(PtrOp0);
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
      // Combine them.
      uint32_t Indices[] = {1 - Lane, Lane};
      SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
    }
    // fall through
  case NEON::BI__builtin_neon_vld1_lane_v: {
    // Load one element through PtrOp0 and insert it at lane Ops[2] of the
    // vector in Ops[1].
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
    Value *Ld = Builder.CreateLoad(PtrOp0);
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v: {
    // Handle 64-bit elements as a special-case.  There is no "dup" needed.
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_dup_v:
        Int = Intrinsic::arm_neon_vld2;
        break;
      case NEON::BI__builtin_neon_vld3_dup_v:
        Int = Intrinsic::arm_neon_vld3;
        break;
      case NEON::BI__builtin_neon_vld4_dup_v:
        Int = Intrinsic::arm_neon_vld4;
        break;
      default: llvm_unreachable("unknown vld_dup intrinsic?");
      }
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
      Function *F = CGM.getIntrinsic(Int, Tys);
      llvm::Value *Align = getAlignmentValue32(PtrOp1);
      Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
      // Store the loaded aggregate through the pointer in Ops[0].
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
      return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
    }
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case NEON::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case NEON::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: llvm_unreachable("unknown vld_dup intrinsic?");
    }
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    // Arguments for the lane load: the source pointer, one undef vector per
    // element of the result struct, lane index 0, and the alignment.
    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);
    Args.push_back(getAlignmentValue32(PtrOp1));

    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    // splat lane 0 to all elts in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    // Store the resulting aggregate through the pointer in Ops[0].
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    // Apply the vrshift intrinsic to Ops[1] with the shift vector built from
    // Ops[2], then add the result to the accumulator in Ops[0].
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
    // vsri shares the vsli code below; only the rightShift flag differs.
    rightShift = true;
    LLVM_FALLTHROUGH;
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    // Right-shift Ops[1] by the immediate in Ops[2], then add the result to
    // the accumulator in Ops[0].
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // The lane operand slot is reused to carry the alignment operand.
      Ops[2] = getAlignmentValue32(PtrOp0);
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Tys), Ops);
    }
    // fall through
  case NEON::BI__builtin_neon_vst1_lane_v: {
    // Extract lane Ops[2] from the vector in Ops[1] and store it through
    // PtrOp0.
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
    return St;
  }
  // Table lookup builtins map one-to-one onto the matching intrinsic.
  case NEON::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case NEON::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case NEON::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case NEON::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case NEON::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case NEON::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case NEON::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case NEON::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  }
}
5250 unsigned int Int = 0; 5251 const char *s = nullptr; 5252 5253 switch (BuiltinID) { 5254 default: 5255 return nullptr; 5256 case NEON::BI__builtin_neon_vtbl1_v: 5257 case NEON::BI__builtin_neon_vqtbl1_v: 5258 case NEON::BI__builtin_neon_vqtbl1q_v: 5259 case NEON::BI__builtin_neon_vtbl2_v: 5260 case NEON::BI__builtin_neon_vqtbl2_v: 5261 case NEON::BI__builtin_neon_vqtbl2q_v: 5262 case NEON::BI__builtin_neon_vtbl3_v: 5263 case NEON::BI__builtin_neon_vqtbl3_v: 5264 case NEON::BI__builtin_neon_vqtbl3q_v: 5265 case NEON::BI__builtin_neon_vtbl4_v: 5266 case NEON::BI__builtin_neon_vqtbl4_v: 5267 case NEON::BI__builtin_neon_vqtbl4q_v: 5268 break; 5269 case NEON::BI__builtin_neon_vtbx1_v: 5270 case NEON::BI__builtin_neon_vqtbx1_v: 5271 case NEON::BI__builtin_neon_vqtbx1q_v: 5272 case NEON::BI__builtin_neon_vtbx2_v: 5273 case NEON::BI__builtin_neon_vqtbx2_v: 5274 case NEON::BI__builtin_neon_vqtbx2q_v: 5275 case NEON::BI__builtin_neon_vtbx3_v: 5276 case NEON::BI__builtin_neon_vqtbx3_v: 5277 case NEON::BI__builtin_neon_vqtbx3q_v: 5278 case NEON::BI__builtin_neon_vtbx4_v: 5279 case NEON::BI__builtin_neon_vqtbx4_v: 5280 case NEON::BI__builtin_neon_vqtbx4q_v: 5281 break; 5282 } 5283 5284 assert(E->getNumArgs() >= 3); 5285 5286 // Get the last argument, which specifies the vector type. 5287 llvm::APSInt Result; 5288 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5289 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5290 return nullptr; 5291 5292 // Determine the type of this overloaded NEON intrinsic. 5293 NeonTypeFlags Type(Result.getZExtValue()); 5294 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 5295 if (!Ty) 5296 return nullptr; 5297 5298 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5299 5300 // AArch64 scalar builtins are not overloaded, they do not have an extra 5301 // argument that specifies the vector type, need to handle each case. 
5302 switch (BuiltinID) { 5303 case NEON::BI__builtin_neon_vtbl1_v: { 5304 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 5305 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 5306 "vtbl1"); 5307 } 5308 case NEON::BI__builtin_neon_vtbl2_v: { 5309 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 5310 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 5311 "vtbl1"); 5312 } 5313 case NEON::BI__builtin_neon_vtbl3_v: { 5314 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 5315 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 5316 "vtbl2"); 5317 } 5318 case NEON::BI__builtin_neon_vtbl4_v: { 5319 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 5320 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 5321 "vtbl2"); 5322 } 5323 case NEON::BI__builtin_neon_vtbx1_v: { 5324 Value *TblRes = 5325 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 5326 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 5327 5328 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 5329 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 5330 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5331 5332 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 5333 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5334 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5335 } 5336 case NEON::BI__builtin_neon_vtbx2_v: { 5337 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 5338 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 5339 "vtbx1"); 5340 } 5341 case NEON::BI__builtin_neon_vtbx3_v: { 5342 Value *TblRes = 5343 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 5344 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 5345 5346 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 5347 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 5348 TwentyFourV); 5349 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5350 5351 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 5352 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5353 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5354 } 5355 case NEON::BI__builtin_neon_vtbx4_v: { 5356 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 5357 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 5358 "vtbx2"); 5359 } 5360 case NEON::BI__builtin_neon_vqtbl1_v: 5361 case NEON::BI__builtin_neon_vqtbl1q_v: 5362 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 5363 case NEON::BI__builtin_neon_vqtbl2_v: 5364 case NEON::BI__builtin_neon_vqtbl2q_v: { 5365 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 5366 case NEON::BI__builtin_neon_vqtbl3_v: 5367 case NEON::BI__builtin_neon_vqtbl3q_v: 5368 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 5369 case NEON::BI__builtin_neon_vqtbl4_v: 5370 case NEON::BI__builtin_neon_vqtbl4q_v: 5371 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 5372 case NEON::BI__builtin_neon_vqtbx1_v: 5373 case NEON::BI__builtin_neon_vqtbx1q_v: 5374 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 5375 case NEON::BI__builtin_neon_vqtbx2_v: 5376 case NEON::BI__builtin_neon_vqtbx2q_v: 5377 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 5378 case NEON::BI__builtin_neon_vqtbx3_v: 5379 case NEON::BI__builtin_neon_vqtbx3q_v: 5380 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 5381 case NEON::BI__builtin_neon_vqtbx4_v: 5382 case NEON::BI__builtin_neon_vqtbx4q_v: 5383 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 5384 } 5385 } 5386 5387 if (!Int) 5388 return nullptr; 5389 5390 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 5391 return CGF.EmitNeonCall(F, Ops, s); 5392 } 5393 5394 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 5395 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 5396 Op = Builder.CreateBitCast(Op, Int16Ty); 5397 Value *V = UndefValue::get(VTy); 5398 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 5399 Op = 
Builder.CreateInsertElement(V, Op, CI); 5400 return Op; 5401 } 5402 5403 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 5404 const CallExpr *E) { 5405 unsigned HintID = static_cast<unsigned>(-1); 5406 switch (BuiltinID) { 5407 default: break; 5408 case AArch64::BI__builtin_arm_nop: 5409 HintID = 0; 5410 break; 5411 case AArch64::BI__builtin_arm_yield: 5412 HintID = 1; 5413 break; 5414 case AArch64::BI__builtin_arm_wfe: 5415 HintID = 2; 5416 break; 5417 case AArch64::BI__builtin_arm_wfi: 5418 HintID = 3; 5419 break; 5420 case AArch64::BI__builtin_arm_sev: 5421 HintID = 4; 5422 break; 5423 case AArch64::BI__builtin_arm_sevl: 5424 HintID = 5; 5425 break; 5426 } 5427 5428 if (HintID != static_cast<unsigned>(-1)) { 5429 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 5430 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 5431 } 5432 5433 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 5434 Value *Address = EmitScalarExpr(E->getArg(0)); 5435 Value *RW = EmitScalarExpr(E->getArg(1)); 5436 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 5437 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 5438 Value *IsData = EmitScalarExpr(E->getArg(4)); 5439 5440 Value *Locality = nullptr; 5441 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 5442 // Temporal fetch, needs to convert cache level to locality. 5443 Locality = llvm::ConstantInt::get(Int32Ty, 5444 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 5445 } else { 5446 // Streaming fetch. 5447 Locality = llvm::ConstantInt::get(Int32Ty, 0); 5448 } 5449 5450 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 5451 // PLDL3STRM or PLDL2STRM. 
5452 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5453 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5454 } 5455 5456 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5457 assert((getContext().getTypeSize(E->getType()) == 32) && 5458 "rbit of unusual size!"); 5459 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5460 return Builder.CreateCall( 5461 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5462 } 5463 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5464 assert((getContext().getTypeSize(E->getType()) == 64) && 5465 "rbit of unusual size!"); 5466 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5467 return Builder.CreateCall( 5468 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5469 } 5470 5471 if (BuiltinID == AArch64::BI__clear_cache) { 5472 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5473 const FunctionDecl *FD = E->getDirectCallee(); 5474 Value *Ops[2]; 5475 for (unsigned i = 0; i < 2; i++) 5476 Ops[i] = EmitScalarExpr(E->getArg(i)); 5477 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5478 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5479 StringRef Name = FD->getName(); 5480 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5481 } 5482 5483 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5484 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5485 getContext().getTypeSize(E->getType()) == 128) { 5486 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5487 ? 
Intrinsic::aarch64_ldaxp 5488 : Intrinsic::aarch64_ldxp); 5489 5490 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5491 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5492 "ldxp"); 5493 5494 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5495 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5496 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5497 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5498 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5499 5500 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5501 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5502 Val = Builder.CreateOr(Val, Val1); 5503 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5504 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5505 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5506 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5507 5508 QualType Ty = E->getType(); 5509 llvm::Type *RealResTy = ConvertType(Ty); 5510 llvm::Type *PtrTy = llvm::IntegerType::get( 5511 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5512 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5513 5514 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5515 ? Intrinsic::aarch64_ldaxr 5516 : Intrinsic::aarch64_ldxr, 5517 PtrTy); 5518 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5519 5520 if (RealResTy->isPointerTy()) 5521 return Builder.CreateIntToPtr(Val, RealResTy); 5522 5523 llvm::Type *IntResTy = llvm::IntegerType::get( 5524 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5525 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5526 return Builder.CreateBitCast(Val, RealResTy); 5527 } 5528 5529 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5530 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5531 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5532 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5533 ? 
Intrinsic::aarch64_stlxp 5534 : Intrinsic::aarch64_stxp); 5535 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 5536 5537 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5538 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5539 5540 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5541 llvm::Value *Val = Builder.CreateLoad(Tmp); 5542 5543 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5544 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5545 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5546 Int8PtrTy); 5547 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5548 } 5549 5550 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5551 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5552 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5553 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5554 5555 QualType Ty = E->getArg(0)->getType(); 5556 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5557 getContext().getTypeSize(Ty)); 5558 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5559 5560 if (StoreVal->getType()->isPointerTy()) 5561 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5562 else { 5563 llvm::Type *IntTy = llvm::IntegerType::get( 5564 getLLVMContext(), 5565 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5566 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5567 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5568 } 5569 5570 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5571 ? 
Intrinsic::aarch64_stlxr 5572 : Intrinsic::aarch64_stxr, 5573 StoreAddr->getType()); 5574 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5575 } 5576 5577 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5578 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5579 return Builder.CreateCall(F); 5580 } 5581 5582 // CRC32 5583 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5584 switch (BuiltinID) { 5585 case AArch64::BI__builtin_arm_crc32b: 5586 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5587 case AArch64::BI__builtin_arm_crc32cb: 5588 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5589 case AArch64::BI__builtin_arm_crc32h: 5590 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5591 case AArch64::BI__builtin_arm_crc32ch: 5592 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5593 case AArch64::BI__builtin_arm_crc32w: 5594 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5595 case AArch64::BI__builtin_arm_crc32cw: 5596 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5597 case AArch64::BI__builtin_arm_crc32d: 5598 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5599 case AArch64::BI__builtin_arm_crc32cd: 5600 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5601 } 5602 5603 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5604 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5605 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5606 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5607 5608 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 5609 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 5610 5611 return Builder.CreateCall(F, {Arg0, Arg1}); 5612 } 5613 5614 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 5615 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5616 BuiltinID == AArch64::BI__builtin_arm_rsrp || 5617 BuiltinID == AArch64::BI__builtin_arm_wsr || 5618 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 5619 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 5620 5621 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 5622 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5623 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5624 5625 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5626 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5627 5628 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5629 BuiltinID != AArch64::BI__builtin_arm_wsr; 5630 5631 llvm::Type *ValueType; 5632 llvm::Type *RegisterType = Int64Ty; 5633 if (IsPointerBuiltin) { 5634 ValueType = VoidPtrTy; 5635 } else if (Is64Bit) { 5636 ValueType = Int64Ty; 5637 } else { 5638 ValueType = Int32Ty; 5639 } 5640 5641 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5642 } 5643 5644 // Find out if any arguments are required to be integer constant 5645 // expressions. 5646 unsigned ICEArguments = 0; 5647 ASTContext::GetBuiltinTypeError Error; 5648 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5649 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5650 5651 llvm::SmallVector<Value*, 4> Ops; 5652 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5653 if ((ICEArguments & (1 << i)) == 0) { 5654 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5655 } else { 5656 // If this is required to be a constant, constant fold it so that we know 5657 // that the generated intrinsic gets a ConstantInt. 
5658 llvm::APSInt Result; 5659 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5660 assert(IsConst && "Constant arg isn't actually constant?"); 5661 (void)IsConst; 5662 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5663 } 5664 } 5665 5666 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5667 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5668 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5669 5670 if (Builtin) { 5671 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5672 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5673 assert(Result && "SISD intrinsic should have been handled"); 5674 return Result; 5675 } 5676 5677 llvm::APSInt Result; 5678 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5679 NeonTypeFlags Type(0); 5680 if (Arg->isIntegerConstantExpr(Result, getContext())) 5681 // Determine the type of this overloaded NEON intrinsic. 5682 Type = NeonTypeFlags(Result.getZExtValue()); 5683 5684 bool usgn = Type.isUnsigned(); 5685 bool quad = Type.isQuad(); 5686 5687 // Handle non-overloaded intrinsics first. 
5688 switch (BuiltinID) { 5689 default: break; 5690 case NEON::BI__builtin_neon_vldrq_p128: { 5691 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 5692 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 5693 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5694 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 5695 CharUnits::fromQuantity(16)); 5696 } 5697 case NEON::BI__builtin_neon_vstrq_p128: { 5698 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5699 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5700 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5701 } 5702 case NEON::BI__builtin_neon_vcvts_u32_f32: 5703 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5704 usgn = true; 5705 // FALL THROUGH 5706 case NEON::BI__builtin_neon_vcvts_s32_f32: 5707 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5708 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5709 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5710 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5711 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5712 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5713 if (usgn) 5714 return Builder.CreateFPToUI(Ops[0], InTy); 5715 return Builder.CreateFPToSI(Ops[0], InTy); 5716 } 5717 case NEON::BI__builtin_neon_vcvts_f32_u32: 5718 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5719 usgn = true; 5720 // FALL THROUGH 5721 case NEON::BI__builtin_neon_vcvts_f32_s32: 5722 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5723 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5724 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5725 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5726 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5727 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5728 if (usgn) 5729 return Builder.CreateUIToFP(Ops[0], FTy); 5730 return Builder.CreateSIToFP(Ops[0], FTy); 5731 } 5732 case NEON::BI__builtin_neon_vpaddd_s64: { 5733 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5734 Value *Vec = EmitScalarExpr(E->getArg(0)); 5735 // The vector is v2f64, so make sure it's bitcast to that. 5736 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5737 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5738 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5739 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5740 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5741 // Pairwise addition of a v2f64 into a scalar f64. 5742 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5743 } 5744 case NEON::BI__builtin_neon_vpaddd_f64: { 5745 llvm::Type *Ty = 5746 llvm::VectorType::get(DoubleTy, 2); 5747 Value *Vec = EmitScalarExpr(E->getArg(0)); 5748 // The vector is v2f64, so make sure it's bitcast to that. 5749 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5750 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5751 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5752 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5753 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5754 // Pairwise addition of a v2f64 into a scalar f64. 5755 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5756 } 5757 case NEON::BI__builtin_neon_vpadds_f32: { 5758 llvm::Type *Ty = 5759 llvm::VectorType::get(FloatTy, 2); 5760 Value *Vec = EmitScalarExpr(E->getArg(0)); 5761 // The vector is v2f32, so make sure it's bitcast to that. 
5762 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5763 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5764 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5765 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5766 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5767 // Pairwise addition of a v2f32 into a scalar f32. 5768 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5769 } 5770 case NEON::BI__builtin_neon_vceqzd_s64: 5771 case NEON::BI__builtin_neon_vceqzd_f64: 5772 case NEON::BI__builtin_neon_vceqzs_f32: 5773 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5774 return EmitAArch64CompareBuiltinExpr( 5775 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5776 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5777 case NEON::BI__builtin_neon_vcgezd_s64: 5778 case NEON::BI__builtin_neon_vcgezd_f64: 5779 case NEON::BI__builtin_neon_vcgezs_f32: 5780 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5781 return EmitAArch64CompareBuiltinExpr( 5782 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5783 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5784 case NEON::BI__builtin_neon_vclezd_s64: 5785 case NEON::BI__builtin_neon_vclezd_f64: 5786 case NEON::BI__builtin_neon_vclezs_f32: 5787 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5788 return EmitAArch64CompareBuiltinExpr( 5789 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5790 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5791 case NEON::BI__builtin_neon_vcgtzd_s64: 5792 case NEON::BI__builtin_neon_vcgtzd_f64: 5793 case NEON::BI__builtin_neon_vcgtzs_f32: 5794 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5795 return EmitAArch64CompareBuiltinExpr( 5796 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5797 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5798 case NEON::BI__builtin_neon_vcltzd_s64: 5799 case NEON::BI__builtin_neon_vcltzd_f64: 5800 case NEON::BI__builtin_neon_vcltzs_f32: 5801 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5802 return 
EmitAArch64CompareBuiltinExpr( 5803 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5804 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5805 5806 case NEON::BI__builtin_neon_vceqzd_u64: { 5807 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5808 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5809 Ops[0] = 5810 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5811 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5812 } 5813 case NEON::BI__builtin_neon_vceqd_f64: 5814 case NEON::BI__builtin_neon_vcled_f64: 5815 case NEON::BI__builtin_neon_vcltd_f64: 5816 case NEON::BI__builtin_neon_vcged_f64: 5817 case NEON::BI__builtin_neon_vcgtd_f64: { 5818 llvm::CmpInst::Predicate P; 5819 switch (BuiltinID) { 5820 default: llvm_unreachable("missing builtin ID in switch!"); 5821 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5822 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5823 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5824 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5825 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5826 } 5827 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5828 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5829 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5830 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5831 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5832 } 5833 case NEON::BI__builtin_neon_vceqs_f32: 5834 case NEON::BI__builtin_neon_vcles_f32: 5835 case NEON::BI__builtin_neon_vclts_f32: 5836 case NEON::BI__builtin_neon_vcges_f32: 5837 case NEON::BI__builtin_neon_vcgts_f32: { 5838 llvm::CmpInst::Predicate P; 5839 switch (BuiltinID) { 5840 default: llvm_unreachable("missing builtin ID in switch!"); 5841 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 5842 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 5843 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5844 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5845 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5846 } 5847 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5848 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5849 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5850 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5851 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5852 } 5853 case NEON::BI__builtin_neon_vceqd_s64: 5854 case NEON::BI__builtin_neon_vceqd_u64: 5855 case NEON::BI__builtin_neon_vcgtd_s64: 5856 case NEON::BI__builtin_neon_vcgtd_u64: 5857 case NEON::BI__builtin_neon_vcltd_s64: 5858 case NEON::BI__builtin_neon_vcltd_u64: 5859 case NEON::BI__builtin_neon_vcged_u64: 5860 case NEON::BI__builtin_neon_vcged_s64: 5861 case NEON::BI__builtin_neon_vcled_u64: 5862 case NEON::BI__builtin_neon_vcled_s64: { 5863 llvm::CmpInst::Predicate P; 5864 switch (BuiltinID) { 5865 default: llvm_unreachable("missing builtin ID in switch!"); 5866 case NEON::BI__builtin_neon_vceqd_s64: 5867 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5868 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5869 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5870 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5871 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5872 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5873 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5874 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5875 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5876 } 5877 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5878 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5879 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 5880 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5881 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5882 } 5883 case NEON::BI__builtin_neon_vtstd_s64: 5884 case NEON::BI__builtin_neon_vtstd_u64: { 5885 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5886 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5887 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5888 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 5889 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 5890 llvm::Constant::getNullValue(Int64Ty)); 5891 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 5892 } 5893 case NEON::BI__builtin_neon_vset_lane_i8: 5894 case NEON::BI__builtin_neon_vset_lane_i16: 5895 case NEON::BI__builtin_neon_vset_lane_i32: 5896 case NEON::BI__builtin_neon_vset_lane_i64: 5897 case NEON::BI__builtin_neon_vset_lane_f32: 5898 case NEON::BI__builtin_neon_vsetq_lane_i8: 5899 case NEON::BI__builtin_neon_vsetq_lane_i16: 5900 case NEON::BI__builtin_neon_vsetq_lane_i32: 5901 case NEON::BI__builtin_neon_vsetq_lane_i64: 5902 case NEON::BI__builtin_neon_vsetq_lane_f32: 5903 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5904 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5905 case NEON::BI__builtin_neon_vset_lane_f64: 5906 // The vector type needs a cast for the v1f64 variant. 5907 Ops[1] = Builder.CreateBitCast(Ops[1], 5908 llvm::VectorType::get(DoubleTy, 1)); 5909 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5910 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5911 case NEON::BI__builtin_neon_vsetq_lane_f64: 5912 // The vector type needs a cast for the v2f64 variant. 
5913 Ops[1] = Builder.CreateBitCast(Ops[1], 5914 llvm::VectorType::get(DoubleTy, 2)); 5915 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5916 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5917 5918 case NEON::BI__builtin_neon_vget_lane_i8: 5919 case NEON::BI__builtin_neon_vdupb_lane_i8: 5920 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 5921 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5922 "vget_lane"); 5923 case NEON::BI__builtin_neon_vgetq_lane_i8: 5924 case NEON::BI__builtin_neon_vdupb_laneq_i8: 5925 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 5926 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5927 "vgetq_lane"); 5928 case NEON::BI__builtin_neon_vget_lane_i16: 5929 case NEON::BI__builtin_neon_vduph_lane_i16: 5930 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 5931 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5932 "vget_lane"); 5933 case NEON::BI__builtin_neon_vgetq_lane_i16: 5934 case NEON::BI__builtin_neon_vduph_laneq_i16: 5935 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 5936 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5937 "vgetq_lane"); 5938 case NEON::BI__builtin_neon_vget_lane_i32: 5939 case NEON::BI__builtin_neon_vdups_lane_i32: 5940 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 5941 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5942 "vget_lane"); 5943 case NEON::BI__builtin_neon_vdups_lane_f32: 5944 Ops[0] = Builder.CreateBitCast(Ops[0], 5945 llvm::VectorType::get(FloatTy, 2)); 5946 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5947 "vdups_lane"); 5948 case NEON::BI__builtin_neon_vgetq_lane_i32: 5949 case NEON::BI__builtin_neon_vdups_laneq_i32: 5950 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 5951 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5952 "vgetq_lane"); 5953 case NEON::BI__builtin_neon_vget_lane_i64: 5954 case NEON::BI__builtin_neon_vdupd_lane_i64: 5955 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 5956 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5957 "vget_lane"); 5958 case NEON::BI__builtin_neon_vdupd_lane_f64: 5959 Ops[0] = Builder.CreateBitCast(Ops[0], 5960 llvm::VectorType::get(DoubleTy, 1)); 5961 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5962 "vdupd_lane"); 5963 case NEON::BI__builtin_neon_vgetq_lane_i64: 5964 case NEON::BI__builtin_neon_vdupd_laneq_i64: 5965 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 5966 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5967 "vgetq_lane"); 5968 case NEON::BI__builtin_neon_vget_lane_f32: 5969 Ops[0] = Builder.CreateBitCast(Ops[0], 5970 llvm::VectorType::get(FloatTy, 2)); 5971 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5972 "vget_lane"); 5973 case NEON::BI__builtin_neon_vget_lane_f64: 5974 Ops[0] = Builder.CreateBitCast(Ops[0], 5975 llvm::VectorType::get(DoubleTy, 1)); 5976 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5977 "vget_lane"); 5978 case NEON::BI__builtin_neon_vgetq_lane_f32: 5979 case NEON::BI__builtin_neon_vdups_laneq_f32: 5980 Ops[0] = Builder.CreateBitCast(Ops[0], 5981 llvm::VectorType::get(FloatTy, 4)); 5982 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5983 "vgetq_lane"); 5984 case NEON::BI__builtin_neon_vgetq_lane_f64: 5985 case NEON::BI__builtin_neon_vdupd_laneq_f64: 5986 Ops[0] = Builder.CreateBitCast(Ops[0], 5987 llvm::VectorType::get(DoubleTy, 2)); 5988 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5989 "vgetq_lane"); 5990 case NEON::BI__builtin_neon_vaddd_s64: 
5991 case NEON::BI__builtin_neon_vaddd_u64: 5992 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 5993 case NEON::BI__builtin_neon_vsubd_s64: 5994 case NEON::BI__builtin_neon_vsubd_u64: 5995 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 5996 case NEON::BI__builtin_neon_vqdmlalh_s16: 5997 case NEON::BI__builtin_neon_vqdmlslh_s16: { 5998 SmallVector<Value *, 2> ProductOps; 5999 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6000 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 6001 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6002 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6003 ProductOps, "vqdmlXl"); 6004 Constant *CI = ConstantInt::get(SizeTy, 0); 6005 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6006 6007 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 6008 ? Intrinsic::aarch64_neon_sqadd 6009 : Intrinsic::aarch64_neon_sqsub; 6010 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 6011 } 6012 case NEON::BI__builtin_neon_vqshlud_n_s64: { 6013 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6014 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6015 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 6016 Ops, "vqshlu_n"); 6017 } 6018 case NEON::BI__builtin_neon_vqshld_n_u64: 6019 case NEON::BI__builtin_neon_vqshld_n_s64: { 6020 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 6021 ? Intrinsic::aarch64_neon_uqshl 6022 : Intrinsic::aarch64_neon_sqshl; 6023 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6024 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6025 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 6026 } 6027 case NEON::BI__builtin_neon_vrshrd_n_u64: 6028 case NEON::BI__builtin_neon_vrshrd_n_s64: { 6029 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 6030 ? 
Intrinsic::aarch64_neon_urshl 6031 : Intrinsic::aarch64_neon_srshl; 6032 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6033 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 6034 Ops[1] = ConstantInt::get(Int64Ty, -SV); 6035 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 6036 } 6037 case NEON::BI__builtin_neon_vrsrad_n_u64: 6038 case NEON::BI__builtin_neon_vrsrad_n_s64: { 6039 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 6040 ? Intrinsic::aarch64_neon_urshl 6041 : Intrinsic::aarch64_neon_srshl; 6042 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6043 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 6044 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 6045 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 6046 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 6047 } 6048 case NEON::BI__builtin_neon_vshld_n_s64: 6049 case NEON::BI__builtin_neon_vshld_n_u64: { 6050 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6051 return Builder.CreateShl( 6052 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 6053 } 6054 case NEON::BI__builtin_neon_vshrd_n_s64: { 6055 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6056 return Builder.CreateAShr( 6057 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6058 Amt->getZExtValue())), 6059 "shrd_n"); 6060 } 6061 case NEON::BI__builtin_neon_vshrd_n_u64: { 6062 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6063 uint64_t ShiftAmt = Amt->getZExtValue(); 6064 // Right-shifting an unsigned value by its size yields 0. 
6065 if (ShiftAmt == 64) 6066 return ConstantInt::get(Int64Ty, 0); 6067 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 6068 "shrd_n"); 6069 } 6070 case NEON::BI__builtin_neon_vsrad_n_s64: { 6071 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6072 Ops[1] = Builder.CreateAShr( 6073 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6074 Amt->getZExtValue())), 6075 "shrd_n"); 6076 return Builder.CreateAdd(Ops[0], Ops[1]); 6077 } 6078 case NEON::BI__builtin_neon_vsrad_n_u64: { 6079 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6080 uint64_t ShiftAmt = Amt->getZExtValue(); 6081 // Right-shifting an unsigned value by its size yields 0. 6082 // As Op + 0 = Op, return Ops[0] directly. 6083 if (ShiftAmt == 64) 6084 return Ops[0]; 6085 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 6086 "shrd_n"); 6087 return Builder.CreateAdd(Ops[0], Ops[1]); 6088 } 6089 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 6090 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 6091 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 6092 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 6093 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6094 "lane"); 6095 SmallVector<Value *, 2> ProductOps; 6096 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6097 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 6098 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6099 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6100 ProductOps, "vqdmlXl"); 6101 Constant *CI = ConstantInt::get(SizeTy, 0); 6102 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6103 Ops.pop_back(); 6104 6105 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 6106 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 6107 ? 
Intrinsic::aarch64_neon_sqadd 6108 : Intrinsic::aarch64_neon_sqsub; 6109 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 6110 } 6111 case NEON::BI__builtin_neon_vqdmlals_s32: 6112 case NEON::BI__builtin_neon_vqdmlsls_s32: { 6113 SmallVector<Value *, 2> ProductOps; 6114 ProductOps.push_back(Ops[1]); 6115 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 6116 Ops[1] = 6117 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6118 ProductOps, "vqdmlXl"); 6119 6120 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 6121 ? Intrinsic::aarch64_neon_sqadd 6122 : Intrinsic::aarch64_neon_sqsub; 6123 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 6124 } 6125 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 6126 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 6127 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 6128 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 6129 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6130 "lane"); 6131 SmallVector<Value *, 2> ProductOps; 6132 ProductOps.push_back(Ops[1]); 6133 ProductOps.push_back(Ops[2]); 6134 Ops[1] = 6135 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6136 ProductOps, "vqdmlXl"); 6137 Ops.pop_back(); 6138 6139 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 6140 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 6141 ? Intrinsic::aarch64_neon_sqadd 6142 : Intrinsic::aarch64_neon_sqsub; 6143 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 6144 } 6145 } 6146 6147 llvm::VectorType *VTy = GetNeonType(this, Type); 6148 llvm::Type *Ty = VTy; 6149 if (!Ty) 6150 return nullptr; 6151 6152 // Not all intrinsics handled by the common case work for AArch64 yet, so only 6153 // defer to common code if it's been added to our special map. 
6154 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 6155 AArch64SIMDIntrinsicsProvenSorted); 6156 6157 if (Builtin) 6158 return EmitCommonNeonBuiltinExpr( 6159 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 6160 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 6161 /*never use addresses*/ Address::invalid(), Address::invalid()); 6162 6163 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 6164 return V; 6165 6166 unsigned Int; 6167 switch (BuiltinID) { 6168 default: return nullptr; 6169 case NEON::BI__builtin_neon_vbsl_v: 6170 case NEON::BI__builtin_neon_vbslq_v: { 6171 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 6172 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 6173 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 6174 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 6175 6176 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 6177 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 6178 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 6179 return Builder.CreateBitCast(Ops[0], Ty); 6180 } 6181 case NEON::BI__builtin_neon_vfma_lane_v: 6182 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 6183 // The ARM builtins (and instructions) have the addend as the first 6184 // operand, but the 'fma' intrinsics have it last. Swap it around here. 6185 Value *Addend = Ops[0]; 6186 Value *Multiplicand = Ops[1]; 6187 Value *LaneSource = Ops[2]; 6188 Ops[0] = Multiplicand; 6189 Ops[1] = LaneSource; 6190 Ops[2] = Addend; 6191 6192 // Now adjust things to handle the lane access. 6193 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
6194 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 6195 VTy; 6196 llvm::Constant *cst = cast<Constant>(Ops[3]); 6197 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 6198 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 6199 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 6200 6201 Ops.pop_back(); 6202 Int = Intrinsic::fma; 6203 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 6204 } 6205 case NEON::BI__builtin_neon_vfma_laneq_v: { 6206 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 6207 // v1f64 fma should be mapped to Neon scalar f64 fma 6208 if (VTy && VTy->getElementType() == DoubleTy) { 6209 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6210 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6211 llvm::Type *VTy = GetNeonType(this, 6212 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 6213 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 6214 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6215 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 6216 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6217 return Builder.CreateBitCast(Result, Ty); 6218 } 6219 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6220 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6221 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6222 6223 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 6224 VTy->getNumElements() * 2); 6225 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 6226 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 6227 cast<ConstantInt>(Ops[3])); 6228 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 6229 6230 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6231 } 6232 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 6233 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6234 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6235 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6236 6237 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 6238 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 6239 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6240 } 6241 case NEON::BI__builtin_neon_vfmas_lane_f32: 6242 case NEON::BI__builtin_neon_vfmas_laneq_f32: 6243 case NEON::BI__builtin_neon_vfmad_lane_f64: 6244 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 6245 Ops.push_back(EmitScalarExpr(E->getArg(3))); 6246 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 6247 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6248 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6249 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6250 } 6251 case NEON::BI__builtin_neon_vmull_v: 6252 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6253 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 6254 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 6255 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 6256 case NEON::BI__builtin_neon_vmax_v: 6257 case NEON::BI__builtin_neon_vmaxq_v: 6258 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6259 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 6260 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 6261 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 6262 case NEON::BI__builtin_neon_vmin_v: 6263 case NEON::BI__builtin_neon_vminq_v: 6264 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6265 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 6266 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 6267 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 6268 case NEON::BI__builtin_neon_vabd_v: 6269 case NEON::BI__builtin_neon_vabdq_v: 6270 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6271 Int = usgn ? 
Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 6272 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 6273 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 6274 case NEON::BI__builtin_neon_vpadal_v: 6275 case NEON::BI__builtin_neon_vpadalq_v: { 6276 unsigned ArgElts = VTy->getNumElements(); 6277 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 6278 unsigned BitWidth = EltTy->getBitWidth(); 6279 llvm::Type *ArgTy = llvm::VectorType::get( 6280 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 6281 llvm::Type* Tys[2] = { VTy, ArgTy }; 6282 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 6283 SmallVector<llvm::Value*, 1> TmpOps; 6284 TmpOps.push_back(Ops[1]); 6285 Function *F = CGM.getIntrinsic(Int, Tys); 6286 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 6287 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 6288 return Builder.CreateAdd(tmp, addend); 6289 } 6290 case NEON::BI__builtin_neon_vpmin_v: 6291 case NEON::BI__builtin_neon_vpminq_v: 6292 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6293 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 6294 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 6295 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 6296 case NEON::BI__builtin_neon_vpmax_v: 6297 case NEON::BI__builtin_neon_vpmaxq_v: 6298 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6299 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 6300 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 6301 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 6302 case NEON::BI__builtin_neon_vminnm_v: 6303 case NEON::BI__builtin_neon_vminnmq_v: 6304 Int = Intrinsic::aarch64_neon_fminnm; 6305 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 6306 case NEON::BI__builtin_neon_vmaxnm_v: 6307 case NEON::BI__builtin_neon_vmaxnmq_v: 6308 Int = Intrinsic::aarch64_neon_fmaxnm; 6309 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 6310 case NEON::BI__builtin_neon_vrecpss_f32: { 6311 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6312 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 6313 Ops, "vrecps"); 6314 } 6315 case NEON::BI__builtin_neon_vrecpsd_f64: { 6316 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6317 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 6318 Ops, "vrecps"); 6319 } 6320 case NEON::BI__builtin_neon_vqshrun_n_v: 6321 Int = Intrinsic::aarch64_neon_sqshrun; 6322 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 6323 case NEON::BI__builtin_neon_vqrshrun_n_v: 6324 Int = Intrinsic::aarch64_neon_sqrshrun; 6325 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 6326 case NEON::BI__builtin_neon_vqshrn_n_v: 6327 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 6328 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 6329 case NEON::BI__builtin_neon_vrshrn_n_v: 6330 Int = Intrinsic::aarch64_neon_rshrn; 6331 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 6332 case NEON::BI__builtin_neon_vqrshrn_n_v: 6333 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 6334 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 6335 case NEON::BI__builtin_neon_vrnda_v: 6336 case NEON::BI__builtin_neon_vrndaq_v: { 6337 Int = Intrinsic::round; 6338 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 6339 } 6340 case NEON::BI__builtin_neon_vrndi_v: 6341 case NEON::BI__builtin_neon_vrndiq_v: { 6342 Int = Intrinsic::nearbyint; 6343 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 6344 } 6345 case NEON::BI__builtin_neon_vrndm_v: 6346 case NEON::BI__builtin_neon_vrndmq_v: { 6347 Int = Intrinsic::floor; 6348 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 6349 } 6350 case NEON::BI__builtin_neon_vrndn_v: 6351 case NEON::BI__builtin_neon_vrndnq_v: { 6352 Int = Intrinsic::aarch64_neon_frintn; 6353 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 6354 } 6355 case NEON::BI__builtin_neon_vrndp_v: 6356 case NEON::BI__builtin_neon_vrndpq_v: { 6357 Int = Intrinsic::ceil; 6358 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 6359 } 6360 case NEON::BI__builtin_neon_vrndx_v: 6361 case NEON::BI__builtin_neon_vrndxq_v: { 6362 Int = Intrinsic::rint; 6363 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 6364 } 6365 case NEON::BI__builtin_neon_vrnd_v: 6366 case NEON::BI__builtin_neon_vrndq_v: { 6367 Int = Intrinsic::trunc; 6368 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 6369 } 6370 case NEON::BI__builtin_neon_vceqz_v: 6371 case NEON::BI__builtin_neon_vceqzq_v: 6372 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 6373 ICmpInst::ICMP_EQ, "vceqz"); 6374 case NEON::BI__builtin_neon_vcgez_v: 6375 case NEON::BI__builtin_neon_vcgezq_v: 6376 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 6377 ICmpInst::ICMP_SGE, "vcgez"); 6378 case NEON::BI__builtin_neon_vclez_v: 6379 case NEON::BI__builtin_neon_vclezq_v: 6380 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 6381 ICmpInst::ICMP_SLE, "vclez"); 6382 case NEON::BI__builtin_neon_vcgtz_v: 6383 case NEON::BI__builtin_neon_vcgtzq_v: 6384 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 6385 ICmpInst::ICMP_SGT, "vcgtz"); 6386 case NEON::BI__builtin_neon_vcltz_v: 6387 case NEON::BI__builtin_neon_vcltzq_v: 6388 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 6389 ICmpInst::ICMP_SLT, "vcltz"); 6390 case NEON::BI__builtin_neon_vcvt_f64_v: 6391 case NEON::BI__builtin_neon_vcvtq_f64_v: 6392 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6393 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 6394 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 6395 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 6396 case NEON::BI__builtin_neon_vcvt_f64_f32: { 6397 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 6398 "unexpected vcvt_f64_f32 builtin"); 6399 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 6400 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6401 6402 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 6403 } 6404 case NEON::BI__builtin_neon_vcvt_f32_f64: { 6405 assert(Type.getEltType() == NeonTypeFlags::Float32 && 6406 "unexpected vcvt_f32_f64 builtin"); 6407 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 6408 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6409 6410 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 6411 } 6412 case NEON::BI__builtin_neon_vcvt_s32_v: 6413 case NEON::BI__builtin_neon_vcvt_u32_v: 6414 case NEON::BI__builtin_neon_vcvt_s64_v: 6415 case NEON::BI__builtin_neon_vcvt_u64_v: 6416 case NEON::BI__builtin_neon_vcvtq_s32_v: 6417 case NEON::BI__builtin_neon_vcvtq_u32_v: 6418 case NEON::BI__builtin_neon_vcvtq_s64_v: 6419 case NEON::BI__builtin_neon_vcvtq_u64_v: { 6420 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, 
Type)); 6421 if (usgn) 6422 return Builder.CreateFPToUI(Ops[0], Ty); 6423 return Builder.CreateFPToSI(Ops[0], Ty); 6424 } 6425 case NEON::BI__builtin_neon_vcvta_s32_v: 6426 case NEON::BI__builtin_neon_vcvtaq_s32_v: 6427 case NEON::BI__builtin_neon_vcvta_u32_v: 6428 case NEON::BI__builtin_neon_vcvtaq_u32_v: 6429 case NEON::BI__builtin_neon_vcvta_s64_v: 6430 case NEON::BI__builtin_neon_vcvtaq_s64_v: 6431 case NEON::BI__builtin_neon_vcvta_u64_v: 6432 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 6433 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 6434 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6435 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 6436 } 6437 case NEON::BI__builtin_neon_vcvtm_s32_v: 6438 case NEON::BI__builtin_neon_vcvtmq_s32_v: 6439 case NEON::BI__builtin_neon_vcvtm_u32_v: 6440 case NEON::BI__builtin_neon_vcvtmq_u32_v: 6441 case NEON::BI__builtin_neon_vcvtm_s64_v: 6442 case NEON::BI__builtin_neon_vcvtmq_s64_v: 6443 case NEON::BI__builtin_neon_vcvtm_u64_v: 6444 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 6445 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 6446 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6447 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 6448 } 6449 case NEON::BI__builtin_neon_vcvtn_s32_v: 6450 case NEON::BI__builtin_neon_vcvtnq_s32_v: 6451 case NEON::BI__builtin_neon_vcvtn_u32_v: 6452 case NEON::BI__builtin_neon_vcvtnq_u32_v: 6453 case NEON::BI__builtin_neon_vcvtn_s64_v: 6454 case NEON::BI__builtin_neon_vcvtnq_s64_v: 6455 case NEON::BI__builtin_neon_vcvtn_u64_v: 6456 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 6457 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 6458 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6459 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 6460 } 6461 case NEON::BI__builtin_neon_vcvtp_s32_v: 6462 case NEON::BI__builtin_neon_vcvtpq_s32_v: 6463 case NEON::BI__builtin_neon_vcvtp_u32_v: 6464 case NEON::BI__builtin_neon_vcvtpq_u32_v: 6465 case NEON::BI__builtin_neon_vcvtp_s64_v: 6466 case NEON::BI__builtin_neon_vcvtpq_s64_v: 6467 case NEON::BI__builtin_neon_vcvtp_u64_v: 6468 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 6469 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 6470 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6471 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 6472 } 6473 case NEON::BI__builtin_neon_vmulx_v: 6474 case NEON::BI__builtin_neon_vmulxq_v: { 6475 Int = Intrinsic::aarch64_neon_fmulx; 6476 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 6477 } 6478 case NEON::BI__builtin_neon_vmul_lane_v: 6479 case NEON::BI__builtin_neon_vmul_laneq_v: { 6480 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 6481 bool Quad = false; 6482 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 6483 Quad = true; 6484 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6485 llvm::Type *VTy = GetNeonType(this, 6486 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 6487 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6488 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 6489 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 6490 return Builder.CreateBitCast(Result, Ty); 6491 } 6492 case NEON::BI__builtin_neon_vnegd_s64: 6493 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 6494 case NEON::BI__builtin_neon_vpmaxnm_v: 6495 case NEON::BI__builtin_neon_vpmaxnmq_v: { 6496 Int = Intrinsic::aarch64_neon_fmaxnmp; 6497 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 6498 } 6499 case 
NEON::BI__builtin_neon_vpminnm_v: 6500 case NEON::BI__builtin_neon_vpminnmq_v: { 6501 Int = Intrinsic::aarch64_neon_fminnmp; 6502 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 6503 } 6504 case NEON::BI__builtin_neon_vsqrt_v: 6505 case NEON::BI__builtin_neon_vsqrtq_v: { 6506 Int = Intrinsic::sqrt; 6507 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6508 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 6509 } 6510 case NEON::BI__builtin_neon_vrbit_v: 6511 case NEON::BI__builtin_neon_vrbitq_v: { 6512 Int = Intrinsic::aarch64_neon_rbit; 6513 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 6514 } 6515 case NEON::BI__builtin_neon_vaddv_u8: 6516 // FIXME: These are handled by the AArch64 scalar code. 6517 usgn = true; 6518 // FALLTHROUGH 6519 case NEON::BI__builtin_neon_vaddv_s8: { 6520 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6521 Ty = Int32Ty; 6522 VTy = llvm::VectorType::get(Int8Ty, 8); 6523 llvm::Type *Tys[2] = { Ty, VTy }; 6524 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6525 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6526 return Builder.CreateTrunc(Ops[0], Int8Ty); 6527 } 6528 case NEON::BI__builtin_neon_vaddv_u16: 6529 usgn = true; 6530 // FALLTHROUGH 6531 case NEON::BI__builtin_neon_vaddv_s16: { 6532 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6533 Ty = Int32Ty; 6534 VTy = llvm::VectorType::get(Int16Ty, 4); 6535 llvm::Type *Tys[2] = { Ty, VTy }; 6536 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6537 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6538 return Builder.CreateTrunc(Ops[0], Int16Ty); 6539 } 6540 case NEON::BI__builtin_neon_vaddvq_u8: 6541 usgn = true; 6542 // FALLTHROUGH 6543 case NEON::BI__builtin_neon_vaddvq_s8: { 6544 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6545 Ty = Int32Ty; 6546 VTy = llvm::VectorType::get(Int8Ty, 16); 6547 llvm::Type *Tys[2] = { Ty, VTy }; 6548 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6549 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6550 return Builder.CreateTrunc(Ops[0], Int8Ty); 6551 } 6552 case NEON::BI__builtin_neon_vaddvq_u16: 6553 usgn = true; 6554 // FALLTHROUGH 6555 case NEON::BI__builtin_neon_vaddvq_s16: { 6556 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6557 Ty = Int32Ty; 6558 VTy = llvm::VectorType::get(Int16Ty, 8); 6559 llvm::Type *Tys[2] = { Ty, VTy }; 6560 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6561 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6562 return Builder.CreateTrunc(Ops[0], Int16Ty); 6563 } 6564 case NEON::BI__builtin_neon_vmaxv_u8: { 6565 Int = Intrinsic::aarch64_neon_umaxv; 6566 Ty = Int32Ty; 6567 VTy = llvm::VectorType::get(Int8Ty, 8); 6568 llvm::Type *Tys[2] = { Ty, VTy }; 6569 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6570 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6571 return Builder.CreateTrunc(Ops[0], Int8Ty); 6572 } 6573 case NEON::BI__builtin_neon_vmaxv_u16: { 6574 Int = Intrinsic::aarch64_neon_umaxv; 6575 Ty = Int32Ty; 6576 VTy = llvm::VectorType::get(Int16Ty, 4); 6577 llvm::Type *Tys[2] = { Ty, VTy }; 6578 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6579 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6580 return Builder.CreateTrunc(Ops[0], Int16Ty); 6581 } 6582 case NEON::BI__builtin_neon_vmaxvq_u8: { 6583 Int = Intrinsic::aarch64_neon_umaxv; 6584 Ty = Int32Ty; 6585 VTy = llvm::VectorType::get(Int8Ty, 16); 6586 llvm::Type *Tys[2] = { Ty, VTy }; 6587 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6588 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6589 return Builder.CreateTrunc(Ops[0], Int8Ty); 6590 } 6591 case NEON::BI__builtin_neon_vmaxvq_u16: { 6592 Int = 
Intrinsic::aarch64_neon_umaxv; 6593 Ty = Int32Ty; 6594 VTy = llvm::VectorType::get(Int16Ty, 8); 6595 llvm::Type *Tys[2] = { Ty, VTy }; 6596 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6597 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6598 return Builder.CreateTrunc(Ops[0], Int16Ty); 6599 } 6600 case NEON::BI__builtin_neon_vmaxv_s8: { 6601 Int = Intrinsic::aarch64_neon_smaxv; 6602 Ty = Int32Ty; 6603 VTy = llvm::VectorType::get(Int8Ty, 8); 6604 llvm::Type *Tys[2] = { Ty, VTy }; 6605 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6606 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6607 return Builder.CreateTrunc(Ops[0], Int8Ty); 6608 } 6609 case NEON::BI__builtin_neon_vmaxv_s16: { 6610 Int = Intrinsic::aarch64_neon_smaxv; 6611 Ty = Int32Ty; 6612 VTy = llvm::VectorType::get(Int16Ty, 4); 6613 llvm::Type *Tys[2] = { Ty, VTy }; 6614 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6615 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6616 return Builder.CreateTrunc(Ops[0], Int16Ty); 6617 } 6618 case NEON::BI__builtin_neon_vmaxvq_s8: { 6619 Int = Intrinsic::aarch64_neon_smaxv; 6620 Ty = Int32Ty; 6621 VTy = llvm::VectorType::get(Int8Ty, 16); 6622 llvm::Type *Tys[2] = { Ty, VTy }; 6623 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6624 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6625 return Builder.CreateTrunc(Ops[0], Int8Ty); 6626 } 6627 case NEON::BI__builtin_neon_vmaxvq_s16: { 6628 Int = Intrinsic::aarch64_neon_smaxv; 6629 Ty = Int32Ty; 6630 VTy = llvm::VectorType::get(Int16Ty, 8); 6631 llvm::Type *Tys[2] = { Ty, VTy }; 6632 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6633 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6634 return Builder.CreateTrunc(Ops[0], Int16Ty); 6635 } 6636 case NEON::BI__builtin_neon_vminv_u8: { 6637 Int = Intrinsic::aarch64_neon_uminv; 6638 Ty = Int32Ty; 6639 VTy = llvm::VectorType::get(Int8Ty, 8); 6640 llvm::Type *Tys[2] = { Ty, VTy }; 6641 
Ops.push_back(EmitScalarExpr(E->getArg(0))); 6642 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6643 return Builder.CreateTrunc(Ops[0], Int8Ty); 6644 } 6645 case NEON::BI__builtin_neon_vminv_u16: { 6646 Int = Intrinsic::aarch64_neon_uminv; 6647 Ty = Int32Ty; 6648 VTy = llvm::VectorType::get(Int16Ty, 4); 6649 llvm::Type *Tys[2] = { Ty, VTy }; 6650 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6651 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6652 return Builder.CreateTrunc(Ops[0], Int16Ty); 6653 } 6654 case NEON::BI__builtin_neon_vminvq_u8: { 6655 Int = Intrinsic::aarch64_neon_uminv; 6656 Ty = Int32Ty; 6657 VTy = llvm::VectorType::get(Int8Ty, 16); 6658 llvm::Type *Tys[2] = { Ty, VTy }; 6659 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6660 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6661 return Builder.CreateTrunc(Ops[0], Int8Ty); 6662 } 6663 case NEON::BI__builtin_neon_vminvq_u16: { 6664 Int = Intrinsic::aarch64_neon_uminv; 6665 Ty = Int32Ty; 6666 VTy = llvm::VectorType::get(Int16Ty, 8); 6667 llvm::Type *Tys[2] = { Ty, VTy }; 6668 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6669 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6670 return Builder.CreateTrunc(Ops[0], Int16Ty); 6671 } 6672 case NEON::BI__builtin_neon_vminv_s8: { 6673 Int = Intrinsic::aarch64_neon_sminv; 6674 Ty = Int32Ty; 6675 VTy = llvm::VectorType::get(Int8Ty, 8); 6676 llvm::Type *Tys[2] = { Ty, VTy }; 6677 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6678 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6679 return Builder.CreateTrunc(Ops[0], Int8Ty); 6680 } 6681 case NEON::BI__builtin_neon_vminv_s16: { 6682 Int = Intrinsic::aarch64_neon_sminv; 6683 Ty = Int32Ty; 6684 VTy = llvm::VectorType::get(Int16Ty, 4); 6685 llvm::Type *Tys[2] = { Ty, VTy }; 6686 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6687 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6688 return Builder.CreateTrunc(Ops[0], 
Int16Ty); 6689 } 6690 case NEON::BI__builtin_neon_vminvq_s8: { 6691 Int = Intrinsic::aarch64_neon_sminv; 6692 Ty = Int32Ty; 6693 VTy = llvm::VectorType::get(Int8Ty, 16); 6694 llvm::Type *Tys[2] = { Ty, VTy }; 6695 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6696 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6697 return Builder.CreateTrunc(Ops[0], Int8Ty); 6698 } 6699 case NEON::BI__builtin_neon_vminvq_s16: { 6700 Int = Intrinsic::aarch64_neon_sminv; 6701 Ty = Int32Ty; 6702 VTy = llvm::VectorType::get(Int16Ty, 8); 6703 llvm::Type *Tys[2] = { Ty, VTy }; 6704 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6705 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6706 return Builder.CreateTrunc(Ops[0], Int16Ty); 6707 } 6708 case NEON::BI__builtin_neon_vmul_n_f64: { 6709 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6710 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6711 return Builder.CreateFMul(Ops[0], RHS); 6712 } 6713 case NEON::BI__builtin_neon_vaddlv_u8: { 6714 Int = Intrinsic::aarch64_neon_uaddlv; 6715 Ty = Int32Ty; 6716 VTy = llvm::VectorType::get(Int8Ty, 8); 6717 llvm::Type *Tys[2] = { Ty, VTy }; 6718 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6719 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6720 return Builder.CreateTrunc(Ops[0], Int16Ty); 6721 } 6722 case NEON::BI__builtin_neon_vaddlv_u16: { 6723 Int = Intrinsic::aarch64_neon_uaddlv; 6724 Ty = Int32Ty; 6725 VTy = llvm::VectorType::get(Int16Ty, 4); 6726 llvm::Type *Tys[2] = { Ty, VTy }; 6727 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6728 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6729 } 6730 case NEON::BI__builtin_neon_vaddlvq_u8: { 6731 Int = Intrinsic::aarch64_neon_uaddlv; 6732 Ty = Int32Ty; 6733 VTy = llvm::VectorType::get(Int8Ty, 16); 6734 llvm::Type *Tys[2] = { Ty, VTy }; 6735 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6736 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6737 
return Builder.CreateTrunc(Ops[0], Int16Ty); 6738 } 6739 case NEON::BI__builtin_neon_vaddlvq_u16: { 6740 Int = Intrinsic::aarch64_neon_uaddlv; 6741 Ty = Int32Ty; 6742 VTy = llvm::VectorType::get(Int16Ty, 8); 6743 llvm::Type *Tys[2] = { Ty, VTy }; 6744 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6745 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6746 } 6747 case NEON::BI__builtin_neon_vaddlv_s8: { 6748 Int = Intrinsic::aarch64_neon_saddlv; 6749 Ty = Int32Ty; 6750 VTy = llvm::VectorType::get(Int8Ty, 8); 6751 llvm::Type *Tys[2] = { Ty, VTy }; 6752 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6753 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6754 return Builder.CreateTrunc(Ops[0], Int16Ty); 6755 } 6756 case NEON::BI__builtin_neon_vaddlv_s16: { 6757 Int = Intrinsic::aarch64_neon_saddlv; 6758 Ty = Int32Ty; 6759 VTy = llvm::VectorType::get(Int16Ty, 4); 6760 llvm::Type *Tys[2] = { Ty, VTy }; 6761 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6762 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6763 } 6764 case NEON::BI__builtin_neon_vaddlvq_s8: { 6765 Int = Intrinsic::aarch64_neon_saddlv; 6766 Ty = Int32Ty; 6767 VTy = llvm::VectorType::get(Int8Ty, 16); 6768 llvm::Type *Tys[2] = { Ty, VTy }; 6769 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6770 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6771 return Builder.CreateTrunc(Ops[0], Int16Ty); 6772 } 6773 case NEON::BI__builtin_neon_vaddlvq_s16: { 6774 Int = Intrinsic::aarch64_neon_saddlv; 6775 Ty = Int32Ty; 6776 VTy = llvm::VectorType::get(Int16Ty, 8); 6777 llvm::Type *Tys[2] = { Ty, VTy }; 6778 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6779 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6780 } 6781 case NEON::BI__builtin_neon_vsri_n_v: 6782 case NEON::BI__builtin_neon_vsriq_n_v: { 6783 Int = Intrinsic::aarch64_neon_vsri; 6784 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6785 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6786 } 
6787 case NEON::BI__builtin_neon_vsli_n_v: 6788 case NEON::BI__builtin_neon_vsliq_n_v: { 6789 Int = Intrinsic::aarch64_neon_vsli; 6790 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6791 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6792 } 6793 case NEON::BI__builtin_neon_vsra_n_v: 6794 case NEON::BI__builtin_neon_vsraq_n_v: 6795 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6796 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6797 return Builder.CreateAdd(Ops[0], Ops[1]); 6798 case NEON::BI__builtin_neon_vrsra_n_v: 6799 case NEON::BI__builtin_neon_vrsraq_n_v: { 6800 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6801 SmallVector<llvm::Value*,2> TmpOps; 6802 TmpOps.push_back(Ops[1]); 6803 TmpOps.push_back(Ops[2]); 6804 Function* F = CGM.getIntrinsic(Int, Ty); 6805 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6806 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6807 return Builder.CreateAdd(Ops[0], tmp); 6808 } 6809 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6810 // of an Align parameter here. 
6811 case NEON::BI__builtin_neon_vld1_x2_v: 6812 case NEON::BI__builtin_neon_vld1q_x2_v: 6813 case NEON::BI__builtin_neon_vld1_x3_v: 6814 case NEON::BI__builtin_neon_vld1q_x3_v: 6815 case NEON::BI__builtin_neon_vld1_x4_v: 6816 case NEON::BI__builtin_neon_vld1q_x4_v: { 6817 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6818 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6819 llvm::Type *Tys[2] = { VTy, PTy }; 6820 unsigned Int; 6821 switch (BuiltinID) { 6822 case NEON::BI__builtin_neon_vld1_x2_v: 6823 case NEON::BI__builtin_neon_vld1q_x2_v: 6824 Int = Intrinsic::aarch64_neon_ld1x2; 6825 break; 6826 case NEON::BI__builtin_neon_vld1_x3_v: 6827 case NEON::BI__builtin_neon_vld1q_x3_v: 6828 Int = Intrinsic::aarch64_neon_ld1x3; 6829 break; 6830 case NEON::BI__builtin_neon_vld1_x4_v: 6831 case NEON::BI__builtin_neon_vld1q_x4_v: 6832 Int = Intrinsic::aarch64_neon_ld1x4; 6833 break; 6834 } 6835 Function *F = CGM.getIntrinsic(Int, Tys); 6836 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6837 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6838 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6839 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6840 } 6841 case NEON::BI__builtin_neon_vst1_x2_v: 6842 case NEON::BI__builtin_neon_vst1q_x2_v: 6843 case NEON::BI__builtin_neon_vst1_x3_v: 6844 case NEON::BI__builtin_neon_vst1q_x3_v: 6845 case NEON::BI__builtin_neon_vst1_x4_v: 6846 case NEON::BI__builtin_neon_vst1q_x4_v: { 6847 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6848 llvm::Type *Tys[2] = { VTy, PTy }; 6849 unsigned Int; 6850 switch (BuiltinID) { 6851 case NEON::BI__builtin_neon_vst1_x2_v: 6852 case NEON::BI__builtin_neon_vst1q_x2_v: 6853 Int = Intrinsic::aarch64_neon_st1x2; 6854 break; 6855 case NEON::BI__builtin_neon_vst1_x3_v: 6856 case NEON::BI__builtin_neon_vst1q_x3_v: 6857 Int = Intrinsic::aarch64_neon_st1x3; 6858 break; 6859 case NEON::BI__builtin_neon_vst1_x4_v: 6860 case 
NEON::BI__builtin_neon_vst1q_x4_v: 6861 Int = Intrinsic::aarch64_neon_st1x4; 6862 break; 6863 } 6864 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6865 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6866 } 6867 case NEON::BI__builtin_neon_vld1_v: 6868 case NEON::BI__builtin_neon_vld1q_v: { 6869 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6870 auto Alignment = CharUnits::fromQuantity( 6871 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); 6872 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 6873 } 6874 case NEON::BI__builtin_neon_vst1_v: 6875 case NEON::BI__builtin_neon_vst1q_v: 6876 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6877 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6878 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6879 case NEON::BI__builtin_neon_vld1_lane_v: 6880 case NEON::BI__builtin_neon_vld1q_lane_v: { 6881 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6882 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6883 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6884 auto Alignment = CharUnits::fromQuantity( 6885 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 6886 Ops[0] = 6887 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6888 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 6889 } 6890 case NEON::BI__builtin_neon_vld1_dup_v: 6891 case NEON::BI__builtin_neon_vld1q_dup_v: { 6892 Value *V = UndefValue::get(Ty); 6893 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6894 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6895 auto Alignment = CharUnits::fromQuantity( 6896 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 6897 Ops[0] = 6898 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6899 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 6900 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 6901 return EmitNeonSplat(Ops[0], CI); 6902 } 6903 case NEON::BI__builtin_neon_vst1_lane_v: 6904 case NEON::BI__builtin_neon_vst1q_lane_v: 6905 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6906 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 6907 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6908 return Builder.CreateDefaultAlignedStore(Ops[1], 6909 Builder.CreateBitCast(Ops[0], Ty)); 6910 case NEON::BI__builtin_neon_vld2_v: 6911 case NEON::BI__builtin_neon_vld2q_v: { 6912 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6913 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6914 llvm::Type *Tys[2] = { VTy, PTy }; 6915 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 6916 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6917 Ops[0] = Builder.CreateBitCast(Ops[0], 6918 llvm::PointerType::getUnqual(Ops[1]->getType())); 6919 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6920 } 6921 case NEON::BI__builtin_neon_vld3_v: 6922 case NEON::BI__builtin_neon_vld3q_v: { 6923 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6924 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6925 llvm::Type *Tys[2] = { VTy, PTy }; 6926 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 6927 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6928 Ops[0] = Builder.CreateBitCast(Ops[0], 6929 llvm::PointerType::getUnqual(Ops[1]->getType())); 6930 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6931 } 6932 case NEON::BI__builtin_neon_vld4_v: 6933 case NEON::BI__builtin_neon_vld4q_v: { 6934 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6935 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6936 llvm::Type *Tys[2] = { VTy, PTy }; 6937 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 6938 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 6939 Ops[0] = Builder.CreateBitCast(Ops[0], 6940 llvm::PointerType::getUnqual(Ops[1]->getType())); 6941 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6942 } 6943 case NEON::BI__builtin_neon_vld2_dup_v: 6944 case NEON::BI__builtin_neon_vld2q_dup_v: { 6945 llvm::Type *PTy = 6946 llvm::PointerType::getUnqual(VTy->getElementType()); 6947 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6948 llvm::Type *Tys[2] = { VTy, PTy }; 6949 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 6950 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6951 Ops[0] = Builder.CreateBitCast(Ops[0], 6952 llvm::PointerType::getUnqual(Ops[1]->getType())); 6953 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6954 } 6955 case NEON::BI__builtin_neon_vld3_dup_v: 6956 case NEON::BI__builtin_neon_vld3q_dup_v: { 6957 llvm::Type *PTy = 6958 llvm::PointerType::getUnqual(VTy->getElementType()); 6959 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6960 llvm::Type *Tys[2] = { VTy, PTy }; 6961 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 6962 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6963 Ops[0] = Builder.CreateBitCast(Ops[0], 6964 llvm::PointerType::getUnqual(Ops[1]->getType())); 6965 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6966 } 6967 case NEON::BI__builtin_neon_vld4_dup_v: 6968 case NEON::BI__builtin_neon_vld4q_dup_v: { 6969 llvm::Type *PTy = 6970 llvm::PointerType::getUnqual(VTy->getElementType()); 6971 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6972 llvm::Type *Tys[2] = { VTy, PTy }; 6973 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 6974 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6975 Ops[0] = Builder.CreateBitCast(Ops[0], 6976 llvm::PointerType::getUnqual(Ops[1]->getType())); 6977 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6978 } 6979 case NEON::BI__builtin_neon_vld2_lane_v: 6980 case NEON::BI__builtin_neon_vld2q_lane_v: { 6981 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 6982 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 6983 Ops.push_back(Ops[1]); 6984 Ops.erase(Ops.begin()+1); 6985 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6986 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6987 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6988 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 6989 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6990 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6991 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6992 } 6993 case NEON::BI__builtin_neon_vld3_lane_v: 6994 case NEON::BI__builtin_neon_vld3q_lane_v: { 6995 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6996 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 6997 Ops.push_back(Ops[1]); 6998 Ops.erase(Ops.begin()+1); 6999 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7000 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7001 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7002 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7003 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 7004 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7005 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7006 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7007 } 7008 case NEON::BI__builtin_neon_vld4_lane_v: 7009 case NEON::BI__builtin_neon_vld4q_lane_v: { 7010 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 7011 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 7012 Ops.push_back(Ops[1]); 7013 Ops.erase(Ops.begin()+1); 7014 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7015 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7016 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7017 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 7018 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 7019 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 7020 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7021 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 7022 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7023 } 7024 case NEON::BI__builtin_neon_vst2_v: 7025 case NEON::BI__builtin_neon_vst2q_v: { 7026 Ops.push_back(Ops[0]); 7027 Ops.erase(Ops.begin()); 7028 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 7029 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 7030 Ops, ""); 7031 } 7032 case NEON::BI__builtin_neon_vst2_lane_v: 7033 case NEON::BI__builtin_neon_vst2q_lane_v: { 7034 Ops.push_back(Ops[0]); 7035 Ops.erase(Ops.begin()); 7036 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 7037 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7038 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 7039 Ops, ""); 7040 } 7041 case NEON::BI__builtin_neon_vst3_v: 7042 case NEON::BI__builtin_neon_vst3q_v: { 7043 Ops.push_back(Ops[0]); 7044 Ops.erase(Ops.begin()); 7045 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7046 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 7047 Ops, ""); 7048 } 7049 case NEON::BI__builtin_neon_vst3_lane_v: 7050 case NEON::BI__builtin_neon_vst3q_lane_v: { 7051 Ops.push_back(Ops[0]); 7052 Ops.erase(Ops.begin()); 7053 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 7054 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7055 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 7056 Ops, ""); 7057 } 7058 case NEON::BI__builtin_neon_vst4_v: 7059 case NEON::BI__builtin_neon_vst4q_v: { 7060 Ops.push_back(Ops[0]); 7061 Ops.erase(Ops.begin()); 7062 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7063 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 7064 Ops, ""); 7065 } 7066 case NEON::BI__builtin_neon_vst4_lane_v: 7067 case NEON::BI__builtin_neon_vst4q_lane_v: { 7068 Ops.push_back(Ops[0]); 7069 Ops.erase(Ops.begin()); 7070 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7071 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 7072 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 7073 Ops, ""); 7074 } 7075 case NEON::BI__builtin_neon_vtrn_v: 7076 case NEON::BI__builtin_neon_vtrnq_v: { 7077 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7078 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7079 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7080 Value *SV = nullptr; 7081 7082 for (unsigned vi = 0; vi != 2; ++vi) { 7083 SmallVector<uint32_t, 16> Indices; 7084 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7085 Indices.push_back(i+vi); 7086 Indices.push_back(i+e+vi); 7087 } 7088 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7089 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 7090 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7091 } 7092 return SV; 7093 } 7094 case NEON::BI__builtin_neon_vuzp_v: 7095 case NEON::BI__builtin_neon_vuzpq_v: { 7096 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7097 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7098 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7099 Value *SV = nullptr; 7100 7101 for (unsigned vi = 0; vi != 2; ++vi) { 7102 SmallVector<uint32_t, 16> Indices; 7103 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 7104 Indices.push_back(2*i+vi); 7105 7106 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7107 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 7108 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7109 } 7110 return SV; 7111 } 7112 case NEON::BI__builtin_neon_vzip_v: 7113 case NEON::BI__builtin_neon_vzipq_v: { 7114 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7115 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7116 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7117 Value *SV = nullptr; 7118 7119 for (unsigned vi = 0; vi != 2; ++vi) { 7120 SmallVector<uint32_t, 16> Indices; 7121 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7122 
Indices.push_back((i + vi*e) >> 1); 7123 Indices.push_back(((i + vi*e) >> 1)+e); 7124 } 7125 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7126 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 7127 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7128 } 7129 return SV; 7130 } 7131 case NEON::BI__builtin_neon_vqtbl1q_v: { 7132 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 7133 Ops, "vtbl1"); 7134 } 7135 case NEON::BI__builtin_neon_vqtbl2q_v: { 7136 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 7137 Ops, "vtbl2"); 7138 } 7139 case NEON::BI__builtin_neon_vqtbl3q_v: { 7140 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 7141 Ops, "vtbl3"); 7142 } 7143 case NEON::BI__builtin_neon_vqtbl4q_v: { 7144 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 7145 Ops, "vtbl4"); 7146 } 7147 case NEON::BI__builtin_neon_vqtbx1q_v: { 7148 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 7149 Ops, "vtbx1"); 7150 } 7151 case NEON::BI__builtin_neon_vqtbx2q_v: { 7152 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 7153 Ops, "vtbx2"); 7154 } 7155 case NEON::BI__builtin_neon_vqtbx3q_v: { 7156 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 7157 Ops, "vtbx3"); 7158 } 7159 case NEON::BI__builtin_neon_vqtbx4q_v: { 7160 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 7161 Ops, "vtbx4"); 7162 } 7163 case NEON::BI__builtin_neon_vsqadd_v: 7164 case NEON::BI__builtin_neon_vsqaddq_v: { 7165 Int = Intrinsic::aarch64_neon_usqadd; 7166 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 7167 } 7168 case NEON::BI__builtin_neon_vuqadd_v: 7169 case NEON::BI__builtin_neon_vuqaddq_v: { 7170 Int = Intrinsic::aarch64_neon_suqadd; 7171 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 7172 } 7173 } 7174 } 7175 7176 llvm::Value *CodeGenFunction:: 7177 
BuildVector(ArrayRef<llvm::Value*> Ops) { 7178 assert((Ops.size() & (Ops.size() - 1)) == 0 && 7179 "Not a power-of-two sized vector!"); 7180 bool AllConstants = true; 7181 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 7182 AllConstants &= isa<Constant>(Ops[i]); 7183 7184 // If this is a constant vector, create a ConstantVector. 7185 if (AllConstants) { 7186 SmallVector<llvm::Constant*, 16> CstOps; 7187 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7188 CstOps.push_back(cast<Constant>(Ops[i])); 7189 return llvm::ConstantVector::get(CstOps); 7190 } 7191 7192 // Otherwise, insertelement the values to build the vector. 7193 Value *Result = 7194 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 7195 7196 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7197 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 7198 7199 return Result; 7200 } 7201 7202 // Convert the mask from an integer type to a vector of i1. 7203 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 7204 unsigned NumElts) { 7205 7206 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 7207 cast<IntegerType>(Mask->getType())->getBitWidth()); 7208 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 7209 7210 // If we have less than 8 elements, then the starting mask was an i8 and 7211 // we need to extract down to the right number of elements. 7212 if (NumElts < 8) { 7213 uint32_t Indices[4]; 7214 for (unsigned i = 0; i != NumElts; ++i) 7215 Indices[i] = i; 7216 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 7217 makeArrayRef(Indices, NumElts), 7218 "extract"); 7219 } 7220 return MaskVec; 7221 } 7222 7223 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 7224 SmallVectorImpl<Value *> &Ops, 7225 unsigned Align) { 7226 // Cast the pointer to right type. 
7227 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7228 llvm::PointerType::getUnqual(Ops[1]->getType())); 7229 7230 // If the mask is all ones just emit a regular store. 7231 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7232 if (C->isAllOnesValue()) 7233 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 7234 7235 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7236 Ops[1]->getType()->getVectorNumElements()); 7237 7238 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 7239 } 7240 7241 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 7242 SmallVectorImpl<Value *> &Ops, unsigned Align) { 7243 // Cast the pointer to right type. 7244 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7245 llvm::PointerType::getUnqual(Ops[1]->getType())); 7246 7247 // If the mask is all ones just emit a regular store. 7248 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7249 if (C->isAllOnesValue()) 7250 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7251 7252 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7253 Ops[1]->getType()->getVectorNumElements()); 7254 7255 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 7256 } 7257 7258 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 7259 SmallVectorImpl<Value *> &Ops, 7260 llvm::Type *DstTy, 7261 unsigned SrcSizeInBits, 7262 unsigned Align) { 7263 // Load the subvector. 7264 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7265 7266 // Create broadcast mask. 
7267 unsigned NumDstElts = DstTy->getVectorNumElements(); 7268 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 7269 7270 SmallVector<uint32_t, 8> Mask; 7271 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 7272 for (unsigned j = 0; j != NumSrcElts; ++j) 7273 Mask.push_back(j); 7274 7275 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 7276 } 7277 7278 static Value *EmitX86Select(CodeGenFunction &CGF, 7279 Value *Mask, Value *Op0, Value *Op1) { 7280 7281 // If the mask is all ones just return first argument. 7282 if (const auto *C = dyn_cast<Constant>(Mask)) 7283 if (C->isAllOnesValue()) 7284 return Op0; 7285 7286 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 7287 7288 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 7289 } 7290 7291 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 7292 bool Signed, SmallVectorImpl<Value *> &Ops) { 7293 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7294 Value *Cmp; 7295 7296 if (CC == 3) { 7297 Cmp = Constant::getNullValue( 7298 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7299 } else if (CC == 7) { 7300 Cmp = Constant::getAllOnesValue( 7301 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7302 } else { 7303 ICmpInst::Predicate Pred; 7304 switch (CC) { 7305 default: llvm_unreachable("Unknown condition code"); 7306 case 0: Pred = ICmpInst::ICMP_EQ; break; 7307 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 7308 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 7309 case 4: Pred = ICmpInst::ICMP_NE; break; 7310 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 7311 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 7312 } 7313 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7314 } 7315 7316 const auto *C = dyn_cast<Constant>(Ops.back()); 7317 if (!C || !C->isAllOnesValue()) 7318 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 7319 7320 if (NumElts < 8) { 7321 uint32_t Indices[8]; 7322 for (unsigned i = 0; i != NumElts; ++i) 7323 Indices[i] = i; 7324 for (unsigned i = NumElts; i != 8; ++i) 7325 Indices[i] = i % NumElts + NumElts; 7326 Cmp = CGF.Builder.CreateShuffleVector( 7327 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 7328 } 7329 return CGF.Builder.CreateBitCast(Cmp, 7330 IntegerType::get(CGF.getLLVMContext(), 7331 std::max(NumElts, 8U))); 7332 } 7333 7334 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { 7335 7336 llvm::Type *Ty = Ops[0]->getType(); 7337 Value *Zero = llvm::Constant::getNullValue(Ty); 7338 Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); 7339 Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); 7340 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); 7341 if (Ops.size() == 1) 7342 return Res; 7343 return EmitX86Select(CGF, Ops[2], Res, Ops[1]); 7344 } 7345 7346 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 7347 ArrayRef<Value *> Ops) { 7348 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7349 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7350 7351 if (Ops.size() == 2) 7352 return Res; 7353 7354 assert(Ops.size() == 4); 7355 return EmitX86Select(CGF, Ops[3], Res, Ops[2]); 7356 } 7357 7358 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 7359 llvm::Type *DstTy) { 7360 unsigned NumberOfElements = DstTy->getVectorNumElements(); 7361 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 7362 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 7363 } 7364 7365 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { 7366 const Expr *CPUExpr = 
E->getArg(0)->IgnoreParenCasts(); 7367 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); 7368 return EmitX86CpuIs(CPUStr); 7369 } 7370 7371 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { 7372 7373 // This enum contains the vendor, type, and subtype enums from the 7374 // runtime library concatenated together. The _START labels mark 7375 // the start and are used to adjust the value into the correct 7376 // encoding space. 7377 enum X86CPUs { 7378 INTEL = 1, 7379 AMD, 7380 CPU_TYPE_START, 7381 INTEL_BONNELL, 7382 INTEL_CORE2, 7383 INTEL_COREI7, 7384 AMDFAM10H, 7385 AMDFAM15H, 7386 INTEL_SILVERMONT, 7387 INTEL_KNL, 7388 AMD_BTVER1, 7389 AMD_BTVER2, 7390 CPU_SUBTYPE_START, 7391 INTEL_COREI7_NEHALEM, 7392 INTEL_COREI7_WESTMERE, 7393 INTEL_COREI7_SANDYBRIDGE, 7394 AMDFAM10H_BARCELONA, 7395 AMDFAM10H_SHANGHAI, 7396 AMDFAM10H_ISTANBUL, 7397 AMDFAM15H_BDVER1, 7398 AMDFAM15H_BDVER2, 7399 AMDFAM15H_BDVER3, 7400 AMDFAM15H_BDVER4, 7401 AMDFAM17H_ZNVER1, 7402 INTEL_COREI7_IVYBRIDGE, 7403 INTEL_COREI7_HASWELL, 7404 INTEL_COREI7_BROADWELL, 7405 INTEL_COREI7_SKYLAKE, 7406 INTEL_COREI7_SKYLAKE_AVX512, 7407 }; 7408 7409 X86CPUs CPU = 7410 StringSwitch<X86CPUs>(CPUStr) 7411 .Case("amd", AMD) 7412 .Case("amdfam10h", AMDFAM10H) 7413 .Case("amdfam10", AMDFAM10H) 7414 .Case("amdfam15h", AMDFAM15H) 7415 .Case("amdfam15", AMDFAM15H) 7416 .Case("atom", INTEL_BONNELL) 7417 .Case("barcelona", AMDFAM10H_BARCELONA) 7418 .Case("bdver1", AMDFAM15H_BDVER1) 7419 .Case("bdver2", AMDFAM15H_BDVER2) 7420 .Case("bdver3", AMDFAM15H_BDVER3) 7421 .Case("bdver4", AMDFAM15H_BDVER4) 7422 .Case("bonnell", INTEL_BONNELL) 7423 .Case("broadwell", INTEL_COREI7_BROADWELL) 7424 .Case("btver1", AMD_BTVER1) 7425 .Case("btver2", AMD_BTVER2) 7426 .Case("core2", INTEL_CORE2) 7427 .Case("corei7", INTEL_COREI7) 7428 .Case("haswell", INTEL_COREI7_HASWELL) 7429 .Case("intel", INTEL) 7430 .Case("istanbul", AMDFAM10H_ISTANBUL) 7431 .Case("ivybridge", INTEL_COREI7_IVYBRIDGE) 7432 .Case("knl", 
INTEL_KNL) 7433 .Case("nehalem", INTEL_COREI7_NEHALEM) 7434 .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE) 7435 .Case("shanghai", AMDFAM10H_SHANGHAI) 7436 .Case("silvermont", INTEL_SILVERMONT) 7437 .Case("skylake", INTEL_COREI7_SKYLAKE) 7438 .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512) 7439 .Case("slm", INTEL_SILVERMONT) 7440 .Case("westmere", INTEL_COREI7_WESTMERE) 7441 .Case("znver1", AMDFAM17H_ZNVER1); 7442 7443 llvm::Type *Int32Ty = Builder.getInt32Ty(); 7444 7445 // Matching the struct layout from the compiler-rt/libgcc structure that is 7446 // filled in: 7447 // unsigned int __cpu_vendor; 7448 // unsigned int __cpu_type; 7449 // unsigned int __cpu_subtype; 7450 // unsigned int __cpu_features[1]; 7451 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7452 llvm::ArrayType::get(Int32Ty, 1)); 7453 7454 // Grab the global __cpu_model. 7455 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7456 7457 // Calculate the index needed to access the correct field based on the 7458 // range. Also adjust the expected value. 7459 unsigned Index; 7460 unsigned Value; 7461 if (CPU > CPU_SUBTYPE_START) { 7462 Index = 2; 7463 Value = CPU - CPU_SUBTYPE_START; 7464 } else if (CPU > CPU_TYPE_START) { 7465 Index = 1; 7466 Value = CPU - CPU_TYPE_START; 7467 } else { 7468 Index = 0; 7469 Value = CPU; 7470 } 7471 7472 // Grab the appropriate field from __cpu_model. 7473 llvm::Value *Idxs[] = { 7474 ConstantInt::get(Int32Ty, 0), 7475 ConstantInt::get(Int32Ty, Index) 7476 }; 7477 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); 7478 CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); 7479 7480 // Check the value of the field against the requested value. 
7481 return Builder.CreateICmpEQ(CpuValue, 7482 llvm::ConstantInt::get(Int32Ty, Value)); 7483 } 7484 7485 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { 7486 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 7487 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 7488 return EmitX86CpuSupports(FeatureStr); 7489 } 7490 7491 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { 7492 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 7493 // based mapping. 7494 // Processor features and mapping to processor feature value. 7495 enum X86Features { 7496 CMOV = 0, 7497 MMX, 7498 POPCNT, 7499 SSE, 7500 SSE2, 7501 SSE3, 7502 SSSE3, 7503 SSE4_1, 7504 SSE4_2, 7505 AVX, 7506 AVX2, 7507 SSE4_A, 7508 FMA4, 7509 XOP, 7510 FMA, 7511 AVX512F, 7512 BMI, 7513 BMI2, 7514 AES, 7515 PCLMUL, 7516 AVX512VL, 7517 AVX512BW, 7518 AVX512DQ, 7519 AVX512CD, 7520 AVX512ER, 7521 AVX512PF, 7522 AVX512VBMI, 7523 AVX512IFMA, 7524 AVX5124VNNIW, 7525 AVX5124FMAPS, 7526 AVX512VPOPCNTDQ, 7527 MAX 7528 }; 7529 7530 uint32_t FeaturesMask = 0; 7531 7532 for (const StringRef &FeatureStr : FeatureStrs) { 7533 X86Features Feature = 7534 StringSwitch<X86Features>(FeatureStr) 7535 .Case("cmov", X86Features::CMOV) 7536 .Case("mmx", X86Features::MMX) 7537 .Case("popcnt", X86Features::POPCNT) 7538 .Case("sse", X86Features::SSE) 7539 .Case("sse2", X86Features::SSE2) 7540 .Case("sse3", X86Features::SSE3) 7541 .Case("ssse3", X86Features::SSSE3) 7542 .Case("sse4.1", X86Features::SSE4_1) 7543 .Case("sse4.2", X86Features::SSE4_2) 7544 .Case("avx", X86Features::AVX) 7545 .Case("avx2", X86Features::AVX2) 7546 .Case("sse4a", X86Features::SSE4_A) 7547 .Case("fma4", X86Features::FMA4) 7548 .Case("xop", X86Features::XOP) 7549 .Case("fma", X86Features::FMA) 7550 .Case("avx512f", X86Features::AVX512F) 7551 .Case("bmi", X86Features::BMI) 7552 .Case("bmi2", X86Features::BMI2) 7553 .Case("aes", X86Features::AES) 7554 .Case("pclmul", 
X86Features::PCLMUL) 7555 .Case("avx512vl", X86Features::AVX512VL) 7556 .Case("avx512bw", X86Features::AVX512BW) 7557 .Case("avx512dq", X86Features::AVX512DQ) 7558 .Case("avx512cd", X86Features::AVX512CD) 7559 .Case("avx512er", X86Features::AVX512ER) 7560 .Case("avx512pf", X86Features::AVX512PF) 7561 .Case("avx512vbmi", X86Features::AVX512VBMI) 7562 .Case("avx512ifma", X86Features::AVX512IFMA) 7563 .Case("avx5124vnniw", X86Features::AVX5124VNNIW) 7564 .Case("avx5124fmaps", X86Features::AVX5124FMAPS) 7565 .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) 7566 .Default(X86Features::MAX); 7567 assert(Feature != X86Features::MAX && "Invalid feature!"); 7568 FeaturesMask |= (1U << Feature); 7569 } 7570 7571 // Matching the struct layout from the compiler-rt/libgcc structure that is 7572 // filled in: 7573 // unsigned int __cpu_vendor; 7574 // unsigned int __cpu_type; 7575 // unsigned int __cpu_subtype; 7576 // unsigned int __cpu_features[1]; 7577 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7578 llvm::ArrayType::get(Int32Ty, 1)); 7579 7580 // Grab the global __cpu_model. 7581 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7582 7583 // Grab the first (0th) element from the field __cpu_features off of the 7584 // global in the struct STy. 7585 Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), 7586 ConstantInt::get(Int32Ty, 0)}; 7587 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 7588 Value *Features = 7589 Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); 7590 7591 // Check the value of the bit corresponding to the feature requested. 
7592 Value *Bitset = Builder.CreateAnd( 7593 Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask)); 7594 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 7595 } 7596 7597 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 7598 const CallExpr *E) { 7599 if (BuiltinID == X86::BI__builtin_cpu_is) 7600 return EmitX86CpuIs(E); 7601 if (BuiltinID == X86::BI__builtin_cpu_supports) 7602 return EmitX86CpuSupports(E); 7603 7604 SmallVector<Value*, 4> Ops; 7605 7606 // Find out if any arguments are required to be integer constant expressions. 7607 unsigned ICEArguments = 0; 7608 ASTContext::GetBuiltinTypeError Error; 7609 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7610 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7611 7612 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 7613 // If this is a normal argument, just emit it as a scalar. 7614 if ((ICEArguments & (1 << i)) == 0) { 7615 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7616 continue; 7617 } 7618 7619 // If this is required to be a constant, constant fold it so that we know 7620 // that the generated intrinsic gets a ConstantInt. 7621 llvm::APSInt Result; 7622 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 7623 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 7624 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 7625 } 7626 7627 // These exist so that the builtin that takes an immediate can be bounds 7628 // checked by clang to avoid passing bad immediates to the backend. Since 7629 // AVX has a larger immediate than SSE we would need separate builtins to 7630 // do the different bounds checking. Rather than create a clang specific 7631 // SSE only builtin, this implements eight separate builtins to match gcc 7632 // implementation. 
7633 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 7634 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 7635 llvm::Function *F = CGM.getIntrinsic(ID); 7636 return Builder.CreateCall(F, Ops); 7637 }; 7638 7639 // For the vector forms of FP comparisons, translate the builtins directly to 7640 // IR. 7641 // TODO: The builtins could be removed if the SSE header files used vector 7642 // extension comparisons directly (vector ordered/unordered may need 7643 // additional support via __builtin_isnan()). 7644 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 7645 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 7646 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 7647 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 7648 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 7649 return Builder.CreateBitCast(Sext, FPVecTy); 7650 }; 7651 7652 switch (BuiltinID) { 7653 default: return nullptr; 7654 case X86::BI__builtin_cpu_init: { 7655 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, 7656 /*Variadic*/false); 7657 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, 7658 "__cpu_indicator_init"); 7659 return Builder.CreateCall(Func); 7660 } 7661 case X86::BI_mm_prefetch: { 7662 Value *Address = Ops[0]; 7663 Value *RW = ConstantInt::get(Int32Ty, 0); 7664 Value *Locality = Ops[1]; 7665 Value *Data = ConstantInt::get(Int32Ty, 1); 7666 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 7667 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 7668 } 7669 case X86::BI_mm_clflush: { 7670 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 7671 Ops[0]); 7672 } 7673 case X86::BI_mm_lfence: { 7674 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 7675 } 7676 case X86::BI_mm_mfence: { 7677 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 7678 } 7679 case X86::BI_mm_sfence: { 7680 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 7681 } 7682 case X86::BI_mm_pause: { 7683 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 7684 } 7685 case X86::BI__rdtsc: { 7686 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 7687 } 7688 case X86::BI__builtin_ia32_undef128: 7689 case X86::BI__builtin_ia32_undef256: 7690 case X86::BI__builtin_ia32_undef512: 7691 // The x86 definition of "undef" is not the same as the LLVM definition 7692 // (PR32176). We leave optimizing away an unnecessary zero constant to the 7693 // IR optimizer and backend. 7694 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 7695 // value, we should use that here instead of a zero. 7696 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7697 case X86::BI__builtin_ia32_vec_init_v8qi: 7698 case X86::BI__builtin_ia32_vec_init_v4hi: 7699 case X86::BI__builtin_ia32_vec_init_v2si: 7700 return Builder.CreateBitCast(BuildVector(Ops), 7701 llvm::Type::getX86_MMXTy(getLLVMContext())); 7702 case X86::BI__builtin_ia32_vec_ext_v2si: 7703 return Builder.CreateExtractElement(Ops[0], 7704 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 7705 case X86::BI_mm_setcsr: 7706 case X86::BI__builtin_ia32_ldmxcsr: { 7707 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 7708 Builder.CreateStore(Ops[0], Tmp); 7709 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 7710 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7711 } 7712 case X86::BI_mm_getcsr: 7713 case X86::BI__builtin_ia32_stmxcsr: { 7714 Address Tmp = CreateMemTemp(E->getType()); 7715 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 7716 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7717 return Builder.CreateLoad(Tmp, "stmxcsr"); 7718 } 7719 case X86::BI__builtin_ia32_xsave: 7720 case X86::BI__builtin_ia32_xsave64: 7721 case X86::BI__builtin_ia32_xrstor: 7722 case X86::BI__builtin_ia32_xrstor64: 7723 case 
X86::BI__builtin_ia32_xsaveopt: 7724 case X86::BI__builtin_ia32_xsaveopt64: 7725 case X86::BI__builtin_ia32_xrstors: 7726 case X86::BI__builtin_ia32_xrstors64: 7727 case X86::BI__builtin_ia32_xsavec: 7728 case X86::BI__builtin_ia32_xsavec64: 7729 case X86::BI__builtin_ia32_xsaves: 7730 case X86::BI__builtin_ia32_xsaves64: { 7731 Intrinsic::ID ID; 7732 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 7733 case X86::BI__builtin_ia32_##NAME: \ 7734 ID = Intrinsic::x86_##NAME; \ 7735 break 7736 switch (BuiltinID) { 7737 default: llvm_unreachable("Unsupported intrinsic!"); 7738 INTRINSIC_X86_XSAVE_ID(xsave); 7739 INTRINSIC_X86_XSAVE_ID(xsave64); 7740 INTRINSIC_X86_XSAVE_ID(xrstor); 7741 INTRINSIC_X86_XSAVE_ID(xrstor64); 7742 INTRINSIC_X86_XSAVE_ID(xsaveopt); 7743 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 7744 INTRINSIC_X86_XSAVE_ID(xrstors); 7745 INTRINSIC_X86_XSAVE_ID(xrstors64); 7746 INTRINSIC_X86_XSAVE_ID(xsavec); 7747 INTRINSIC_X86_XSAVE_ID(xsavec64); 7748 INTRINSIC_X86_XSAVE_ID(xsaves); 7749 INTRINSIC_X86_XSAVE_ID(xsaves64); 7750 } 7751 #undef INTRINSIC_X86_XSAVE_ID 7752 Value *Mhi = Builder.CreateTrunc( 7753 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 7754 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 7755 Ops[1] = Mhi; 7756 Ops.push_back(Mlo); 7757 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7758 } 7759 case X86::BI__builtin_ia32_storedqudi128_mask: 7760 case X86::BI__builtin_ia32_storedqusi128_mask: 7761 case X86::BI__builtin_ia32_storedquhi128_mask: 7762 case X86::BI__builtin_ia32_storedquqi128_mask: 7763 case X86::BI__builtin_ia32_storeupd128_mask: 7764 case X86::BI__builtin_ia32_storeups128_mask: 7765 case X86::BI__builtin_ia32_storedqudi256_mask: 7766 case X86::BI__builtin_ia32_storedqusi256_mask: 7767 case X86::BI__builtin_ia32_storedquhi256_mask: 7768 case X86::BI__builtin_ia32_storedquqi256_mask: 7769 case X86::BI__builtin_ia32_storeupd256_mask: 7770 case X86::BI__builtin_ia32_storeups256_mask: 7771 case 
X86::BI__builtin_ia32_storedqudi512_mask: 7772 case X86::BI__builtin_ia32_storedqusi512_mask: 7773 case X86::BI__builtin_ia32_storedquhi512_mask: 7774 case X86::BI__builtin_ia32_storedquqi512_mask: 7775 case X86::BI__builtin_ia32_storeupd512_mask: 7776 case X86::BI__builtin_ia32_storeups512_mask: 7777 return EmitX86MaskedStore(*this, Ops, 1); 7778 7779 case X86::BI__builtin_ia32_storess128_mask: 7780 case X86::BI__builtin_ia32_storesd128_mask: { 7781 return EmitX86MaskedStore(*this, Ops, 16); 7782 } 7783 case X86::BI__builtin_ia32_vpopcntd_512: 7784 case X86::BI__builtin_ia32_vpopcntq_512: { 7785 llvm::Type *ResultType = ConvertType(E->getType()); 7786 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 7787 return Builder.CreateCall(F, Ops); 7788 } 7789 case X86::BI__builtin_ia32_cvtmask2b128: 7790 case X86::BI__builtin_ia32_cvtmask2b256: 7791 case X86::BI__builtin_ia32_cvtmask2b512: 7792 case X86::BI__builtin_ia32_cvtmask2w128: 7793 case X86::BI__builtin_ia32_cvtmask2w256: 7794 case X86::BI__builtin_ia32_cvtmask2w512: 7795 case X86::BI__builtin_ia32_cvtmask2d128: 7796 case X86::BI__builtin_ia32_cvtmask2d256: 7797 case X86::BI__builtin_ia32_cvtmask2d512: 7798 case X86::BI__builtin_ia32_cvtmask2q128: 7799 case X86::BI__builtin_ia32_cvtmask2q256: 7800 case X86::BI__builtin_ia32_cvtmask2q512: 7801 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 7802 7803 case X86::BI__builtin_ia32_movdqa32store128_mask: 7804 case X86::BI__builtin_ia32_movdqa64store128_mask: 7805 case X86::BI__builtin_ia32_storeaps128_mask: 7806 case X86::BI__builtin_ia32_storeapd128_mask: 7807 case X86::BI__builtin_ia32_movdqa32store256_mask: 7808 case X86::BI__builtin_ia32_movdqa64store256_mask: 7809 case X86::BI__builtin_ia32_storeaps256_mask: 7810 case X86::BI__builtin_ia32_storeapd256_mask: 7811 case X86::BI__builtin_ia32_movdqa32store512_mask: 7812 case X86::BI__builtin_ia32_movdqa64store512_mask: 7813 case X86::BI__builtin_ia32_storeaps512_mask: 7814 case 
X86::BI__builtin_ia32_storeapd512_mask: { 7815 unsigned Align = 7816 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7817 return EmitX86MaskedStore(*this, Ops, Align); 7818 } 7819 case X86::BI__builtin_ia32_loadups128_mask: 7820 case X86::BI__builtin_ia32_loadups256_mask: 7821 case X86::BI__builtin_ia32_loadups512_mask: 7822 case X86::BI__builtin_ia32_loadupd128_mask: 7823 case X86::BI__builtin_ia32_loadupd256_mask: 7824 case X86::BI__builtin_ia32_loadupd512_mask: 7825 case X86::BI__builtin_ia32_loaddquqi128_mask: 7826 case X86::BI__builtin_ia32_loaddquqi256_mask: 7827 case X86::BI__builtin_ia32_loaddquqi512_mask: 7828 case X86::BI__builtin_ia32_loaddquhi128_mask: 7829 case X86::BI__builtin_ia32_loaddquhi256_mask: 7830 case X86::BI__builtin_ia32_loaddquhi512_mask: 7831 case X86::BI__builtin_ia32_loaddqusi128_mask: 7832 case X86::BI__builtin_ia32_loaddqusi256_mask: 7833 case X86::BI__builtin_ia32_loaddqusi512_mask: 7834 case X86::BI__builtin_ia32_loaddqudi128_mask: 7835 case X86::BI__builtin_ia32_loaddqudi256_mask: 7836 case X86::BI__builtin_ia32_loaddqudi512_mask: 7837 return EmitX86MaskedLoad(*this, Ops, 1); 7838 7839 case X86::BI__builtin_ia32_loadss128_mask: 7840 case X86::BI__builtin_ia32_loadsd128_mask: 7841 return EmitX86MaskedLoad(*this, Ops, 16); 7842 7843 case X86::BI__builtin_ia32_loadaps128_mask: 7844 case X86::BI__builtin_ia32_loadaps256_mask: 7845 case X86::BI__builtin_ia32_loadaps512_mask: 7846 case X86::BI__builtin_ia32_loadapd128_mask: 7847 case X86::BI__builtin_ia32_loadapd256_mask: 7848 case X86::BI__builtin_ia32_loadapd512_mask: 7849 case X86::BI__builtin_ia32_movdqa32load128_mask: 7850 case X86::BI__builtin_ia32_movdqa32load256_mask: 7851 case X86::BI__builtin_ia32_movdqa32load512_mask: 7852 case X86::BI__builtin_ia32_movdqa64load128_mask: 7853 case X86::BI__builtin_ia32_movdqa64load256_mask: 7854 case X86::BI__builtin_ia32_movdqa64load512_mask: { 7855 unsigned Align = 7856 
getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7857 return EmitX86MaskedLoad(*this, Ops, Align); 7858 } 7859 7860 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 7861 case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 7862 llvm::Type *DstTy = ConvertType(E->getType()); 7863 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 7864 } 7865 7866 case X86::BI__builtin_ia32_storehps: 7867 case X86::BI__builtin_ia32_storelps: { 7868 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 7869 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 7870 7871 // cast val v2i64 7872 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 7873 7874 // extract (0, 1) 7875 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 7876 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 7877 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 7878 7879 // cast pointer to i64 & store 7880 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 7881 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7882 } 7883 case X86::BI__builtin_ia32_palignr128: 7884 case X86::BI__builtin_ia32_palignr256: 7885 case X86::BI__builtin_ia32_palignr512_mask: { 7886 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7887 7888 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7889 assert(NumElts % 16 == 0); 7890 7891 // If palignr is shifting the pair of vectors more than the size of two 7892 // lanes, emit zero. 7893 if (ShiftVal >= 32) 7894 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7895 7896 // If palignr is shifting the pair of input vectors more than one lane, 7897 // but less than two lanes, convert to shifting in zeroes. 
7898 if (ShiftVal > 16) { 7899 ShiftVal -= 16; 7900 Ops[1] = Ops[0]; 7901 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 7902 } 7903 7904 uint32_t Indices[64]; 7905 // 256-bit palignr operates on 128-bit lanes so we need to handle that 7906 for (unsigned l = 0; l != NumElts; l += 16) { 7907 for (unsigned i = 0; i != 16; ++i) { 7908 unsigned Idx = ShiftVal + i; 7909 if (Idx >= 16) 7910 Idx += NumElts - 16; // End of lane, switch operand. 7911 Indices[l + i] = Idx + l; 7912 } 7913 } 7914 7915 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 7916 makeArrayRef(Indices, NumElts), 7917 "palignr"); 7918 7919 // If this isn't a masked builtin, just return the align operation. 7920 if (Ops.size() == 3) 7921 return Align; 7922 7923 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 7924 } 7925 7926 case X86::BI__builtin_ia32_vperm2f128_pd256: 7927 case X86::BI__builtin_ia32_vperm2f128_ps256: 7928 case X86::BI__builtin_ia32_vperm2f128_si256: 7929 case X86::BI__builtin_ia32_permti256: { 7930 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7931 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7932 7933 // This takes a very simple approach since there are two lanes and a 7934 // shuffle can have 2 inputs. So we reserve the first input for the first 7935 // lane and the second input for the second lane. This may result in 7936 // duplicate sources, but this can be dealt with in the backend. 7937 7938 Value *OutOps[2]; 7939 uint32_t Indices[8]; 7940 for (unsigned l = 0; l != 2; ++l) { 7941 // Determine the source for this lane. 7942 if (Imm & (1 << ((l * 4) + 3))) 7943 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); 7944 else if (Imm & (1 << ((l * 4) + 1))) 7945 OutOps[l] = Ops[1]; 7946 else 7947 OutOps[l] = Ops[0]; 7948 7949 for (unsigned i = 0; i != NumElts/2; ++i) { 7950 // Start with ith element of the source for this lane. 
7951 unsigned Idx = (l * NumElts) + i; 7952 // If bit 0 of the immediate half is set, switch to the high half of 7953 // the source. 7954 if (Imm & (1 << (l * 4))) 7955 Idx += NumElts/2; 7956 Indices[(l * (NumElts/2)) + i] = Idx; 7957 } 7958 } 7959 7960 return Builder.CreateShuffleVector(OutOps[0], OutOps[1], 7961 makeArrayRef(Indices, NumElts), 7962 "vperm"); 7963 } 7964 7965 case X86::BI__builtin_ia32_movnti: 7966 case X86::BI__builtin_ia32_movnti64: 7967 case X86::BI__builtin_ia32_movntsd: 7968 case X86::BI__builtin_ia32_movntss: { 7969 llvm::MDNode *Node = llvm::MDNode::get( 7970 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7971 7972 Value *Ptr = Ops[0]; 7973 Value *Src = Ops[1]; 7974 7975 // Extract the 0'th element of the source vector. 7976 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 7977 BuiltinID == X86::BI__builtin_ia32_movntss) 7978 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 7979 7980 // Convert the type of the pointer to a pointer to the stored type. 7981 Value *BC = Builder.CreateBitCast( 7982 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); 7983 7984 // Unaligned nontemporal store of the scalar value. 
7985 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); 7986 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7987 SI->setAlignment(1); 7988 return SI; 7989 } 7990 7991 case X86::BI__builtin_ia32_selectb_128: 7992 case X86::BI__builtin_ia32_selectb_256: 7993 case X86::BI__builtin_ia32_selectb_512: 7994 case X86::BI__builtin_ia32_selectw_128: 7995 case X86::BI__builtin_ia32_selectw_256: 7996 case X86::BI__builtin_ia32_selectw_512: 7997 case X86::BI__builtin_ia32_selectd_128: 7998 case X86::BI__builtin_ia32_selectd_256: 7999 case X86::BI__builtin_ia32_selectd_512: 8000 case X86::BI__builtin_ia32_selectq_128: 8001 case X86::BI__builtin_ia32_selectq_256: 8002 case X86::BI__builtin_ia32_selectq_512: 8003 case X86::BI__builtin_ia32_selectps_128: 8004 case X86::BI__builtin_ia32_selectps_256: 8005 case X86::BI__builtin_ia32_selectps_512: 8006 case X86::BI__builtin_ia32_selectpd_128: 8007 case X86::BI__builtin_ia32_selectpd_256: 8008 case X86::BI__builtin_ia32_selectpd_512: 8009 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 8010 case X86::BI__builtin_ia32_pcmpeqb128_mask: 8011 case X86::BI__builtin_ia32_pcmpeqb256_mask: 8012 case X86::BI__builtin_ia32_pcmpeqb512_mask: 8013 case X86::BI__builtin_ia32_pcmpeqw128_mask: 8014 case X86::BI__builtin_ia32_pcmpeqw256_mask: 8015 case X86::BI__builtin_ia32_pcmpeqw512_mask: 8016 case X86::BI__builtin_ia32_pcmpeqd128_mask: 8017 case X86::BI__builtin_ia32_pcmpeqd256_mask: 8018 case X86::BI__builtin_ia32_pcmpeqd512_mask: 8019 case X86::BI__builtin_ia32_pcmpeqq128_mask: 8020 case X86::BI__builtin_ia32_pcmpeqq256_mask: 8021 case X86::BI__builtin_ia32_pcmpeqq512_mask: 8022 return EmitX86MaskedCompare(*this, 0, false, Ops); 8023 case X86::BI__builtin_ia32_pcmpgtb128_mask: 8024 case X86::BI__builtin_ia32_pcmpgtb256_mask: 8025 case X86::BI__builtin_ia32_pcmpgtb512_mask: 8026 case X86::BI__builtin_ia32_pcmpgtw128_mask: 8027 case X86::BI__builtin_ia32_pcmpgtw256_mask: 8028 case 
X86::BI__builtin_ia32_pcmpgtw512_mask: 8029 case X86::BI__builtin_ia32_pcmpgtd128_mask: 8030 case X86::BI__builtin_ia32_pcmpgtd256_mask: 8031 case X86::BI__builtin_ia32_pcmpgtd512_mask: 8032 case X86::BI__builtin_ia32_pcmpgtq128_mask: 8033 case X86::BI__builtin_ia32_pcmpgtq256_mask: 8034 case X86::BI__builtin_ia32_pcmpgtq512_mask: 8035 return EmitX86MaskedCompare(*this, 6, true, Ops); 8036 case X86::BI__builtin_ia32_cmpb128_mask: 8037 case X86::BI__builtin_ia32_cmpb256_mask: 8038 case X86::BI__builtin_ia32_cmpb512_mask: 8039 case X86::BI__builtin_ia32_cmpw128_mask: 8040 case X86::BI__builtin_ia32_cmpw256_mask: 8041 case X86::BI__builtin_ia32_cmpw512_mask: 8042 case X86::BI__builtin_ia32_cmpd128_mask: 8043 case X86::BI__builtin_ia32_cmpd256_mask: 8044 case X86::BI__builtin_ia32_cmpd512_mask: 8045 case X86::BI__builtin_ia32_cmpq128_mask: 8046 case X86::BI__builtin_ia32_cmpq256_mask: 8047 case X86::BI__builtin_ia32_cmpq512_mask: { 8048 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 8049 return EmitX86MaskedCompare(*this, CC, true, Ops); 8050 } 8051 case X86::BI__builtin_ia32_ucmpb128_mask: 8052 case X86::BI__builtin_ia32_ucmpb256_mask: 8053 case X86::BI__builtin_ia32_ucmpb512_mask: 8054 case X86::BI__builtin_ia32_ucmpw128_mask: 8055 case X86::BI__builtin_ia32_ucmpw256_mask: 8056 case X86::BI__builtin_ia32_ucmpw512_mask: 8057 case X86::BI__builtin_ia32_ucmpd128_mask: 8058 case X86::BI__builtin_ia32_ucmpd256_mask: 8059 case X86::BI__builtin_ia32_ucmpd512_mask: 8060 case X86::BI__builtin_ia32_ucmpq128_mask: 8061 case X86::BI__builtin_ia32_ucmpq256_mask: 8062 case X86::BI__builtin_ia32_ucmpq512_mask: { 8063 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 8064 return EmitX86MaskedCompare(*this, CC, false, Ops); 8065 } 8066 8067 case X86::BI__builtin_ia32_vplzcntd_128_mask: 8068 case X86::BI__builtin_ia32_vplzcntd_256_mask: 8069 case X86::BI__builtin_ia32_vplzcntd_512_mask: 8070 case X86::BI__builtin_ia32_vplzcntq_128_mask: 8071 
case X86::BI__builtin_ia32_vplzcntq_256_mask: 8072 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 8073 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 8074 return EmitX86Select(*this, Ops[2], 8075 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 8076 Ops[1]); 8077 } 8078 8079 case X86::BI__builtin_ia32_pabsb128: 8080 case X86::BI__builtin_ia32_pabsw128: 8081 case X86::BI__builtin_ia32_pabsd128: 8082 case X86::BI__builtin_ia32_pabsb256: 8083 case X86::BI__builtin_ia32_pabsw256: 8084 case X86::BI__builtin_ia32_pabsd256: 8085 case X86::BI__builtin_ia32_pabsq128_mask: 8086 case X86::BI__builtin_ia32_pabsq256_mask: 8087 case X86::BI__builtin_ia32_pabsb512_mask: 8088 case X86::BI__builtin_ia32_pabsw512_mask: 8089 case X86::BI__builtin_ia32_pabsd512_mask: 8090 case X86::BI__builtin_ia32_pabsq512_mask: 8091 return EmitX86Abs(*this, Ops); 8092 8093 case X86::BI__builtin_ia32_pmaxsb128: 8094 case X86::BI__builtin_ia32_pmaxsw128: 8095 case X86::BI__builtin_ia32_pmaxsd128: 8096 case X86::BI__builtin_ia32_pmaxsq128_mask: 8097 case X86::BI__builtin_ia32_pmaxsb256: 8098 case X86::BI__builtin_ia32_pmaxsw256: 8099 case X86::BI__builtin_ia32_pmaxsd256: 8100 case X86::BI__builtin_ia32_pmaxsq256_mask: 8101 case X86::BI__builtin_ia32_pmaxsb512_mask: 8102 case X86::BI__builtin_ia32_pmaxsw512_mask: 8103 case X86::BI__builtin_ia32_pmaxsd512_mask: 8104 case X86::BI__builtin_ia32_pmaxsq512_mask: 8105 return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); 8106 case X86::BI__builtin_ia32_pmaxub128: 8107 case X86::BI__builtin_ia32_pmaxuw128: 8108 case X86::BI__builtin_ia32_pmaxud128: 8109 case X86::BI__builtin_ia32_pmaxuq128_mask: 8110 case X86::BI__builtin_ia32_pmaxub256: 8111 case X86::BI__builtin_ia32_pmaxuw256: 8112 case X86::BI__builtin_ia32_pmaxud256: 8113 case X86::BI__builtin_ia32_pmaxuq256_mask: 8114 case X86::BI__builtin_ia32_pmaxub512_mask: 8115 case X86::BI__builtin_ia32_pmaxuw512_mask: 8116 case X86::BI__builtin_ia32_pmaxud512_mask: 8117 case 
X86::BI__builtin_ia32_pmaxuq512_mask: 8118 return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); 8119 case X86::BI__builtin_ia32_pminsb128: 8120 case X86::BI__builtin_ia32_pminsw128: 8121 case X86::BI__builtin_ia32_pminsd128: 8122 case X86::BI__builtin_ia32_pminsq128_mask: 8123 case X86::BI__builtin_ia32_pminsb256: 8124 case X86::BI__builtin_ia32_pminsw256: 8125 case X86::BI__builtin_ia32_pminsd256: 8126 case X86::BI__builtin_ia32_pminsq256_mask: 8127 case X86::BI__builtin_ia32_pminsb512_mask: 8128 case X86::BI__builtin_ia32_pminsw512_mask: 8129 case X86::BI__builtin_ia32_pminsd512_mask: 8130 case X86::BI__builtin_ia32_pminsq512_mask: 8131 return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); 8132 case X86::BI__builtin_ia32_pminub128: 8133 case X86::BI__builtin_ia32_pminuw128: 8134 case X86::BI__builtin_ia32_pminud128: 8135 case X86::BI__builtin_ia32_pminuq128_mask: 8136 case X86::BI__builtin_ia32_pminub256: 8137 case X86::BI__builtin_ia32_pminuw256: 8138 case X86::BI__builtin_ia32_pminud256: 8139 case X86::BI__builtin_ia32_pminuq256_mask: 8140 case X86::BI__builtin_ia32_pminub512_mask: 8141 case X86::BI__builtin_ia32_pminuw512_mask: 8142 case X86::BI__builtin_ia32_pminud512_mask: 8143 case X86::BI__builtin_ia32_pminuq512_mask: 8144 return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); 8145 8146 // 3DNow! 
8147 case X86::BI__builtin_ia32_pswapdsf: 8148 case X86::BI__builtin_ia32_pswapdsi: { 8149 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 8150 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 8151 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 8152 return Builder.CreateCall(F, Ops, "pswapd"); 8153 } 8154 case X86::BI__builtin_ia32_rdrand16_step: 8155 case X86::BI__builtin_ia32_rdrand32_step: 8156 case X86::BI__builtin_ia32_rdrand64_step: 8157 case X86::BI__builtin_ia32_rdseed16_step: 8158 case X86::BI__builtin_ia32_rdseed32_step: 8159 case X86::BI__builtin_ia32_rdseed64_step: { 8160 Intrinsic::ID ID; 8161 switch (BuiltinID) { 8162 default: llvm_unreachable("Unsupported intrinsic!"); 8163 case X86::BI__builtin_ia32_rdrand16_step: 8164 ID = Intrinsic::x86_rdrand_16; 8165 break; 8166 case X86::BI__builtin_ia32_rdrand32_step: 8167 ID = Intrinsic::x86_rdrand_32; 8168 break; 8169 case X86::BI__builtin_ia32_rdrand64_step: 8170 ID = Intrinsic::x86_rdrand_64; 8171 break; 8172 case X86::BI__builtin_ia32_rdseed16_step: 8173 ID = Intrinsic::x86_rdseed_16; 8174 break; 8175 case X86::BI__builtin_ia32_rdseed32_step: 8176 ID = Intrinsic::x86_rdseed_32; 8177 break; 8178 case X86::BI__builtin_ia32_rdseed64_step: 8179 ID = Intrinsic::x86_rdseed_64; 8180 break; 8181 } 8182 8183 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 8184 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 8185 Ops[0]); 8186 return Builder.CreateExtractValue(Call, 1); 8187 } 8188 8189 // SSE packed comparison intrinsics 8190 case X86::BI__builtin_ia32_cmpeqps: 8191 case X86::BI__builtin_ia32_cmpeqpd: 8192 return getVectorFCmpIR(CmpInst::FCMP_OEQ); 8193 case X86::BI__builtin_ia32_cmpltps: 8194 case X86::BI__builtin_ia32_cmpltpd: 8195 return getVectorFCmpIR(CmpInst::FCMP_OLT); 8196 case X86::BI__builtin_ia32_cmpleps: 8197 case X86::BI__builtin_ia32_cmplepd: 8198 return getVectorFCmpIR(CmpInst::FCMP_OLE); 8199 case 
X86::BI__builtin_ia32_cmpunordps: 8200 case X86::BI__builtin_ia32_cmpunordpd: 8201 return getVectorFCmpIR(CmpInst::FCMP_UNO); 8202 case X86::BI__builtin_ia32_cmpneqps: 8203 case X86::BI__builtin_ia32_cmpneqpd: 8204 return getVectorFCmpIR(CmpInst::FCMP_UNE); 8205 case X86::BI__builtin_ia32_cmpnltps: 8206 case X86::BI__builtin_ia32_cmpnltpd: 8207 return getVectorFCmpIR(CmpInst::FCMP_UGE); 8208 case X86::BI__builtin_ia32_cmpnleps: 8209 case X86::BI__builtin_ia32_cmpnlepd: 8210 return getVectorFCmpIR(CmpInst::FCMP_UGT); 8211 case X86::BI__builtin_ia32_cmpordps: 8212 case X86::BI__builtin_ia32_cmpordpd: 8213 return getVectorFCmpIR(CmpInst::FCMP_ORD); 8214 case X86::BI__builtin_ia32_cmpps: 8215 case X86::BI__builtin_ia32_cmpps256: 8216 case X86::BI__builtin_ia32_cmppd: 8217 case X86::BI__builtin_ia32_cmppd256: { 8218 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8219 // If this one of the SSE immediates, we can use native IR. 8220 if (CC < 8) { 8221 FCmpInst::Predicate Pred; 8222 switch (CC) { 8223 case 0: Pred = FCmpInst::FCMP_OEQ; break; 8224 case 1: Pred = FCmpInst::FCMP_OLT; break; 8225 case 2: Pred = FCmpInst::FCMP_OLE; break; 8226 case 3: Pred = FCmpInst::FCMP_UNO; break; 8227 case 4: Pred = FCmpInst::FCMP_UNE; break; 8228 case 5: Pred = FCmpInst::FCMP_UGE; break; 8229 case 6: Pred = FCmpInst::FCMP_UGT; break; 8230 case 7: Pred = FCmpInst::FCMP_ORD; break; 8231 } 8232 return getVectorFCmpIR(Pred); 8233 } 8234 8235 // We can't handle 8-31 immediates with native IR, use the intrinsic. 8236 // Except for predicates that create constants. 8237 Intrinsic::ID ID; 8238 switch (BuiltinID) { 8239 default: llvm_unreachable("Unsupported intrinsic!"); 8240 case X86::BI__builtin_ia32_cmpps: 8241 ID = Intrinsic::x86_sse_cmp_ps; 8242 break; 8243 case X86::BI__builtin_ia32_cmpps256: 8244 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8245 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 
8246 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8247 Value *Constant = (CC == 0xf || CC == 0x1f) ? 8248 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : 8249 llvm::Constant::getNullValue(Builder.getInt32Ty()); 8250 Value *Vec = Builder.CreateVectorSplat( 8251 Ops[0]->getType()->getVectorNumElements(), Constant); 8252 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8253 } 8254 ID = Intrinsic::x86_avx_cmp_ps_256; 8255 break; 8256 case X86::BI__builtin_ia32_cmppd: 8257 ID = Intrinsic::x86_sse2_cmp_pd; 8258 break; 8259 case X86::BI__builtin_ia32_cmppd256: 8260 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8261 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 8262 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8263 Value *Constant = (CC == 0xf || CC == 0x1f) ? 8264 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : 8265 llvm::Constant::getNullValue(Builder.getInt64Ty()); 8266 Value *Vec = Builder.CreateVectorSplat( 8267 Ops[0]->getType()->getVectorNumElements(), Constant); 8268 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8269 } 8270 ID = Intrinsic::x86_avx_cmp_pd_256; 8271 break; 8272 } 8273 8274 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 8275 } 8276 8277 // SSE scalar comparison intrinsics 8278 case X86::BI__builtin_ia32_cmpeqss: 8279 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 8280 case X86::BI__builtin_ia32_cmpltss: 8281 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 8282 case X86::BI__builtin_ia32_cmpless: 8283 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 8284 case X86::BI__builtin_ia32_cmpunordss: 8285 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 8286 case X86::BI__builtin_ia32_cmpneqss: 8287 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 8288 case X86::BI__builtin_ia32_cmpnltss: 8289 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 8290 case X86::BI__builtin_ia32_cmpnless: 8291 return 
getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 8292 case X86::BI__builtin_ia32_cmpordss: 8293 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 8294 case X86::BI__builtin_ia32_cmpeqsd: 8295 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 8296 case X86::BI__builtin_ia32_cmpltsd: 8297 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 8298 case X86::BI__builtin_ia32_cmplesd: 8299 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 8300 case X86::BI__builtin_ia32_cmpunordsd: 8301 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 8302 case X86::BI__builtin_ia32_cmpneqsd: 8303 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 8304 case X86::BI__builtin_ia32_cmpnltsd: 8305 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 8306 case X86::BI__builtin_ia32_cmpnlesd: 8307 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 8308 case X86::BI__builtin_ia32_cmpordsd: 8309 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 8310 8311 case X86::BI__emul: 8312 case X86::BI__emulu: { 8313 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 8314 bool isSigned = (BuiltinID == X86::BI__emul); 8315 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 8316 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 8317 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 8318 } 8319 case X86::BI__mulh: 8320 case X86::BI__umulh: 8321 case X86::BI_mul128: 8322 case X86::BI_umul128: { 8323 llvm::Type *ResType = ConvertType(E->getType()); 8324 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 8325 8326 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 8327 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 8328 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 8329 8330 Value *MulResult, *HigherBits; 8331 if (IsSigned) { 8332 MulResult = Builder.CreateNSWMul(LHS, RHS); 8333 HigherBits = 
Builder.CreateAShr(MulResult, 64); 8334 } else { 8335 MulResult = Builder.CreateNUWMul(LHS, RHS); 8336 HigherBits = Builder.CreateLShr(MulResult, 64); 8337 } 8338 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 8339 8340 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 8341 return HigherBits; 8342 8343 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 8344 Builder.CreateStore(HigherBits, HighBitsAddress); 8345 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 8346 } 8347 8348 case X86::BI__faststorefence: { 8349 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8350 llvm::SyncScope::System); 8351 } 8352 case X86::BI_ReadWriteBarrier: 8353 case X86::BI_ReadBarrier: 8354 case X86::BI_WriteBarrier: { 8355 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8356 llvm::SyncScope::SingleThread); 8357 } 8358 case X86::BI_BitScanForward: 8359 case X86::BI_BitScanForward64: 8360 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 8361 case X86::BI_BitScanReverse: 8362 case X86::BI_BitScanReverse64: 8363 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 8364 8365 case X86::BI_InterlockedAnd64: 8366 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 8367 case X86::BI_InterlockedExchange64: 8368 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 8369 case X86::BI_InterlockedExchangeAdd64: 8370 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 8371 case X86::BI_InterlockedExchangeSub64: 8372 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 8373 case X86::BI_InterlockedOr64: 8374 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 8375 case X86::BI_InterlockedXor64: 8376 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 8377 case X86::BI_InterlockedDecrement64: 8378 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 8379 case X86::BI_InterlockedIncrement64: 8380 return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 8381 8382 case X86::BI_AddressOfReturnAddress: { 8383 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); 8384 return Builder.CreateCall(F); 8385 } 8386 case X86::BI__stosb: { 8387 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 8388 // instruction, but it will create a memset that won't be optimized away. 8389 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); 8390 } 8391 case X86::BI__ud2: 8392 // llvm.trap makes a ud2a instruction on x86. 8393 return EmitTrapCall(Intrinsic::trap); 8394 case X86::BI__int2c: { 8395 // This syscall signals a driver assertion failure in x86 NT kernels. 8396 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 8397 llvm::InlineAsm *IA = 8398 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); 8399 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 8400 getLLVMContext(), llvm::AttributeList::FunctionIndex, 8401 llvm::Attribute::NoReturn); 8402 CallSite CS = Builder.CreateCall(IA); 8403 CS.setAttributes(NoReturnAttr); 8404 return CS.getInstruction(); 8405 } 8406 case X86::BI__readfsbyte: 8407 case X86::BI__readfsword: 8408 case X86::BI__readfsdword: 8409 case X86::BI__readfsqword: { 8410 llvm::Type *IntTy = ConvertType(E->getType()); 8411 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8412 llvm::PointerType::get(IntTy, 257)); 8413 LoadInst *Load = Builder.CreateAlignedLoad( 8414 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8415 Load->setVolatile(true); 8416 return Load; 8417 } 8418 case X86::BI__readgsbyte: 8419 case X86::BI__readgsword: 8420 case X86::BI__readgsdword: 8421 case X86::BI__readgsqword: { 8422 llvm::Type *IntTy = ConvertType(E->getType()); 8423 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8424 llvm::PointerType::get(IntTy, 256)); 8425 LoadInst *Load = Builder.CreateAlignedLoad( 8426 IntTy, Ptr, 
getContext().getTypeAlignInChars(E->getType())); 8427 Load->setVolatile(true); 8428 return Load; 8429 } 8430 } 8431 } 8432 8433 8434 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 8435 const CallExpr *E) { 8436 SmallVector<Value*, 4> Ops; 8437 8438 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 8439 Ops.push_back(EmitScalarExpr(E->getArg(i))); 8440 8441 Intrinsic::ID ID = Intrinsic::not_intrinsic; 8442 8443 switch (BuiltinID) { 8444 default: return nullptr; 8445 8446 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 8447 // call __builtin_readcyclecounter. 8448 case PPC::BI__builtin_ppc_get_timebase: 8449 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 8450 8451 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 8452 case PPC::BI__builtin_altivec_lvx: 8453 case PPC::BI__builtin_altivec_lvxl: 8454 case PPC::BI__builtin_altivec_lvebx: 8455 case PPC::BI__builtin_altivec_lvehx: 8456 case PPC::BI__builtin_altivec_lvewx: 8457 case PPC::BI__builtin_altivec_lvsl: 8458 case PPC::BI__builtin_altivec_lvsr: 8459 case PPC::BI__builtin_vsx_lxvd2x: 8460 case PPC::BI__builtin_vsx_lxvw4x: 8461 case PPC::BI__builtin_vsx_lxvd2x_be: 8462 case PPC::BI__builtin_vsx_lxvw4x_be: 8463 case PPC::BI__builtin_vsx_lxvl: 8464 case PPC::BI__builtin_vsx_lxvll: 8465 { 8466 if(BuiltinID == PPC::BI__builtin_vsx_lxvl || 8467 BuiltinID == PPC::BI__builtin_vsx_lxvll){ 8468 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); 8469 }else { 8470 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8471 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 8472 Ops.pop_back(); 8473 } 8474 8475 switch (BuiltinID) { 8476 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 8477 case PPC::BI__builtin_altivec_lvx: 8478 ID = Intrinsic::ppc_altivec_lvx; 8479 break; 8480 case PPC::BI__builtin_altivec_lvxl: 8481 ID = Intrinsic::ppc_altivec_lvxl; 8482 break; 8483 case PPC::BI__builtin_altivec_lvebx: 8484 ID = Intrinsic::ppc_altivec_lvebx; 8485 break; 
8486 case PPC::BI__builtin_altivec_lvehx: 8487 ID = Intrinsic::ppc_altivec_lvehx; 8488 break; 8489 case PPC::BI__builtin_altivec_lvewx: 8490 ID = Intrinsic::ppc_altivec_lvewx; 8491 break; 8492 case PPC::BI__builtin_altivec_lvsl: 8493 ID = Intrinsic::ppc_altivec_lvsl; 8494 break; 8495 case PPC::BI__builtin_altivec_lvsr: 8496 ID = Intrinsic::ppc_altivec_lvsr; 8497 break; 8498 case PPC::BI__builtin_vsx_lxvd2x: 8499 ID = Intrinsic::ppc_vsx_lxvd2x; 8500 break; 8501 case PPC::BI__builtin_vsx_lxvw4x: 8502 ID = Intrinsic::ppc_vsx_lxvw4x; 8503 break; 8504 case PPC::BI__builtin_vsx_lxvd2x_be: 8505 ID = Intrinsic::ppc_vsx_lxvd2x_be; 8506 break; 8507 case PPC::BI__builtin_vsx_lxvw4x_be: 8508 ID = Intrinsic::ppc_vsx_lxvw4x_be; 8509 break; 8510 case PPC::BI__builtin_vsx_lxvl: 8511 ID = Intrinsic::ppc_vsx_lxvl; 8512 break; 8513 case PPC::BI__builtin_vsx_lxvll: 8514 ID = Intrinsic::ppc_vsx_lxvll; 8515 break; 8516 } 8517 llvm::Function *F = CGM.getIntrinsic(ID); 8518 return Builder.CreateCall(F, Ops, ""); 8519 } 8520 8521 // vec_st, vec_xst_be 8522 case PPC::BI__builtin_altivec_stvx: 8523 case PPC::BI__builtin_altivec_stvxl: 8524 case PPC::BI__builtin_altivec_stvebx: 8525 case PPC::BI__builtin_altivec_stvehx: 8526 case PPC::BI__builtin_altivec_stvewx: 8527 case PPC::BI__builtin_vsx_stxvd2x: 8528 case PPC::BI__builtin_vsx_stxvw4x: 8529 case PPC::BI__builtin_vsx_stxvd2x_be: 8530 case PPC::BI__builtin_vsx_stxvw4x_be: 8531 case PPC::BI__builtin_vsx_stxvl: 8532 case PPC::BI__builtin_vsx_stxvll: 8533 { 8534 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 8535 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 8536 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8537 }else { 8538 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 8539 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 8540 Ops.pop_back(); 8541 } 8542 8543 switch (BuiltinID) { 8544 default: llvm_unreachable("Unsupported st intrinsic!"); 8545 case PPC::BI__builtin_altivec_stvx: 8546 ID = Intrinsic::ppc_altivec_stvx; 8547 break; 8548 
case PPC::BI__builtin_altivec_stvxl: 8549 ID = Intrinsic::ppc_altivec_stvxl; 8550 break; 8551 case PPC::BI__builtin_altivec_stvebx: 8552 ID = Intrinsic::ppc_altivec_stvebx; 8553 break; 8554 case PPC::BI__builtin_altivec_stvehx: 8555 ID = Intrinsic::ppc_altivec_stvehx; 8556 break; 8557 case PPC::BI__builtin_altivec_stvewx: 8558 ID = Intrinsic::ppc_altivec_stvewx; 8559 break; 8560 case PPC::BI__builtin_vsx_stxvd2x: 8561 ID = Intrinsic::ppc_vsx_stxvd2x; 8562 break; 8563 case PPC::BI__builtin_vsx_stxvw4x: 8564 ID = Intrinsic::ppc_vsx_stxvw4x; 8565 break; 8566 case PPC::BI__builtin_vsx_stxvd2x_be: 8567 ID = Intrinsic::ppc_vsx_stxvd2x_be; 8568 break; 8569 case PPC::BI__builtin_vsx_stxvw4x_be: 8570 ID = Intrinsic::ppc_vsx_stxvw4x_be; 8571 break; 8572 case PPC::BI__builtin_vsx_stxvl: 8573 ID = Intrinsic::ppc_vsx_stxvl; 8574 break; 8575 case PPC::BI__builtin_vsx_stxvll: 8576 ID = Intrinsic::ppc_vsx_stxvll; 8577 break; 8578 } 8579 llvm::Function *F = CGM.getIntrinsic(ID); 8580 return Builder.CreateCall(F, Ops, ""); 8581 } 8582 // Square root 8583 case PPC::BI__builtin_vsx_xvsqrtsp: 8584 case PPC::BI__builtin_vsx_xvsqrtdp: { 8585 llvm::Type *ResultType = ConvertType(E->getType()); 8586 Value *X = EmitScalarExpr(E->getArg(0)); 8587 ID = Intrinsic::sqrt; 8588 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8589 return Builder.CreateCall(F, X); 8590 } 8591 // Count leading zeros 8592 case PPC::BI__builtin_altivec_vclzb: 8593 case PPC::BI__builtin_altivec_vclzh: 8594 case PPC::BI__builtin_altivec_vclzw: 8595 case PPC::BI__builtin_altivec_vclzd: { 8596 llvm::Type *ResultType = ConvertType(E->getType()); 8597 Value *X = EmitScalarExpr(E->getArg(0)); 8598 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8599 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8600 return Builder.CreateCall(F, {X, Undef}); 8601 } 8602 case PPC::BI__builtin_altivec_vctzb: 8603 case PPC::BI__builtin_altivec_vctzh: 8604 case PPC::BI__builtin_altivec_vctzw: 8605 case 
PPC::BI__builtin_altivec_vctzd: { 8606 llvm::Type *ResultType = ConvertType(E->getType()); 8607 Value *X = EmitScalarExpr(E->getArg(0)); 8608 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8609 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8610 return Builder.CreateCall(F, {X, Undef}); 8611 } 8612 case PPC::BI__builtin_altivec_vpopcntb: 8613 case PPC::BI__builtin_altivec_vpopcnth: 8614 case PPC::BI__builtin_altivec_vpopcntw: 8615 case PPC::BI__builtin_altivec_vpopcntd: { 8616 llvm::Type *ResultType = ConvertType(E->getType()); 8617 Value *X = EmitScalarExpr(E->getArg(0)); 8618 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8619 return Builder.CreateCall(F, X); 8620 } 8621 // Copy sign 8622 case PPC::BI__builtin_vsx_xvcpsgnsp: 8623 case PPC::BI__builtin_vsx_xvcpsgndp: { 8624 llvm::Type *ResultType = ConvertType(E->getType()); 8625 Value *X = EmitScalarExpr(E->getArg(0)); 8626 Value *Y = EmitScalarExpr(E->getArg(1)); 8627 ID = Intrinsic::copysign; 8628 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8629 return Builder.CreateCall(F, {X, Y}); 8630 } 8631 // Rounding/truncation 8632 case PPC::BI__builtin_vsx_xvrspip: 8633 case PPC::BI__builtin_vsx_xvrdpip: 8634 case PPC::BI__builtin_vsx_xvrdpim: 8635 case PPC::BI__builtin_vsx_xvrspim: 8636 case PPC::BI__builtin_vsx_xvrdpi: 8637 case PPC::BI__builtin_vsx_xvrspi: 8638 case PPC::BI__builtin_vsx_xvrdpic: 8639 case PPC::BI__builtin_vsx_xvrspic: 8640 case PPC::BI__builtin_vsx_xvrdpiz: 8641 case PPC::BI__builtin_vsx_xvrspiz: { 8642 llvm::Type *ResultType = ConvertType(E->getType()); 8643 Value *X = EmitScalarExpr(E->getArg(0)); 8644 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 8645 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 8646 ID = Intrinsic::floor; 8647 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 8648 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 8649 ID = Intrinsic::round; 8650 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 8651 BuiltinID == 
PPC::BI__builtin_vsx_xvrspic) 8652 ID = Intrinsic::nearbyint; 8653 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 8654 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 8655 ID = Intrinsic::ceil; 8656 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 8657 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 8658 ID = Intrinsic::trunc; 8659 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8660 return Builder.CreateCall(F, X); 8661 } 8662 8663 // Absolute value 8664 case PPC::BI__builtin_vsx_xvabsdp: 8665 case PPC::BI__builtin_vsx_xvabssp: { 8666 llvm::Type *ResultType = ConvertType(E->getType()); 8667 Value *X = EmitScalarExpr(E->getArg(0)); 8668 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8669 return Builder.CreateCall(F, X); 8670 } 8671 8672 // FMA variations 8673 case PPC::BI__builtin_vsx_xvmaddadp: 8674 case PPC::BI__builtin_vsx_xvmaddasp: 8675 case PPC::BI__builtin_vsx_xvnmaddadp: 8676 case PPC::BI__builtin_vsx_xvnmaddasp: 8677 case PPC::BI__builtin_vsx_xvmsubadp: 8678 case PPC::BI__builtin_vsx_xvmsubasp: 8679 case PPC::BI__builtin_vsx_xvnmsubadp: 8680 case PPC::BI__builtin_vsx_xvnmsubasp: { 8681 llvm::Type *ResultType = ConvertType(E->getType()); 8682 Value *X = EmitScalarExpr(E->getArg(0)); 8683 Value *Y = EmitScalarExpr(E->getArg(1)); 8684 Value *Z = EmitScalarExpr(E->getArg(2)); 8685 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8686 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8687 switch (BuiltinID) { 8688 case PPC::BI__builtin_vsx_xvmaddadp: 8689 case PPC::BI__builtin_vsx_xvmaddasp: 8690 return Builder.CreateCall(F, {X, Y, Z}); 8691 case PPC::BI__builtin_vsx_xvnmaddadp: 8692 case PPC::BI__builtin_vsx_xvnmaddasp: 8693 return Builder.CreateFSub(Zero, 8694 Builder.CreateCall(F, {X, Y, Z}), "sub"); 8695 case PPC::BI__builtin_vsx_xvmsubadp: 8696 case PPC::BI__builtin_vsx_xvmsubasp: 8697 return Builder.CreateCall(F, 8698 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8699 case 
PPC::BI__builtin_vsx_xvnmsubadp: 8700 case PPC::BI__builtin_vsx_xvnmsubasp: 8701 Value *FsubRes = 8702 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8703 return Builder.CreateFSub(Zero, FsubRes, "sub"); 8704 } 8705 llvm_unreachable("Unknown FMA operation"); 8706 return nullptr; // Suppress no-return warning 8707 } 8708 8709 case PPC::BI__builtin_vsx_insertword: { 8710 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 8711 8712 // Third argument is a compile time constant int. It must be clamped to 8713 // to the range [0, 12]. 8714 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8715 assert(ArgCI && 8716 "Third arg to xxinsertw intrinsic must be constant integer"); 8717 const int64_t MaxIndex = 12; 8718 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8719 8720 // The builtin semantics don't exactly match the xxinsertw instructions 8721 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 8722 // word from the first argument, and inserts it in the second argument. The 8723 // instruction extracts the word from its second input register and inserts 8724 // it into its first input register, so swap the first and second arguments. 8725 std::swap(Ops[0], Ops[1]); 8726 8727 // Need to cast the second argument from a vector of unsigned int to a 8728 // vector of long long. 8729 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8730 8731 if (getTarget().isLittleEndian()) { 8732 // Create a shuffle mask of (1, 0) 8733 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8734 ConstantInt::get(Int32Ty, 0) 8735 }; 8736 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8737 8738 // Reverse the double words in the vector we will extract from. 8739 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8740 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 8741 8742 // Reverse the index. 
8743 Index = MaxIndex - Index; 8744 } 8745 8746 // Intrinsic expects the first arg to be a vector of int. 8747 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8748 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 8749 return Builder.CreateCall(F, Ops); 8750 } 8751 8752 case PPC::BI__builtin_vsx_extractuword: { 8753 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 8754 8755 // Intrinsic expects the first argument to be a vector of doublewords. 8756 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8757 8758 // The second argument is a compile time constant int that needs to 8759 // be clamped to the range [0, 12]. 8760 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 8761 assert(ArgCI && 8762 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 8763 const int64_t MaxIndex = 12; 8764 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8765 8766 if (getTarget().isLittleEndian()) { 8767 // Reverse the index. 8768 Index = MaxIndex - Index; 8769 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8770 8771 // Emit the call, then reverse the double words of the results vector. 
8772 Value *Call = Builder.CreateCall(F, Ops); 8773 8774 // Create a shuffle mask of (1, 0) 8775 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8776 ConstantInt::get(Int32Ty, 0) 8777 }; 8778 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8779 8780 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 8781 return ShuffleCall; 8782 } else { 8783 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8784 return Builder.CreateCall(F, Ops); 8785 } 8786 } 8787 8788 case PPC::BI__builtin_vsx_xxpermdi: { 8789 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8790 assert(ArgCI && "Third arg must be constant integer!"); 8791 8792 unsigned Index = ArgCI->getZExtValue(); 8793 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8794 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8795 8796 // Element zero comes from the first input vector and element one comes from 8797 // the second. The element indices within each vector are numbered in big 8798 // endian order so the shuffle mask must be adjusted for this on little 8799 // endian platforms (i.e. index is complemented and source vector reversed). 
8800 unsigned ElemIdx0; 8801 unsigned ElemIdx1; 8802 if (getTarget().isLittleEndian()) { 8803 ElemIdx0 = (~Index & 1) + 2; 8804 ElemIdx1 = (~Index & 2) >> 1; 8805 } else { // BigEndian 8806 ElemIdx0 = (Index & 2) >> 1; 8807 ElemIdx1 = 2 + (Index & 1); 8808 } 8809 8810 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 8811 ConstantInt::get(Int32Ty, ElemIdx1)}; 8812 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8813 8814 Value *ShuffleCall = 8815 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8816 QualType BIRetType = E->getType(); 8817 auto RetTy = ConvertType(BIRetType); 8818 return Builder.CreateBitCast(ShuffleCall, RetTy); 8819 } 8820 8821 case PPC::BI__builtin_vsx_xxsldwi: { 8822 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8823 assert(ArgCI && "Third argument must be a compile time constant"); 8824 unsigned Index = ArgCI->getZExtValue() & 0x3; 8825 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8826 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 8827 8828 // Create a shuffle mask 8829 unsigned ElemIdx0; 8830 unsigned ElemIdx1; 8831 unsigned ElemIdx2; 8832 unsigned ElemIdx3; 8833 if (getTarget().isLittleEndian()) { 8834 // Little endian element N comes from element 8+N-Index of the 8835 // concatenated wide vector (of course, using modulo arithmetic on 8836 // the total number of elements). 
8837 ElemIdx0 = (8 - Index) % 8; 8838 ElemIdx1 = (9 - Index) % 8; 8839 ElemIdx2 = (10 - Index) % 8; 8840 ElemIdx3 = (11 - Index) % 8; 8841 } else { 8842 // Big endian ElemIdx<N> = Index + N 8843 ElemIdx0 = Index; 8844 ElemIdx1 = Index + 1; 8845 ElemIdx2 = Index + 2; 8846 ElemIdx3 = Index + 3; 8847 } 8848 8849 Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), 8850 ConstantInt::get(Int32Ty, ElemIdx1), 8851 ConstantInt::get(Int32Ty, ElemIdx2), 8852 ConstantInt::get(Int32Ty, ElemIdx3)}; 8853 8854 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8855 Value *ShuffleCall = 8856 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8857 QualType BIRetType = E->getType(); 8858 auto RetTy = ConvertType(BIRetType); 8859 return Builder.CreateBitCast(ShuffleCall, RetTy); 8860 } 8861 } 8862 } 8863 8864 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 8865 const CallExpr *E) { 8866 switch (BuiltinID) { 8867 case AMDGPU::BI__builtin_amdgcn_div_scale: 8868 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 8869 // Translate from the intrinsics's struct return to the builtin's out 8870 // argument. 
8871 8872 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 8873 8874 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 8875 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 8876 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 8877 8878 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 8879 X->getType()); 8880 8881 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 8882 8883 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 8884 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 8885 8886 llvm::Type *RealFlagType 8887 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 8888 8889 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 8890 Builder.CreateStore(FlagExt, FlagOutPtr); 8891 return Result; 8892 } 8893 case AMDGPU::BI__builtin_amdgcn_div_fmas: 8894 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 8895 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 8896 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 8897 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 8898 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 8899 8900 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 8901 Src0->getType()); 8902 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 8903 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 8904 } 8905 8906 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 8907 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 8908 case AMDGPU::BI__builtin_amdgcn_mov_dpp: { 8909 llvm::SmallVector<llvm::Value *, 5> Args; 8910 for (unsigned I = 0; I != 5; ++I) 8911 Args.push_back(EmitScalarExpr(E->getArg(I))); 8912 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, 8913 Args[0]->getType()); 8914 return Builder.CreateCall(F, Args); 8915 } 8916 case AMDGPU::BI__builtin_amdgcn_div_fixup: 8917 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 8918 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 8919 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 8920 case 
AMDGPU::BI__builtin_amdgcn_trig_preop: 8921 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 8922 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 8923 case AMDGPU::BI__builtin_amdgcn_rcp: 8924 case AMDGPU::BI__builtin_amdgcn_rcpf: 8925 case AMDGPU::BI__builtin_amdgcn_rcph: 8926 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 8927 case AMDGPU::BI__builtin_amdgcn_rsq: 8928 case AMDGPU::BI__builtin_amdgcn_rsqf: 8929 case AMDGPU::BI__builtin_amdgcn_rsqh: 8930 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 8931 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 8932 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 8933 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 8934 case AMDGPU::BI__builtin_amdgcn_sinf: 8935 case AMDGPU::BI__builtin_amdgcn_sinh: 8936 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 8937 case AMDGPU::BI__builtin_amdgcn_cosf: 8938 case AMDGPU::BI__builtin_amdgcn_cosh: 8939 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 8940 case AMDGPU::BI__builtin_amdgcn_log_clampf: 8941 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 8942 case AMDGPU::BI__builtin_amdgcn_ldexp: 8943 case AMDGPU::BI__builtin_amdgcn_ldexpf: 8944 case AMDGPU::BI__builtin_amdgcn_ldexph: 8945 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 8946 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 8947 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 8948 case AMDGPU::BI__builtin_amdgcn_frexp_manth: 8949 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 8950 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 8951 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 8952 Value *Src0 = EmitScalarExpr(E->getArg(0)); 8953 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 8954 { Builder.getInt32Ty(), Src0->getType() }); 8955 return Builder.CreateCall(F, Src0); 8956 } 8957 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 8958 Value *Src0 = EmitScalarExpr(E->getArg(0)); 8959 Value *F = 
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 8960 { Builder.getInt16Ty(), Src0->getType() }); 8961 return Builder.CreateCall(F, Src0); 8962 } 8963 case AMDGPU::BI__builtin_amdgcn_fract: 8964 case AMDGPU::BI__builtin_amdgcn_fractf: 8965 case AMDGPU::BI__builtin_amdgcn_fracth: 8966 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 8967 case AMDGPU::BI__builtin_amdgcn_lerp: 8968 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 8969 case AMDGPU::BI__builtin_amdgcn_uicmp: 8970 case AMDGPU::BI__builtin_amdgcn_uicmpl: 8971 case AMDGPU::BI__builtin_amdgcn_sicmp: 8972 case AMDGPU::BI__builtin_amdgcn_sicmpl: 8973 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 8974 case AMDGPU::BI__builtin_amdgcn_fcmp: 8975 case AMDGPU::BI__builtin_amdgcn_fcmpf: 8976 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 8977 case AMDGPU::BI__builtin_amdgcn_class: 8978 case AMDGPU::BI__builtin_amdgcn_classf: 8979 case AMDGPU::BI__builtin_amdgcn_classh: 8980 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 8981 case AMDGPU::BI__builtin_amdgcn_fmed3f: 8982 case AMDGPU::BI__builtin_amdgcn_fmed3h: 8983 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); 8984 case AMDGPU::BI__builtin_amdgcn_read_exec: { 8985 CallInst *CI = cast<CallInst>( 8986 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 8987 CI->setConvergent(); 8988 return CI; 8989 } 8990 8991 // amdgcn workitem 8992 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 8993 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 8994 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 8995 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 8996 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 8997 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 8998 8999 // r600 intrinsics 9000 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 9001 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 9002 return 
emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 9003 case AMDGPU::BI__builtin_r600_read_tidig_x: 9004 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 9005 case AMDGPU::BI__builtin_r600_read_tidig_y: 9006 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 9007 case AMDGPU::BI__builtin_r600_read_tidig_z: 9008 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 9009 default: 9010 return nullptr; 9011 } 9012 } 9013 9014 /// Handle a SystemZ function in which the final argument is a pointer 9015 /// to an int that receives the post-instruction CC value. At the LLVM level 9016 /// this is represented as a function that returns a {result, cc} pair. 9017 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 9018 unsigned IntrinsicID, 9019 const CallExpr *E) { 9020 unsigned NumArgs = E->getNumArgs() - 1; 9021 SmallVector<Value *, 8> Args(NumArgs); 9022 for (unsigned I = 0; I < NumArgs; ++I) 9023 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 9024 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 9025 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 9026 Value *Call = CGF.Builder.CreateCall(F, Args); 9027 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 9028 CGF.Builder.CreateStore(CC, CCPtr); 9029 return CGF.Builder.CreateExtractValue(Call, 0); 9030 } 9031 9032 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 9033 const CallExpr *E) { 9034 switch (BuiltinID) { 9035 case SystemZ::BI__builtin_tbegin: { 9036 Value *TDB = EmitScalarExpr(E->getArg(0)); 9037 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 9038 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 9039 return Builder.CreateCall(F, {TDB, Control}); 9040 } 9041 case SystemZ::BI__builtin_tbegin_nofloat: { 9042 Value *TDB = EmitScalarExpr(E->getArg(0)); 9043 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 9044 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 9045 return 
Builder.CreateCall(F, {TDB, Control}); 9046 } 9047 case SystemZ::BI__builtin_tbeginc: { 9048 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 9049 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 9050 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 9051 return Builder.CreateCall(F, {TDB, Control}); 9052 } 9053 case SystemZ::BI__builtin_tabort: { 9054 Value *Data = EmitScalarExpr(E->getArg(0)); 9055 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 9056 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 9057 } 9058 case SystemZ::BI__builtin_non_tx_store: { 9059 Value *Address = EmitScalarExpr(E->getArg(0)); 9060 Value *Data = EmitScalarExpr(E->getArg(1)); 9061 Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); 9062 return Builder.CreateCall(F, {Data, Address}); 9063 } 9064 9065 // Vector builtins. Note that most vector builtins are mapped automatically 9066 // to target-specific LLVM intrinsics. The ones handled specially here can 9067 // be represented via standard LLVM IR, which is preferable to enable common 9068 // LLVM optimizations. 
9069 9070 case SystemZ::BI__builtin_s390_vpopctb: 9071 case SystemZ::BI__builtin_s390_vpopcth: 9072 case SystemZ::BI__builtin_s390_vpopctf: 9073 case SystemZ::BI__builtin_s390_vpopctg: { 9074 llvm::Type *ResultType = ConvertType(E->getType()); 9075 Value *X = EmitScalarExpr(E->getArg(0)); 9076 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 9077 return Builder.CreateCall(F, X); 9078 } 9079 9080 case SystemZ::BI__builtin_s390_vclzb: 9081 case SystemZ::BI__builtin_s390_vclzh: 9082 case SystemZ::BI__builtin_s390_vclzf: 9083 case SystemZ::BI__builtin_s390_vclzg: { 9084 llvm::Type *ResultType = ConvertType(E->getType()); 9085 Value *X = EmitScalarExpr(E->getArg(0)); 9086 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9087 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 9088 return Builder.CreateCall(F, {X, Undef}); 9089 } 9090 9091 case SystemZ::BI__builtin_s390_vctzb: 9092 case SystemZ::BI__builtin_s390_vctzh: 9093 case SystemZ::BI__builtin_s390_vctzf: 9094 case SystemZ::BI__builtin_s390_vctzg: { 9095 llvm::Type *ResultType = ConvertType(E->getType()); 9096 Value *X = EmitScalarExpr(E->getArg(0)); 9097 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9098 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 9099 return Builder.CreateCall(F, {X, Undef}); 9100 } 9101 9102 case SystemZ::BI__builtin_s390_vfsqsb: 9103 case SystemZ::BI__builtin_s390_vfsqdb: { 9104 llvm::Type *ResultType = ConvertType(E->getType()); 9105 Value *X = EmitScalarExpr(E->getArg(0)); 9106 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 9107 return Builder.CreateCall(F, X); 9108 } 9109 case SystemZ::BI__builtin_s390_vfmasb: 9110 case SystemZ::BI__builtin_s390_vfmadb: { 9111 llvm::Type *ResultType = ConvertType(E->getType()); 9112 Value *X = EmitScalarExpr(E->getArg(0)); 9113 Value *Y = EmitScalarExpr(E->getArg(1)); 9114 Value *Z = EmitScalarExpr(E->getArg(2)); 9115 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 
9116 return Builder.CreateCall(F, {X, Y, Z}); 9117 } 9118 case SystemZ::BI__builtin_s390_vfmssb: 9119 case SystemZ::BI__builtin_s390_vfmsdb: { 9120 llvm::Type *ResultType = ConvertType(E->getType()); 9121 Value *X = EmitScalarExpr(E->getArg(0)); 9122 Value *Y = EmitScalarExpr(E->getArg(1)); 9123 Value *Z = EmitScalarExpr(E->getArg(2)); 9124 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9125 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9126 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 9127 } 9128 case SystemZ::BI__builtin_s390_vfnmasb: 9129 case SystemZ::BI__builtin_s390_vfnmadb: { 9130 llvm::Type *ResultType = ConvertType(E->getType()); 9131 Value *X = EmitScalarExpr(E->getArg(0)); 9132 Value *Y = EmitScalarExpr(E->getArg(1)); 9133 Value *Z = EmitScalarExpr(E->getArg(2)); 9134 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9135 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9136 return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); 9137 } 9138 case SystemZ::BI__builtin_s390_vfnmssb: 9139 case SystemZ::BI__builtin_s390_vfnmsdb: { 9140 llvm::Type *ResultType = ConvertType(E->getType()); 9141 Value *X = EmitScalarExpr(E->getArg(0)); 9142 Value *Y = EmitScalarExpr(E->getArg(1)); 9143 Value *Z = EmitScalarExpr(E->getArg(2)); 9144 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9145 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9146 Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); 9147 return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); 9148 } 9149 case SystemZ::BI__builtin_s390_vflpsb: 9150 case SystemZ::BI__builtin_s390_vflpdb: { 9151 llvm::Type *ResultType = ConvertType(E->getType()); 9152 Value *X = EmitScalarExpr(E->getArg(0)); 9153 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9154 return Builder.CreateCall(F, X); 9155 } 9156 case SystemZ::BI__builtin_s390_vflnsb: 9157 case 
SystemZ::BI__builtin_s390_vflndb: { 9158 llvm::Type *ResultType = ConvertType(E->getType()); 9159 Value *X = EmitScalarExpr(E->getArg(0)); 9160 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9161 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9162 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 9163 } 9164 case SystemZ::BI__builtin_s390_vfisb: 9165 case SystemZ::BI__builtin_s390_vfidb: { 9166 llvm::Type *ResultType = ConvertType(E->getType()); 9167 Value *X = EmitScalarExpr(E->getArg(0)); 9168 // Constant-fold the M4 and M5 mask arguments. 9169 llvm::APSInt M4, M5; 9170 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 9171 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 9172 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 9173 (void)IsConstM4; (void)IsConstM5; 9174 // Check whether this instance can be represented via a LLVM standard 9175 // intrinsic. We only support some combinations of M4 and M5. 
9176 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9177 switch (M4.getZExtValue()) { 9178 default: break; 9179 case 0: // IEEE-inexact exception allowed 9180 switch (M5.getZExtValue()) { 9181 default: break; 9182 case 0: ID = Intrinsic::rint; break; 9183 } 9184 break; 9185 case 4: // IEEE-inexact exception suppressed 9186 switch (M5.getZExtValue()) { 9187 default: break; 9188 case 0: ID = Intrinsic::nearbyint; break; 9189 case 1: ID = Intrinsic::round; break; 9190 case 5: ID = Intrinsic::trunc; break; 9191 case 6: ID = Intrinsic::ceil; break; 9192 case 7: ID = Intrinsic::floor; break; 9193 } 9194 break; 9195 } 9196 if (ID != Intrinsic::not_intrinsic) { 9197 Function *F = CGM.getIntrinsic(ID, ResultType); 9198 return Builder.CreateCall(F, X); 9199 } 9200 switch (BuiltinID) { 9201 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; 9202 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; 9203 default: llvm_unreachable("Unknown BuiltinID"); 9204 } 9205 Function *F = CGM.getIntrinsic(ID); 9206 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9207 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 9208 return Builder.CreateCall(F, {X, M4Value, M5Value}); 9209 } 9210 case SystemZ::BI__builtin_s390_vfmaxsb: 9211 case SystemZ::BI__builtin_s390_vfmaxdb: { 9212 llvm::Type *ResultType = ConvertType(E->getType()); 9213 Value *X = EmitScalarExpr(E->getArg(0)); 9214 Value *Y = EmitScalarExpr(E->getArg(1)); 9215 // Constant-fold the M4 mask argument. 9216 llvm::APSInt M4; 9217 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 9218 assert(IsConstM4 && "Constant arg isn't actually constant?"); 9219 (void)IsConstM4; 9220 // Check whether this instance can be represented via a LLVM standard 9221 // intrinsic. We only support some values of M4. 
9222 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9223 switch (M4.getZExtValue()) { 9224 default: break; 9225 case 4: ID = Intrinsic::maxnum; break; 9226 } 9227 if (ID != Intrinsic::not_intrinsic) { 9228 Function *F = CGM.getIntrinsic(ID, ResultType); 9229 return Builder.CreateCall(F, {X, Y}); 9230 } 9231 switch (BuiltinID) { 9232 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; 9233 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; 9234 default: llvm_unreachable("Unknown BuiltinID"); 9235 } 9236 Function *F = CGM.getIntrinsic(ID); 9237 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9238 return Builder.CreateCall(F, {X, Y, M4Value}); 9239 } 9240 case SystemZ::BI__builtin_s390_vfminsb: 9241 case SystemZ::BI__builtin_s390_vfmindb: { 9242 llvm::Type *ResultType = ConvertType(E->getType()); 9243 Value *X = EmitScalarExpr(E->getArg(0)); 9244 Value *Y = EmitScalarExpr(E->getArg(1)); 9245 // Constant-fold the M4 mask argument. 9246 llvm::APSInt M4; 9247 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 9248 assert(IsConstM4 && "Constant arg isn't actually constant?"); 9249 (void)IsConstM4; 9250 // Check whether this instance can be represented via a LLVM standard 9251 // intrinsic. We only support some values of M4. 
9252 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9253 switch (M4.getZExtValue()) { 9254 default: break; 9255 case 4: ID = Intrinsic::minnum; break; 9256 } 9257 if (ID != Intrinsic::not_intrinsic) { 9258 Function *F = CGM.getIntrinsic(ID, ResultType); 9259 return Builder.CreateCall(F, {X, Y}); 9260 } 9261 switch (BuiltinID) { 9262 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; 9263 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; 9264 default: llvm_unreachable("Unknown BuiltinID"); 9265 } 9266 Function *F = CGM.getIntrinsic(ID); 9267 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9268 return Builder.CreateCall(F, {X, Y, M4Value}); 9269 } 9270 9271 // Vector intrisincs that output the post-instruction CC value. 9272 9273 #define INTRINSIC_WITH_CC(NAME) \ 9274 case SystemZ::BI__builtin_##NAME: \ 9275 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 9276 9277 INTRINSIC_WITH_CC(s390_vpkshs); 9278 INTRINSIC_WITH_CC(s390_vpksfs); 9279 INTRINSIC_WITH_CC(s390_vpksgs); 9280 9281 INTRINSIC_WITH_CC(s390_vpklshs); 9282 INTRINSIC_WITH_CC(s390_vpklsfs); 9283 INTRINSIC_WITH_CC(s390_vpklsgs); 9284 9285 INTRINSIC_WITH_CC(s390_vceqbs); 9286 INTRINSIC_WITH_CC(s390_vceqhs); 9287 INTRINSIC_WITH_CC(s390_vceqfs); 9288 INTRINSIC_WITH_CC(s390_vceqgs); 9289 9290 INTRINSIC_WITH_CC(s390_vchbs); 9291 INTRINSIC_WITH_CC(s390_vchhs); 9292 INTRINSIC_WITH_CC(s390_vchfs); 9293 INTRINSIC_WITH_CC(s390_vchgs); 9294 9295 INTRINSIC_WITH_CC(s390_vchlbs); 9296 INTRINSIC_WITH_CC(s390_vchlhs); 9297 INTRINSIC_WITH_CC(s390_vchlfs); 9298 INTRINSIC_WITH_CC(s390_vchlgs); 9299 9300 INTRINSIC_WITH_CC(s390_vfaebs); 9301 INTRINSIC_WITH_CC(s390_vfaehs); 9302 INTRINSIC_WITH_CC(s390_vfaefs); 9303 9304 INTRINSIC_WITH_CC(s390_vfaezbs); 9305 INTRINSIC_WITH_CC(s390_vfaezhs); 9306 INTRINSIC_WITH_CC(s390_vfaezfs); 9307 9308 INTRINSIC_WITH_CC(s390_vfeebs); 9309 INTRINSIC_WITH_CC(s390_vfeehs); 9310 INTRINSIC_WITH_CC(s390_vfeefs); 9311 9312 
INTRINSIC_WITH_CC(s390_vfeezbs); 9313 INTRINSIC_WITH_CC(s390_vfeezhs); 9314 INTRINSIC_WITH_CC(s390_vfeezfs); 9315 9316 INTRINSIC_WITH_CC(s390_vfenebs); 9317 INTRINSIC_WITH_CC(s390_vfenehs); 9318 INTRINSIC_WITH_CC(s390_vfenefs); 9319 9320 INTRINSIC_WITH_CC(s390_vfenezbs); 9321 INTRINSIC_WITH_CC(s390_vfenezhs); 9322 INTRINSIC_WITH_CC(s390_vfenezfs); 9323 9324 INTRINSIC_WITH_CC(s390_vistrbs); 9325 INTRINSIC_WITH_CC(s390_vistrhs); 9326 INTRINSIC_WITH_CC(s390_vistrfs); 9327 9328 INTRINSIC_WITH_CC(s390_vstrcbs); 9329 INTRINSIC_WITH_CC(s390_vstrchs); 9330 INTRINSIC_WITH_CC(s390_vstrcfs); 9331 9332 INTRINSIC_WITH_CC(s390_vstrczbs); 9333 INTRINSIC_WITH_CC(s390_vstrczhs); 9334 INTRINSIC_WITH_CC(s390_vstrczfs); 9335 9336 INTRINSIC_WITH_CC(s390_vfcesbs); 9337 INTRINSIC_WITH_CC(s390_vfcedbs); 9338 INTRINSIC_WITH_CC(s390_vfchsbs); 9339 INTRINSIC_WITH_CC(s390_vfchdbs); 9340 INTRINSIC_WITH_CC(s390_vfchesbs); 9341 INTRINSIC_WITH_CC(s390_vfchedbs); 9342 9343 INTRINSIC_WITH_CC(s390_vftcisb); 9344 INTRINSIC_WITH_CC(s390_vftcidb); 9345 9346 #undef INTRINSIC_WITH_CC 9347 9348 default: 9349 return nullptr; 9350 } 9351 } 9352 9353 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 9354 const CallExpr *E) { 9355 auto MakeLdg = [&](unsigned IntrinsicID) { 9356 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9357 clang::CharUnits Align = 9358 getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); 9359 return Builder.CreateCall( 9360 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9361 Ptr->getType()}), 9362 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 9363 }; 9364 auto MakeScopedAtomic = [&](unsigned IntrinsicID) { 9365 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9366 return Builder.CreateCall( 9367 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9368 Ptr->getType()}), 9369 {Ptr, EmitScalarExpr(E->getArg(1))}); 9370 }; 9371 switch (BuiltinID) { 9372 case NVPTX::BI__nvvm_atom_add_gen_i: 9373 case 
NVPTX::BI__nvvm_atom_add_gen_l: 9374 case NVPTX::BI__nvvm_atom_add_gen_ll: 9375 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 9376 9377 case NVPTX::BI__nvvm_atom_sub_gen_i: 9378 case NVPTX::BI__nvvm_atom_sub_gen_l: 9379 case NVPTX::BI__nvvm_atom_sub_gen_ll: 9380 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 9381 9382 case NVPTX::BI__nvvm_atom_and_gen_i: 9383 case NVPTX::BI__nvvm_atom_and_gen_l: 9384 case NVPTX::BI__nvvm_atom_and_gen_ll: 9385 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 9386 9387 case NVPTX::BI__nvvm_atom_or_gen_i: 9388 case NVPTX::BI__nvvm_atom_or_gen_l: 9389 case NVPTX::BI__nvvm_atom_or_gen_ll: 9390 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 9391 9392 case NVPTX::BI__nvvm_atom_xor_gen_i: 9393 case NVPTX::BI__nvvm_atom_xor_gen_l: 9394 case NVPTX::BI__nvvm_atom_xor_gen_ll: 9395 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 9396 9397 case NVPTX::BI__nvvm_atom_xchg_gen_i: 9398 case NVPTX::BI__nvvm_atom_xchg_gen_l: 9399 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 9400 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 9401 9402 case NVPTX::BI__nvvm_atom_max_gen_i: 9403 case NVPTX::BI__nvvm_atom_max_gen_l: 9404 case NVPTX::BI__nvvm_atom_max_gen_ll: 9405 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 9406 9407 case NVPTX::BI__nvvm_atom_max_gen_ui: 9408 case NVPTX::BI__nvvm_atom_max_gen_ul: 9409 case NVPTX::BI__nvvm_atom_max_gen_ull: 9410 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 9411 9412 case NVPTX::BI__nvvm_atom_min_gen_i: 9413 case NVPTX::BI__nvvm_atom_min_gen_l: 9414 case NVPTX::BI__nvvm_atom_min_gen_ll: 9415 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 9416 9417 case NVPTX::BI__nvvm_atom_min_gen_ui: 9418 case NVPTX::BI__nvvm_atom_min_gen_ul: 9419 case NVPTX::BI__nvvm_atom_min_gen_ull: 9420 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 9421 9422 case 
NVPTX::BI__nvvm_atom_cas_gen_i: 9423 case NVPTX::BI__nvvm_atom_cas_gen_l: 9424 case NVPTX::BI__nvvm_atom_cas_gen_ll: 9425 // __nvvm_atom_cas_gen_* should return the old value rather than the 9426 // success flag. 9427 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 9428 9429 case NVPTX::BI__nvvm_atom_add_gen_f: { 9430 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9431 Value *Val = EmitScalarExpr(E->getArg(1)); 9432 // atomicrmw only deals with integer arguments so we need to use 9433 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 9434 Value *FnALAF32 = 9435 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 9436 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 9437 } 9438 9439 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 9440 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9441 Value *Val = EmitScalarExpr(E->getArg(1)); 9442 Value *FnALI32 = 9443 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 9444 return Builder.CreateCall(FnALI32, {Ptr, Val}); 9445 } 9446 9447 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 9448 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9449 Value *Val = EmitScalarExpr(E->getArg(1)); 9450 Value *FnALD32 = 9451 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 9452 return Builder.CreateCall(FnALD32, {Ptr, Val}); 9453 } 9454 9455 case NVPTX::BI__nvvm_ldg_c: 9456 case NVPTX::BI__nvvm_ldg_c2: 9457 case NVPTX::BI__nvvm_ldg_c4: 9458 case NVPTX::BI__nvvm_ldg_s: 9459 case NVPTX::BI__nvvm_ldg_s2: 9460 case NVPTX::BI__nvvm_ldg_s4: 9461 case NVPTX::BI__nvvm_ldg_i: 9462 case NVPTX::BI__nvvm_ldg_i2: 9463 case NVPTX::BI__nvvm_ldg_i4: 9464 case NVPTX::BI__nvvm_ldg_l: 9465 case NVPTX::BI__nvvm_ldg_ll: 9466 case NVPTX::BI__nvvm_ldg_ll2: 9467 case NVPTX::BI__nvvm_ldg_uc: 9468 case NVPTX::BI__nvvm_ldg_uc2: 9469 case NVPTX::BI__nvvm_ldg_uc4: 9470 case NVPTX::BI__nvvm_ldg_us: 9471 case NVPTX::BI__nvvm_ldg_us2: 9472 case NVPTX::BI__nvvm_ldg_us4: 9473 case NVPTX::BI__nvvm_ldg_ui: 9474 case 
NVPTX::BI__nvvm_ldg_ui2: 9475 case NVPTX::BI__nvvm_ldg_ui4: 9476 case NVPTX::BI__nvvm_ldg_ul: 9477 case NVPTX::BI__nvvm_ldg_ull: 9478 case NVPTX::BI__nvvm_ldg_ull2: 9479 // PTX Interoperability section 2.2: "For a vector with an even number of 9480 // elements, its alignment is set to number of elements times the alignment 9481 // of its member: n*alignof(t)." 9482 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 9483 case NVPTX::BI__nvvm_ldg_f: 9484 case NVPTX::BI__nvvm_ldg_f2: 9485 case NVPTX::BI__nvvm_ldg_f4: 9486 case NVPTX::BI__nvvm_ldg_d: 9487 case NVPTX::BI__nvvm_ldg_d2: 9488 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 9489 9490 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 9491 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 9492 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 9493 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 9494 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 9495 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 9496 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 9497 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 9498 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 9499 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 9500 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 9501 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 9502 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 9503 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 9504 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 9505 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 9506 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 9507 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 9508 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 9509 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 9510 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 9511 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 9512 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 9513 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 9514 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 9515 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 9516 case 
NVPTX::BI__nvvm_atom_cta_max_gen_ll: 9517 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 9518 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 9519 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 9520 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 9521 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 9522 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 9523 case NVPTX::BI__nvvm_atom_sys_max_gen_ll: 9524 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 9525 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 9526 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 9527 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 9528 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 9529 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 9530 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 9531 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 9532 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 9533 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 9534 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 9535 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 9536 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 9537 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 9538 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 9539 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 9540 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 9541 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 9542 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 9543 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 9544 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 9545 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 9546 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 9547 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 9548 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 9549 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 9550 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 9551 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 9552 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 9553 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 9554 case 
NVPTX::BI__nvvm_atom_sys_and_gen_ll: 9555 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 9556 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 9557 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 9558 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 9559 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 9560 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 9561 case NVPTX::BI__nvvm_atom_sys_or_gen_l: 9562 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 9563 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 9564 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 9565 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 9566 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 9567 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 9568 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 9569 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 9570 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 9571 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 9572 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 9573 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 9574 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 9575 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9576 return Builder.CreateCall( 9577 CGM.getIntrinsic( 9578 Intrinsic::nvvm_atomic_cas_gen_i_cta, 9579 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9580 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9581 } 9582 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 9583 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 9584 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 9585 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9586 return Builder.CreateCall( 9587 CGM.getIntrinsic( 9588 Intrinsic::nvvm_atomic_cas_gen_i_sys, 9589 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9590 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9591 } 9592 case NVPTX::BI__nvvm_match_all_sync_i32p: 9593 case NVPTX::BI__nvvm_match_all_sync_i64p: { 9594 Value *Mask = EmitScalarExpr(E->getArg(0)); 9595 Value *Val = EmitScalarExpr(E->getArg(1)); 9596 Address 
PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); 9597 Value *ResultPair = Builder.CreateCall( 9598 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p 9599 ? Intrinsic::nvvm_match_all_sync_i32p 9600 : Intrinsic::nvvm_match_all_sync_i64p), 9601 {Mask, Val}); 9602 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), 9603 PredOutPtr.getElementType()); 9604 Builder.CreateStore(Pred, PredOutPtr); 9605 return Builder.CreateExtractValue(ResultPair, 0); 9606 } 9607 default: 9608 return nullptr; 9609 } 9610 } 9611 9612 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 9613 const CallExpr *E) { 9614 switch (BuiltinID) { 9615 case WebAssembly::BI__builtin_wasm_current_memory: { 9616 llvm::Type *ResultType = ConvertType(E->getType()); 9617 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 9618 return Builder.CreateCall(Callee); 9619 } 9620 case WebAssembly::BI__builtin_wasm_grow_memory: { 9621 Value *X = EmitScalarExpr(E->getArg(0)); 9622 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 9623 return Builder.CreateCall(Callee, X); 9624 } 9625 case WebAssembly::BI__builtin_wasm_throw: { 9626 Value *Tag = EmitScalarExpr(E->getArg(0)); 9627 Value *Obj = EmitScalarExpr(E->getArg(1)); 9628 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); 9629 return Builder.CreateCall(Callee, {Tag, Obj}); 9630 } 9631 case WebAssembly::BI__builtin_wasm_rethrow: { 9632 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); 9633 return Builder.CreateCall(Callee); 9634 } 9635 9636 default: 9637 return nullptr; 9638 } 9639 } 9640