1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGObjCRuntime.h" 16 #include "CGOpenCLRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "CodeGenModule.h" 19 #include "ConstantEmitter.h" 20 #include "TargetInfo.h" 21 #include "clang/AST/ASTContext.h" 22 #include "clang/AST/Decl.h" 23 #include "clang/Analysis/Analyses/OSLog.h" 24 #include "clang/Basic/TargetBuiltins.h" 25 #include "clang/Basic/TargetInfo.h" 26 #include "clang/CodeGen/CGFunctionInfo.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/IR/CallSite.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/InlineAsm.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/MDBuilder.h" 33 #include "llvm/Support/ScopedPrinter.h" 34 #include "llvm/Support/ConvertUTF.h" 35 #include <sstream> 36 37 using namespace clang; 38 using namespace CodeGen; 39 using namespace llvm; 40 41 static 42 int64_t clamp(int64_t Value, int64_t Low, int64_t High) { 43 return std::min(High, std::max(Low, Value)); 44 } 45 46 /// getBuiltinLibFunction - Given a builtin id for a function like 47 /// "__builtin_fabsf", return a Function* for "fabsf". 48 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 49 unsigned BuiltinID) { 50 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 51 52 // Get the name, skip over the __builtin_ prefix (if necessary). 53 StringRef Name; 54 GlobalDecl D(FD); 55 56 // If the builtin has been declared explicitly with an assembler label, 57 // use the mangled name. 
This differs from the plain label on platforms 58 // that prefix labels. 59 if (FD->hasAttr<AsmLabelAttr>()) 60 Name = getMangledName(D); 61 else 62 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 63 64 llvm::FunctionType *Ty = 65 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 66 67 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 68 } 69 70 /// Emit the conversions required to turn the given value into an 71 /// integer of the given size. 72 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 73 QualType T, llvm::IntegerType *IntType) { 74 V = CGF.EmitToMemory(V, T); 75 76 if (V->getType()->isPointerTy()) 77 return CGF.Builder.CreatePtrToInt(V, IntType); 78 79 assert(V->getType() == IntType); 80 return V; 81 } 82 83 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 84 QualType T, llvm::Type *ResultType) { 85 V = CGF.EmitFromMemory(V, T); 86 87 if (ResultType->isPointerTy()) 88 return CGF.Builder.CreateIntToPtr(V, ResultType); 89 90 assert(V->getType() == ResultType); 91 return V; 92 } 93 94 /// Utility to insert an atomic instruction based on Instrinsic::ID 95 /// and the expression node. 
96 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 97 llvm::AtomicRMWInst::BinOp Kind, 98 const CallExpr *E) { 99 QualType T = E->getType(); 100 assert(E->getArg(0)->getType()->isPointerType()); 101 assert(CGF.getContext().hasSameUnqualifiedType(T, 102 E->getArg(0)->getType()->getPointeeType())); 103 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 104 105 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 106 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 107 108 llvm::IntegerType *IntType = 109 llvm::IntegerType::get(CGF.getLLVMContext(), 110 CGF.getContext().getTypeSize(T)); 111 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 112 113 llvm::Value *Args[2]; 114 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 115 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 116 llvm::Type *ValueType = Args[1]->getType(); 117 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 118 119 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 120 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 121 return EmitFromInt(CGF, Result, T, ValueType); 122 } 123 124 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 125 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 126 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 127 128 // Convert the type of the pointer to a pointer to the stored type. 
129 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 130 Value *BC = CGF.Builder.CreateBitCast( 131 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 132 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 133 LV.setNontemporal(true); 134 CGF.EmitStoreOfScalar(Val, LV, false); 135 return nullptr; 136 } 137 138 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 139 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 140 141 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 142 LV.setNontemporal(true); 143 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 144 } 145 146 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 147 llvm::AtomicRMWInst::BinOp Kind, 148 const CallExpr *E) { 149 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 150 } 151 152 /// Utility to insert an atomic instruction based Instrinsic::ID and 153 /// the expression node, where the return value is the result of the 154 /// operation. 
155 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 156 llvm::AtomicRMWInst::BinOp Kind, 157 const CallExpr *E, 158 Instruction::BinaryOps Op, 159 bool Invert = false) { 160 QualType T = E->getType(); 161 assert(E->getArg(0)->getType()->isPointerType()); 162 assert(CGF.getContext().hasSameUnqualifiedType(T, 163 E->getArg(0)->getType()->getPointeeType())); 164 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 165 166 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 167 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 168 169 llvm::IntegerType *IntType = 170 llvm::IntegerType::get(CGF.getLLVMContext(), 171 CGF.getContext().getTypeSize(T)); 172 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 173 174 llvm::Value *Args[2]; 175 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 176 llvm::Type *ValueType = Args[1]->getType(); 177 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 178 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 179 180 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 181 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 182 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 183 if (Invert) 184 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 185 llvm::ConstantInt::get(IntType, -1)); 186 Result = EmitFromInt(CGF, Result, T, ValueType); 187 return RValue::get(Result); 188 } 189 190 /// @brief Utility to insert an atomic cmpxchg instruction. 191 /// 192 /// @param CGF The current codegen function. 193 /// @param E Builtin call expression to convert to cmpxchg. 194 /// arg0 - address to operate on 195 /// arg1 - value to compare with 196 /// arg2 - new value 197 /// @param ReturnBool Specifies whether to return success flag of 198 /// cmpxchg result or the old value. 
199 /// 200 /// @returns result of cmpxchg, according to ReturnBool 201 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 202 bool ReturnBool) { 203 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 204 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 205 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 206 207 llvm::IntegerType *IntType = llvm::IntegerType::get( 208 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 209 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 210 211 Value *Args[3]; 212 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 213 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 214 llvm::Type *ValueType = Args[1]->getType(); 215 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 216 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 217 218 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 219 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 220 llvm::AtomicOrdering::SequentiallyConsistent); 221 if (ReturnBool) 222 // Extract boolean success flag and zext it to int. 223 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 224 CGF.ConvertType(E->getType())); 225 else 226 // Extract old value and emit it using the same type as compare value. 227 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 228 ValueType); 229 } 230 231 // Emit a simple mangled intrinsic that has 1 argument and a return type 232 // matching the argument type. 233 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 234 const CallExpr *E, 235 unsigned IntrinsicID) { 236 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 237 238 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 239 return CGF.Builder.CreateCall(F, Src0); 240 } 241 242 // Emit an intrinsic that has 2 operands of the same type as its result. 
243 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 244 const CallExpr *E, 245 unsigned IntrinsicID) { 246 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 247 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 248 249 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 250 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 251 } 252 253 // Emit an intrinsic that has 3 operands of the same type as its result. 254 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 255 const CallExpr *E, 256 unsigned IntrinsicID) { 257 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 258 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 259 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 260 261 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 262 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 263 } 264 265 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 266 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 267 const CallExpr *E, 268 unsigned IntrinsicID) { 269 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 270 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 271 272 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 273 return CGF.Builder.CreateCall(F, {Src0, Src1}); 274 } 275 276 /// EmitFAbs - Emit a call to @llvm.fabs(). 277 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 278 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 279 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 280 Call->setDoesNotAccessMemory(); 281 return Call; 282 } 283 284 /// Emit the computation of the sign bit for a floating point value. Returns 285 /// the i1 sign bit value. 
286 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 287 LLVMContext &C = CGF.CGM.getLLVMContext(); 288 289 llvm::Type *Ty = V->getType(); 290 int Width = Ty->getPrimitiveSizeInBits(); 291 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 292 V = CGF.Builder.CreateBitCast(V, IntTy); 293 if (Ty->isPPC_FP128Ty()) { 294 // We want the sign bit of the higher-order double. The bitcast we just 295 // did works as if the double-double was stored to memory and then 296 // read as an i128. The "store" will put the higher-order double in the 297 // lower address in both little- and big-Endian modes, but the "load" 298 // will treat those bits as a different part of the i128: the low bits in 299 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 300 // we need to shift the high bits down to the low before truncating. 301 Width >>= 1; 302 if (CGF.getTarget().isBigEndian()) { 303 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 304 V = CGF.Builder.CreateLShr(V, ShiftCst); 305 } 306 // We are truncating value in order to extract the higher-order 307 // double, which we will be using to extract the sign from. 308 IntTy = llvm::IntegerType::get(C, Width); 309 V = CGF.Builder.CreateTrunc(V, IntTy); 310 } 311 Value *Zero = llvm::Constant::getNullValue(IntTy); 312 return CGF.Builder.CreateICmpSLT(V, Zero); 313 } 314 315 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 316 const CallExpr *E, llvm::Constant *calleeValue) { 317 CGCallee callee = CGCallee::forDirect(calleeValue, FD); 318 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 319 } 320 321 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 322 /// depending on IntrinsicID. 323 /// 324 /// \arg CGF The current codegen function. 325 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 326 /// \arg X The first argument to the llvm.*.with.overflow.*. 
327 /// \arg Y The second argument to the llvm.*.with.overflow.*. 328 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 329 /// \returns The result (i.e. sum/product) returned by the intrinsic. 330 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 331 const llvm::Intrinsic::ID IntrinsicID, 332 llvm::Value *X, llvm::Value *Y, 333 llvm::Value *&Carry) { 334 // Make sure we have integers of the same width. 335 assert(X->getType() == Y->getType() && 336 "Arguments must be the same type. (Did you forget to make sure both " 337 "arguments have the same integer width?)"); 338 339 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 340 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 341 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 342 return CGF.Builder.CreateExtractValue(Tmp, 0); 343 } 344 345 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 346 unsigned IntrinsicID, 347 int low, int high) { 348 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 349 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 350 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 351 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 352 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 353 return Call; 354 } 355 356 namespace { 357 struct WidthAndSignedness { 358 unsigned Width; 359 bool Signed; 360 }; 361 } 362 363 static WidthAndSignedness 364 getIntegerWidthAndSignedness(const clang::ASTContext &context, 365 const clang::QualType Type) { 366 assert(Type->isIntegerType() && "Given type is not an integer."); 367 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 368 bool Signed = Type->isSignedIntegerType(); 369 return {Width, Signed}; 370 } 371 372 // Given one or more integer types, this function produces an integer type that 373 // encompasses them: any value in one of the given types could be expressed in 374 // the encompassing type. 
375 static struct WidthAndSignedness 376 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 377 assert(Types.size() > 0 && "Empty list of types."); 378 379 // If any of the given types is signed, we must return a signed type. 380 bool Signed = false; 381 for (const auto &Type : Types) { 382 Signed |= Type.Signed; 383 } 384 385 // The encompassing type must have a width greater than or equal to the width 386 // of the specified types. Aditionally, if the encompassing type is signed, 387 // its width must be strictly greater than the width of any unsigned types 388 // given. 389 unsigned Width = 0; 390 for (const auto &Type : Types) { 391 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 392 if (Width < MinWidth) { 393 Width = MinWidth; 394 } 395 } 396 397 return {Width, Signed}; 398 } 399 400 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 401 llvm::Type *DestType = Int8PtrTy; 402 if (ArgValue->getType() != DestType) 403 ArgValue = 404 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 405 406 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 407 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 408 } 409 410 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 411 /// __builtin_object_size(p, @p To) is correct 412 static bool areBOSTypesCompatible(int From, int To) { 413 // Note: Our __builtin_object_size implementation currently treats Type=0 and 414 // Type=2 identically. Encoding this implementation detail here may make 415 // improving __builtin_object_size difficult in the future, so it's omitted. 416 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 417 } 418 419 static llvm::Value * 420 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 421 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 422 } 423 424 llvm::Value * 425 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 426 llvm::IntegerType *ResType, 427 llvm::Value *EmittedE) { 428 uint64_t ObjectSize; 429 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 430 return emitBuiltinObjectSize(E, Type, ResType, EmittedE); 431 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 432 } 433 434 /// Returns a Value corresponding to the size of the given expression. 435 /// This Value may be either of the following: 436 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 437 /// it) 438 /// - A call to the @llvm.objectsize intrinsic 439 /// 440 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null 441 /// and we wouldn't otherwise try to reference a pass_object_size parameter, 442 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. 443 llvm::Value * 444 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 445 llvm::IntegerType *ResType, 446 llvm::Value *EmittedE) { 447 // We need to reference an argument if the pointer is a parameter with the 448 // pass_object_size attribute. 449 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 450 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 451 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 452 if (Param != nullptr && PS != nullptr && 453 areBOSTypesCompatible(PS->getType(), Type)) { 454 auto Iter = SizeArguments.find(Param); 455 assert(Iter != SizeArguments.end()); 456 457 const ImplicitParamDecl *D = Iter->second; 458 auto DIter = LocalDeclMap.find(D); 459 assert(DIter != LocalDeclMap.end()); 460 461 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 462 getContext().getSizeType(), E->getLocStart()); 463 } 464 } 465 466 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 467 // evaluate E for side-effects. 
In either case, we shouldn't lower to 468 // @llvm.objectsize. 469 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) 470 return getDefaultBuiltinObjectSizeResult(Type, ResType); 471 472 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); 473 assert(Ptr->getType()->isPointerTy() && 474 "Non-pointer passed to __builtin_object_size?"); 475 476 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); 477 478 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. 479 Value *Min = Builder.getInt1((Type & 2) != 0); 480 // For GCC compatability, __builtin_object_size treat NULL as unknown size. 481 Value *NullIsUnknown = Builder.getTrue(); 482 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); 483 } 484 485 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we 486 // handle them here. 487 enum class CodeGenFunction::MSVCIntrin { 488 _BitScanForward, 489 _BitScanReverse, 490 _InterlockedAnd, 491 _InterlockedDecrement, 492 _InterlockedExchange, 493 _InterlockedExchangeAdd, 494 _InterlockedExchangeSub, 495 _InterlockedIncrement, 496 _InterlockedOr, 497 _InterlockedXor, 498 _interlockedbittestandset, 499 __fastfail, 500 }; 501 502 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 503 const CallExpr *E) { 504 switch (BuiltinID) { 505 case MSVCIntrin::_BitScanForward: 506 case MSVCIntrin::_BitScanReverse: { 507 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 508 509 llvm::Type *ArgType = ArgValue->getType(); 510 llvm::Type *IndexType = 511 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 512 llvm::Type *ResultType = ConvertType(E->getType()); 513 514 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 515 Value *ResZero = llvm::Constant::getNullValue(ResultType); 516 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 517 518 BasicBlock *Begin = Builder.GetInsertBlock(); 519 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 520 
Builder.SetInsertPoint(End); 521 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 522 523 Builder.SetInsertPoint(Begin); 524 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 525 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 526 Builder.CreateCondBr(IsZero, End, NotZero); 527 Result->addIncoming(ResZero, Begin); 528 529 Builder.SetInsertPoint(NotZero); 530 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 531 532 if (BuiltinID == MSVCIntrin::_BitScanForward) { 533 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 534 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 535 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 536 Builder.CreateStore(ZeroCount, IndexAddress, false); 537 } else { 538 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 539 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 540 541 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 542 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 543 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 544 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 545 Builder.CreateStore(Index, IndexAddress, false); 546 } 547 Builder.CreateBr(End); 548 Result->addIncoming(ResOne, NotZero); 549 550 Builder.SetInsertPoint(End); 551 return Result; 552 } 553 case MSVCIntrin::_InterlockedAnd: 554 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 555 case MSVCIntrin::_InterlockedExchange: 556 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 557 case MSVCIntrin::_InterlockedExchangeAdd: 558 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 559 case MSVCIntrin::_InterlockedExchangeSub: 560 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 561 case MSVCIntrin::_InterlockedOr: 562 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 563 case MSVCIntrin::_InterlockedXor: 564 
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 565 566 case MSVCIntrin::_interlockedbittestandset: { 567 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 568 llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); 569 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 570 AtomicRMWInst::Or, Addr, 571 Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), 572 llvm::AtomicOrdering::SequentiallyConsistent); 573 // Shift the relevant bit to the least significant position, truncate to 574 // the result type, and test the low bit. 575 llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); 576 llvm::Value *Truncated = 577 Builder.CreateTrunc(Shifted, ConvertType(E->getType())); 578 return Builder.CreateAnd(Truncated, 579 ConstantInt::get(Truncated->getType(), 1)); 580 } 581 582 case MSVCIntrin::_InterlockedDecrement: { 583 llvm::Type *IntTy = ConvertType(E->getType()); 584 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 585 AtomicRMWInst::Sub, 586 EmitScalarExpr(E->getArg(0)), 587 ConstantInt::get(IntTy, 1), 588 llvm::AtomicOrdering::SequentiallyConsistent); 589 return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); 590 } 591 case MSVCIntrin::_InterlockedIncrement: { 592 llvm::Type *IntTy = ConvertType(E->getType()); 593 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 594 AtomicRMWInst::Add, 595 EmitScalarExpr(E->getArg(0)), 596 ConstantInt::get(IntTy, 1), 597 llvm::AtomicOrdering::SequentiallyConsistent); 598 return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); 599 } 600 601 case MSVCIntrin::__fastfail: { 602 // Request immediate process termination from the kernel. 
The instruction 603 // sequences to do this are documented on MSDN: 604 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 605 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 606 StringRef Asm, Constraints; 607 switch (ISA) { 608 default: 609 ErrorUnsupported(E, "__fastfail call for this architecture"); 610 break; 611 case llvm::Triple::x86: 612 case llvm::Triple::x86_64: 613 Asm = "int $$0x29"; 614 Constraints = "{cx}"; 615 break; 616 case llvm::Triple::thumb: 617 Asm = "udf #251"; 618 Constraints = "{r0}"; 619 break; 620 } 621 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 622 llvm::InlineAsm *IA = 623 llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); 624 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 625 getLLVMContext(), llvm::AttributeList::FunctionIndex, 626 llvm::Attribute::NoReturn); 627 CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 628 CS.setAttributes(NoReturnAttr); 629 return CS.getInstruction(); 630 } 631 } 632 llvm_unreachable("Incorrect MSVC intrinsic!"); 633 } 634 635 namespace { 636 // ARC cleanup for __builtin_os_log_format 637 struct CallObjCArcUse final : EHScopeStack::Cleanup { 638 CallObjCArcUse(llvm::Value *object) : object(object) {} 639 llvm::Value *object; 640 641 void Emit(CodeGenFunction &CGF, Flags flags) override { 642 CGF.EmitARCIntrinsicUse(object); 643 } 644 }; 645 } 646 647 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, 648 BuiltinCheckKind Kind) { 649 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) 650 && "Unsupported builtin check kind"); 651 652 Value *ArgValue = EmitScalarExpr(E); 653 if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) 654 return ArgValue; 655 656 SanitizerScope SanScope(this); 657 Value *Cond = Builder.CreateICmpNE( 658 ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); 659 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), 660 
SanitizerHandler::InvalidBuiltin, 661 {EmitCheckSourceLocation(E->getExprLoc()), 662 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, 663 None); 664 return ArgValue; 665 } 666 667 /// Get the argument type for arguments to os_log_helper. 668 static CanQualType getOSLogArgType(ASTContext &C, int Size) { 669 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); 670 return C.getCanonicalType(UnsignedTy); 671 } 672 673 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( 674 const analyze_os_log::OSLogBufferLayout &Layout, 675 CharUnits BufferAlignment) { 676 ASTContext &Ctx = getContext(); 677 678 llvm::SmallString<64> Name; 679 { 680 raw_svector_ostream OS(Name); 681 OS << "__os_log_helper"; 682 OS << "_" << BufferAlignment.getQuantity(); 683 OS << "_" << int(Layout.getSummaryByte()); 684 OS << "_" << int(Layout.getNumArgsByte()); 685 for (const auto &Item : Layout.Items) 686 OS << "_" << int(Item.getSizeByte()) << "_" 687 << int(Item.getDescriptorByte()); 688 } 689 690 if (llvm::Function *F = CGM.getModule().getFunction(Name)) 691 return F; 692 693 llvm::SmallVector<ImplicitParamDecl, 4> Params; 694 Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), 695 Ctx.VoidPtrTy, ImplicitParamDecl::Other); 696 697 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { 698 char Size = Layout.Items[I].getSizeByte(); 699 if (!Size) 700 continue; 701 702 Params.emplace_back( 703 Ctx, nullptr, SourceLocation(), 704 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), 705 getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other); 706 } 707 708 FunctionArgList Args; 709 for (auto &P : Params) 710 Args.push_back(&P); 711 712 // The helper function has linkonce_odr linkage to enable the linker to merge 713 // identical functions. To ensure the merging always happens, 'noinline' is 714 // attached to the function when compiling with -Oz. 
715 const CGFunctionInfo &FI = 716 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); 717 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); 718 llvm::Function *Fn = llvm::Function::Create( 719 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); 720 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); 721 CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); 722 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); 723 724 // Attach 'noinline' at -Oz. 725 if (CGM.getCodeGenOpts().OptimizeSize == 2) 726 Fn->addFnAttr(llvm::Attribute::NoInline); 727 728 auto NL = ApplyDebugLocation::CreateEmpty(*this); 729 IdentifierInfo *II = &Ctx.Idents.get(Name); 730 FunctionDecl *FD = FunctionDecl::Create( 731 Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, 732 Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); 733 734 StartFunction(FD, Ctx.VoidTy, Fn, FI, Args); 735 736 // Create a scope with an artificial location for the body of this function. 
737 auto AL = ApplyDebugLocation::CreateArtificial(*this); 738 739 CharUnits Offset; 740 Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"), 741 BufferAlignment); 742 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), 743 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 744 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), 745 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 746 747 unsigned I = 1; 748 for (const auto &Item : Layout.Items) { 749 Builder.CreateStore( 750 Builder.getInt8(Item.getDescriptorByte()), 751 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 752 Builder.CreateStore( 753 Builder.getInt8(Item.getSizeByte()), 754 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 755 756 CharUnits Size = Item.size(); 757 if (!Size.getQuantity()) 758 continue; 759 760 Address Arg = GetAddrOfLocalVar(&Params[I]); 761 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); 762 Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), 763 "argDataCast"); 764 Builder.CreateStore(Builder.CreateLoad(Arg), Addr); 765 Offset += Size; 766 ++I; 767 } 768 769 FinishFunction(); 770 771 return Fn; 772 } 773 774 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { 775 assert(E.getNumArgs() >= 2 && 776 "__builtin_os_log_format takes at least 2 arguments"); 777 ASTContext &Ctx = getContext(); 778 analyze_os_log::OSLogBufferLayout Layout; 779 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); 780 Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); 781 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 782 783 // Ignore argument 1, the format string. It is not currently used. 
784 CallArgList Args; 785 Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy); 786 787 for (const auto &Item : Layout.Items) { 788 int Size = Item.getSizeByte(); 789 if (!Size) 790 continue; 791 792 llvm::Value *ArgVal; 793 794 if (const Expr *TheExpr = Item.getExpr()) { 795 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); 796 797 // Check if this is a retainable type. 798 if (TheExpr->getType()->isObjCRetainableType()) { 799 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 800 "Only scalar can be a ObjC retainable type"); 801 // Check if the object is constant, if not, save it in 802 // RetainableOperands. 803 if (!isa<Constant>(ArgVal)) 804 RetainableOperands.push_back(ArgVal); 805 } 806 } else { 807 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); 808 } 809 810 unsigned ArgValSize = 811 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); 812 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), 813 ArgValSize); 814 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); 815 CanQualType ArgTy = getOSLogArgType(Ctx, Size); 816 // If ArgVal has type x86_fp80, zero-extend ArgVal. 817 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); 818 Args.add(RValue::get(ArgVal), ArgTy); 819 } 820 821 const CGFunctionInfo &FI = 822 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); 823 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( 824 Layout, BufAddr.getAlignment()); 825 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); 826 827 // Push a clang.arc.use cleanup for each object in RetainableOperands. The 828 // cleanup will cause the use to appear after the final log call, keeping 829 // the object valid while it’s held in the log buffer. Note that if there’s 830 // a release cleanup on the object, it will already be active; since 831 // cleanups are emitted in reverse order, the use will occur before the 832 // object is released. 
833 if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && 834 CGM.getCodeGenOpts().OptimizationLevel != 0) 835 for (llvm::Value *Object : RetainableOperands) 836 pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object); 837 838 return RValue::get(BufAddr.getPointer()); 839 } 840 841 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 842 unsigned BuiltinID, const CallExpr *E, 843 ReturnValueSlot ReturnValue) { 844 // See if we can constant fold this builtin. If so, don't emit it at all. 845 Expr::EvalResult Result; 846 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 847 !Result.hasSideEffects()) { 848 if (Result.Val.isInt()) 849 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 850 Result.Val.getInt())); 851 if (Result.Val.isFloat()) 852 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 853 Result.Val.getFloat())); 854 } 855 856 switch (BuiltinID) { 857 default: break; // Handle intrinsics and libm functions below. 858 case Builtin::BI__builtin___CFStringMakeConstantString: 859 case Builtin::BI__builtin___NSStringMakeConstantString: 860 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); 861 case Builtin::BI__builtin_stdarg_start: 862 case Builtin::BI__builtin_va_start: 863 case Builtin::BI__va_start: 864 case Builtin::BI__builtin_va_end: 865 return RValue::get( 866 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 867 ? 
EmitScalarExpr(E->getArg(0)) 868 : EmitVAListRef(E->getArg(0)).getPointer(), 869 BuiltinID != Builtin::BI__builtin_va_end)); 870 case Builtin::BI__builtin_va_copy: { 871 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 872 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 873 874 llvm::Type *Type = Int8PtrTy; 875 876 DstPtr = Builder.CreateBitCast(DstPtr, Type); 877 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 878 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 879 {DstPtr, SrcPtr})); 880 } 881 case Builtin::BI__builtin_abs: 882 case Builtin::BI__builtin_labs: 883 case Builtin::BI__builtin_llabs: { 884 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 885 886 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 887 Value *CmpResult = 888 Builder.CreateICmpSGE(ArgValue, 889 llvm::Constant::getNullValue(ArgValue->getType()), 890 "abscond"); 891 Value *Result = 892 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 893 894 return RValue::get(Result); 895 } 896 case Builtin::BI__builtin_fabs: 897 case Builtin::BI__builtin_fabsf: 898 case Builtin::BI__builtin_fabsl: { 899 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 900 } 901 case Builtin::BI__builtin_fmod: 902 case Builtin::BI__builtin_fmodf: 903 case Builtin::BI__builtin_fmodl: { 904 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 905 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 906 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 907 return RValue::get(Result); 908 } 909 case Builtin::BI__builtin_copysign: 910 case Builtin::BI__builtin_copysignf: 911 case Builtin::BI__builtin_copysignl: { 912 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 913 } 914 case Builtin::BI__builtin_ceil: 915 case Builtin::BI__builtin_ceilf: 916 case Builtin::BI__builtin_ceill: { 917 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 918 } 919 case Builtin::BI__builtin_floor: 920 case Builtin::BI__builtin_floorf: 921 case 
Builtin::BI__builtin_floorl: { 922 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 923 } 924 case Builtin::BI__builtin_trunc: 925 case Builtin::BI__builtin_truncf: 926 case Builtin::BI__builtin_truncl: { 927 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 928 } 929 case Builtin::BI__builtin_rint: 930 case Builtin::BI__builtin_rintf: 931 case Builtin::BI__builtin_rintl: { 932 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 933 } 934 case Builtin::BI__builtin_nearbyint: 935 case Builtin::BI__builtin_nearbyintf: 936 case Builtin::BI__builtin_nearbyintl: { 937 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 938 } 939 case Builtin::BI__builtin_round: 940 case Builtin::BI__builtin_roundf: 941 case Builtin::BI__builtin_roundl: { 942 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 943 } 944 case Builtin::BI__builtin_fmin: 945 case Builtin::BI__builtin_fminf: 946 case Builtin::BI__builtin_fminl: { 947 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 948 } 949 case Builtin::BI__builtin_fmax: 950 case Builtin::BI__builtin_fmaxf: 951 case Builtin::BI__builtin_fmaxl: { 952 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 953 } 954 case Builtin::BI__builtin_conj: 955 case Builtin::BI__builtin_conjf: 956 case Builtin::BI__builtin_conjl: { 957 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 958 Value *Real = ComplexVal.first; 959 Value *Imag = ComplexVal.second; 960 Value *Zero = 961 Imag->getType()->isFPOrFPVectorTy() 962 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 963 : llvm::Constant::getNullValue(Imag->getType()); 964 965 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 966 return RValue::getComplex(std::make_pair(Real, Imag)); 967 } 968 case Builtin::BI__builtin_creal: 969 case Builtin::BI__builtin_crealf: 970 case Builtin::BI__builtin_creall: 971 case Builtin::BIcreal: 972 case Builtin::BIcrealf: 973 case Builtin::BIcreall: { 974 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 975 return RValue::get(ComplexVal.first); 976 } 977 978 case Builtin::BI__builtin_cimag: 979 case Builtin::BI__builtin_cimagf: 980 case Builtin::BI__builtin_cimagl: 981 case Builtin::BIcimag: 982 case Builtin::BIcimagf: 983 case Builtin::BIcimagl: { 984 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 985 return RValue::get(ComplexVal.second); 986 } 987 988 case Builtin::BI__builtin_ctzs: 989 case Builtin::BI__builtin_ctz: 990 case Builtin::BI__builtin_ctzl: 991 case Builtin::BI__builtin_ctzll: { 992 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); 993 994 llvm::Type *ArgType = ArgValue->getType(); 995 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 996 997 llvm::Type *ResultType = ConvertType(E->getType()); 998 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 999 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 1000 if (Result->getType() != ResultType) 1001 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1002 "cast"); 1003 return RValue::get(Result); 1004 } 1005 case Builtin::BI__builtin_clzs: 1006 case Builtin::BI__builtin_clz: 1007 case Builtin::BI__builtin_clzl: 1008 case Builtin::BI__builtin_clzll: { 1009 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); 1010 1011 llvm::Type *ArgType = ArgValue->getType(); 1012 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 1013 1014 llvm::Type *ResultType = ConvertType(E->getType()); 1015 Value *ZeroUndef = 
Builder.getInt1(getTarget().isCLZForZeroUndef()); 1016 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 1017 if (Result->getType() != ResultType) 1018 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1019 "cast"); 1020 return RValue::get(Result); 1021 } 1022 case Builtin::BI__builtin_ffs: 1023 case Builtin::BI__builtin_ffsl: 1024 case Builtin::BI__builtin_ffsll: { 1025 // ffs(x) -> x ? cttz(x) + 1 : 0 1026 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1027 1028 llvm::Type *ArgType = ArgValue->getType(); 1029 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 1030 1031 llvm::Type *ResultType = ConvertType(E->getType()); 1032 Value *Tmp = 1033 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 1034 llvm::ConstantInt::get(ArgType, 1)); 1035 Value *Zero = llvm::Constant::getNullValue(ArgType); 1036 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 1037 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 1038 if (Result->getType() != ResultType) 1039 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1040 "cast"); 1041 return RValue::get(Result); 1042 } 1043 case Builtin::BI__builtin_parity: 1044 case Builtin::BI__builtin_parityl: 1045 case Builtin::BI__builtin_parityll: { 1046 // parity(x) -> ctpop(x) & 1 1047 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1048 1049 llvm::Type *ArgType = ArgValue->getType(); 1050 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 1051 1052 llvm::Type *ResultType = ConvertType(E->getType()); 1053 Value *Tmp = Builder.CreateCall(F, ArgValue); 1054 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 1055 if (Result->getType() != ResultType) 1056 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1057 "cast"); 1058 return RValue::get(Result); 1059 } 1060 case Builtin::BI__popcnt16: 1061 case Builtin::BI__popcnt: 1062 case Builtin::BI__popcnt64: 1063 case Builtin::BI__builtin_popcount: 1064 
case Builtin::BI__builtin_popcountl: 1065 case Builtin::BI__builtin_popcountll: { 1066 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1067 1068 llvm::Type *ArgType = ArgValue->getType(); 1069 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 1070 1071 llvm::Type *ResultType = ConvertType(E->getType()); 1072 Value *Result = Builder.CreateCall(F, ArgValue); 1073 if (Result->getType() != ResultType) 1074 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1075 "cast"); 1076 return RValue::get(Result); 1077 } 1078 case Builtin::BI_rotr8: 1079 case Builtin::BI_rotr16: 1080 case Builtin::BI_rotr: 1081 case Builtin::BI_lrotr: 1082 case Builtin::BI_rotr64: { 1083 Value *Val = EmitScalarExpr(E->getArg(0)); 1084 Value *Shift = EmitScalarExpr(E->getArg(1)); 1085 1086 llvm::Type *ArgType = Val->getType(); 1087 Shift = Builder.CreateIntCast(Shift, ArgType, false); 1088 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1089 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 1090 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 1091 1092 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 1093 Shift = Builder.CreateAnd(Shift, Mask); 1094 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 1095 1096 Value *RightShifted = Builder.CreateLShr(Val, Shift); 1097 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 1098 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 1099 1100 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 1101 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 1102 return RValue::get(Result); 1103 } 1104 case Builtin::BI_rotl8: 1105 case Builtin::BI_rotl16: 1106 case Builtin::BI_rotl: 1107 case Builtin::BI_lrotl: 1108 case Builtin::BI_rotl64: { 1109 Value *Val = EmitScalarExpr(E->getArg(0)); 1110 Value *Shift = EmitScalarExpr(E->getArg(1)); 1111 1112 llvm::Type *ArgType = Val->getType(); 1113 Shift = Builder.CreateIntCast(Shift, ArgType, false); 1114 
unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1115 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 1116 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 1117 1118 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 1119 Shift = Builder.CreateAnd(Shift, Mask); 1120 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 1121 1122 Value *LeftShifted = Builder.CreateShl(Val, Shift); 1123 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 1124 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 1125 1126 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 1127 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 1128 return RValue::get(Result); 1129 } 1130 case Builtin::BI__builtin_unpredictable: { 1131 // Always return the argument of __builtin_unpredictable. LLVM does not 1132 // handle this builtin. Metadata for this builtin should be added directly 1133 // to instructions such as branches or switches that use it. 1134 return RValue::get(EmitScalarExpr(E->getArg(0))); 1135 } 1136 case Builtin::BI__builtin_expect: { 1137 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1138 llvm::Type *ArgType = ArgValue->getType(); 1139 1140 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 1141 // Don't generate llvm.expect on -O0 as the backend won't use it for 1142 // anything. 1143 // Note, we still IRGen ExpectedValue because it could have side-effects. 1144 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 1145 return RValue::get(ArgValue); 1146 1147 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 1148 Value *Result = 1149 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 1150 return RValue::get(Result); 1151 } 1152 case Builtin::BI__builtin_assume_aligned: { 1153 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 1154 Value *OffsetValue = 1155 (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; 1156 1157 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 1158 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 1159 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 1160 1161 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 1162 return RValue::get(PtrValue); 1163 } 1164 case Builtin::BI__assume: 1165 case Builtin::BI__builtin_assume: { 1166 if (E->getArg(0)->HasSideEffects(getContext())) 1167 return RValue::get(nullptr); 1168 1169 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1170 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 1171 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 1172 } 1173 case Builtin::BI__builtin_bswap16: 1174 case Builtin::BI__builtin_bswap32: 1175 case Builtin::BI__builtin_bswap64: { 1176 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 1177 } 1178 case Builtin::BI__builtin_bitreverse8: 1179 case Builtin::BI__builtin_bitreverse16: 1180 case Builtin::BI__builtin_bitreverse32: 1181 case Builtin::BI__builtin_bitreverse64: { 1182 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 1183 } 1184 case Builtin::BI__builtin_object_size: { 1185 unsigned Type = 1186 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 1187 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 1188 1189 // We pass this builtin onto the optimizer so that it can figure out the 1190 // object size in more complex cases. 1191 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 1192 /*EmittedE=*/nullptr)); 1193 } 1194 case Builtin::BI__builtin_prefetch: { 1195 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 1196 // FIXME: Technically these constants should of type 'int', yes? 1197 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 1198 llvm::ConstantInt::get(Int32Ty, 0); 1199 Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : 1200 llvm::ConstantInt::get(Int32Ty, 3); 1201 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 1202 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 1203 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 1204 } 1205 case Builtin::BI__builtin_readcyclecounter: { 1206 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 1207 return RValue::get(Builder.CreateCall(F)); 1208 } 1209 case Builtin::BI__builtin___clear_cache: { 1210 Value *Begin = EmitScalarExpr(E->getArg(0)); 1211 Value *End = EmitScalarExpr(E->getArg(1)); 1212 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 1213 return RValue::get(Builder.CreateCall(F, {Begin, End})); 1214 } 1215 case Builtin::BI__builtin_trap: 1216 return RValue::get(EmitTrapCall(Intrinsic::trap)); 1217 case Builtin::BI__debugbreak: 1218 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 1219 case Builtin::BI__builtin_unreachable: { 1220 if (SanOpts.has(SanitizerKind::Unreachable)) { 1221 SanitizerScope SanScope(this); 1222 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 1223 SanitizerKind::Unreachable), 1224 SanitizerHandler::BuiltinUnreachable, 1225 EmitCheckSourceLocation(E->getExprLoc()), None); 1226 } else 1227 Builder.CreateUnreachable(); 1228 1229 // We do need to preserve an insertion point. 
1230 EmitBlock(createBasicBlock("unreachable.cont")); 1231 1232 return RValue::get(nullptr); 1233 } 1234 1235 case Builtin::BI__builtin_powi: 1236 case Builtin::BI__builtin_powif: 1237 case Builtin::BI__builtin_powil: { 1238 Value *Base = EmitScalarExpr(E->getArg(0)); 1239 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1240 llvm::Type *ArgType = Base->getType(); 1241 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 1242 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1243 } 1244 1245 case Builtin::BI__builtin_isgreater: 1246 case Builtin::BI__builtin_isgreaterequal: 1247 case Builtin::BI__builtin_isless: 1248 case Builtin::BI__builtin_islessequal: 1249 case Builtin::BI__builtin_islessgreater: 1250 case Builtin::BI__builtin_isunordered: { 1251 // Ordered comparisons: we know the arguments to these are matching scalar 1252 // floating point values. 1253 Value *LHS = EmitScalarExpr(E->getArg(0)); 1254 Value *RHS = EmitScalarExpr(E->getArg(1)); 1255 1256 switch (BuiltinID) { 1257 default: llvm_unreachable("Unknown ordered comparison"); 1258 case Builtin::BI__builtin_isgreater: 1259 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 1260 break; 1261 case Builtin::BI__builtin_isgreaterequal: 1262 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 1263 break; 1264 case Builtin::BI__builtin_isless: 1265 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 1266 break; 1267 case Builtin::BI__builtin_islessequal: 1268 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 1269 break; 1270 case Builtin::BI__builtin_islessgreater: 1271 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 1272 break; 1273 case Builtin::BI__builtin_isunordered: 1274 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1275 break; 1276 } 1277 // ZExt bool to int type. 
1278 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1279 } 1280 case Builtin::BI__builtin_isnan: { 1281 Value *V = EmitScalarExpr(E->getArg(0)); 1282 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1283 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1284 } 1285 1286 case Builtin::BIfinite: 1287 case Builtin::BI__finite: 1288 case Builtin::BIfinitef: 1289 case Builtin::BI__finitef: 1290 case Builtin::BIfinitel: 1291 case Builtin::BI__finitel: 1292 case Builtin::BI__builtin_isinf: 1293 case Builtin::BI__builtin_isfinite: { 1294 // isinf(x) --> fabs(x) == infinity 1295 // isfinite(x) --> fabs(x) != infinity 1296 // x != NaN via the ordered compare in either case. 1297 Value *V = EmitScalarExpr(E->getArg(0)); 1298 Value *Fabs = EmitFAbs(*this, V); 1299 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1300 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1301 ? CmpInst::FCMP_OEQ 1302 : CmpInst::FCMP_ONE; 1303 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1304 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1305 } 1306 1307 case Builtin::BI__builtin_isinf_sign: { 1308 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 1309 Value *Arg = EmitScalarExpr(E->getArg(0)); 1310 Value *AbsArg = EmitFAbs(*this, Arg); 1311 Value *IsInf = Builder.CreateFCmpOEQ( 1312 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1313 Value *IsNeg = EmitSignBit(*this, Arg); 1314 1315 llvm::Type *IntTy = ConvertType(E->getType()); 1316 Value *Zero = Constant::getNullValue(IntTy); 1317 Value *One = ConstantInt::get(IntTy, 1); 1318 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1319 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1320 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1321 return RValue::get(Result); 1322 } 1323 1324 case Builtin::BI__builtin_isnormal: { 1325 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1326 Value *V = EmitScalarExpr(E->getArg(0)); 1327 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1328 1329 Value *Abs = EmitFAbs(*this, V); 1330 Value *IsLessThanInf = 1331 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1332 APFloat Smallest = APFloat::getSmallestNormalized( 1333 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1334 Value *IsNormal = 1335 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1336 "isnormal"); 1337 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1338 V = Builder.CreateAnd(V, IsNormal, "and"); 1339 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1340 } 1341 1342 case Builtin::BI__builtin_fpclassify: { 1343 Value *V = EmitScalarExpr(E->getArg(5)); 1344 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1345 1346 // Create Result 1347 BasicBlock *Begin = Builder.GetInsertBlock(); 1348 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1349 Builder.SetInsertPoint(End); 1350 PHINode *Result = 1351 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1352 "fpclassify_result"); 1353 1354 // if (V==0) return FP_ZERO 1355 Builder.SetInsertPoint(Begin); 1356 Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1357 "iszero"); 1358 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1359 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1360 Builder.CreateCondBr(IsZero, End, NotZero); 1361 Result->addIncoming(ZeroLiteral, Begin); 1362 1363 // if (V != V) return FP_NAN 1364 Builder.SetInsertPoint(NotZero); 1365 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1366 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1367 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1368 Builder.CreateCondBr(IsNan, End, NotNan); 1369 Result->addIncoming(NanLiteral, NotZero); 1370 1371 // if (fabs(V) == infinity) return FP_INFINITY 1372 Builder.SetInsertPoint(NotNan); 1373 Value *VAbs = EmitFAbs(*this, V); 1374 Value *IsInf = 1375 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1376 "isinf"); 1377 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1378 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1379 Builder.CreateCondBr(IsInf, End, NotInf); 1380 Result->addIncoming(InfLiteral, NotNan); 1381 1382 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1383 Builder.SetInsertPoint(NotInf); 1384 APFloat Smallest = APFloat::getSmallestNormalized( 1385 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1386 Value *IsNormal = 1387 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1388 "isnormal"); 1389 Value *NormalResult = 1390 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1391 EmitScalarExpr(E->getArg(3))); 1392 Builder.CreateBr(End); 1393 Result->addIncoming(NormalResult, NotInf); 1394 1395 // return Result 1396 Builder.SetInsertPoint(End); 1397 return RValue::get(Result); 1398 } 1399 1400 case Builtin::BIalloca: 1401 case Builtin::BI_alloca: 1402 case Builtin::BI__builtin_alloca: { 1403 Value *Size = EmitScalarExpr(E->getArg(0)); 1404 const TargetInfo &TI = getContext().getTargetInfo(); 
1405 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 1406 unsigned SuitableAlignmentInBytes = 1407 CGM.getContext() 1408 .toCharUnitsFromBits(TI.getSuitableAlign()) 1409 .getQuantity(); 1410 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1411 AI->setAlignment(SuitableAlignmentInBytes); 1412 return RValue::get(AI); 1413 } 1414 1415 case Builtin::BI__builtin_alloca_with_align: { 1416 Value *Size = EmitScalarExpr(E->getArg(0)); 1417 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 1418 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 1419 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 1420 unsigned AlignmentInBytes = 1421 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); 1422 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1423 AI->setAlignment(AlignmentInBytes); 1424 return RValue::get(AI); 1425 } 1426 1427 case Builtin::BIbzero: 1428 case Builtin::BI__builtin_bzero: { 1429 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1430 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1431 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1432 E->getArg(0)->getExprLoc(), FD, 0); 1433 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1434 return RValue::get(Dest.getPointer()); 1435 } 1436 case Builtin::BImemcpy: 1437 case Builtin::BI__builtin_memcpy: { 1438 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1439 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1440 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1441 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1442 E->getArg(0)->getExprLoc(), FD, 0); 1443 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1444 E->getArg(1)->getExprLoc(), FD, 1); 1445 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1446 return RValue::get(Dest.getPointer()); 1447 } 1448 1449 case 
Builtin::BI__builtin_char_memchr: 1450 BuiltinID = Builtin::BI__builtin_memchr; 1451 break; 1452 1453 case Builtin::BI__builtin___memcpy_chk: { 1454 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 1455 llvm::APSInt Size, DstSize; 1456 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1457 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1458 break; 1459 if (Size.ugt(DstSize)) 1460 break; 1461 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1462 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1463 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1464 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1465 return RValue::get(Dest.getPointer()); 1466 } 1467 1468 case Builtin::BI__builtin_objc_memmove_collectable: { 1469 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1470 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1471 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1472 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1473 DestAddr, SrcAddr, SizeVal); 1474 return RValue::get(DestAddr.getPointer()); 1475 } 1476 1477 case Builtin::BI__builtin___memmove_chk: { 1478 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1479 llvm::APSInt Size, DstSize; 1480 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1481 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1482 break; 1483 if (Size.ugt(DstSize)) 1484 break; 1485 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1486 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1487 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1488 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1489 return RValue::get(Dest.getPointer()); 1490 } 1491 1492 case Builtin::BImemmove: 1493 case Builtin::BI__builtin_memmove: { 1494 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1495 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1496 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1497 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1498 E->getArg(0)->getExprLoc(), FD, 0); 1499 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1500 E->getArg(1)->getExprLoc(), FD, 1); 1501 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1502 return RValue::get(Dest.getPointer()); 1503 } 1504 case Builtin::BImemset: 1505 case Builtin::BI__builtin_memset: { 1506 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1507 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1508 Builder.getInt8Ty()); 1509 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1510 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1511 E->getArg(0)->getExprLoc(), FD, 0); 1512 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1513 return RValue::get(Dest.getPointer()); 1514 } 1515 case Builtin::BI__builtin___memset_chk: { 1516 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1517 llvm::APSInt Size, DstSize; 1518 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1519 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1520 break; 1521 if (Size.ugt(DstSize)) 1522 break; 1523 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1524 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1525 Builder.getInt8Ty()); 1526 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1527 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1528 return RValue::get(Dest.getPointer()); 1529 } 1530 case Builtin::BI__builtin_dwarf_cfa: { 1531 // The offset in bytes from the first argument to the CFA. 1532 // 1533 // Why on earth is this in the frontend? Is there any reason at 1534 // all that the backend can't reasonably determine this while 1535 // lowering llvm.eh.dwarf.cfa()? 1536 // 1537 // TODO: If there's a satisfactory reason, add a target hook for 1538 // this instead of hard-coding 0, which is correct for most targets. 1539 int32_t Offset = 0; 1540 1541 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1542 return RValue::get(Builder.CreateCall(F, 1543 llvm::ConstantInt::get(Int32Ty, Offset))); 1544 } 1545 case Builtin::BI__builtin_return_address: { 1546 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1547 getContext().UnsignedIntTy); 1548 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1549 return RValue::get(Builder.CreateCall(F, Depth)); 1550 } 1551 case Builtin::BI_ReturnAddress: { 1552 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1553 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1554 } 1555 case Builtin::BI__builtin_frame_address: { 1556 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1557 getContext().UnsignedIntTy); 1558 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1559 return RValue::get(Builder.CreateCall(F, Depth)); 1560 } 1561 case Builtin::BI__builtin_extract_return_addr: { 1562 Value *Address = 
EmitScalarExpr(E->getArg(0)); 1563 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1564 return RValue::get(Result); 1565 } 1566 case Builtin::BI__builtin_frob_return_addr: { 1567 Value *Address = EmitScalarExpr(E->getArg(0)); 1568 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1569 return RValue::get(Result); 1570 } 1571 case Builtin::BI__builtin_dwarf_sp_column: { 1572 llvm::IntegerType *Ty 1573 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1574 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1575 if (Column == -1) { 1576 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1577 return RValue::get(llvm::UndefValue::get(Ty)); 1578 } 1579 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1580 } 1581 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1582 Value *Address = EmitScalarExpr(E->getArg(0)); 1583 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1584 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1585 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1586 } 1587 case Builtin::BI__builtin_eh_return: { 1588 Value *Int = EmitScalarExpr(E->getArg(0)); 1589 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1590 1591 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1592 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1593 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1594 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1595 ? Intrinsic::eh_return_i32 1596 : Intrinsic::eh_return_i64); 1597 Builder.CreateCall(F, {Int, Ptr}); 1598 Builder.CreateUnreachable(); 1599 1600 // We do need to preserve an insertion point. 
1601 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1602 1603 return RValue::get(nullptr); 1604 } 1605 case Builtin::BI__builtin_unwind_init: { 1606 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1607 return RValue::get(Builder.CreateCall(F)); 1608 } 1609 case Builtin::BI__builtin_extend_pointer: { 1610 // Extends a pointer to the size of an _Unwind_Word, which is 1611 // uint64_t on all platforms. Generally this gets poked into a 1612 // register and eventually used as an address, so if the 1613 // addressing registers are wider than pointers and the platform 1614 // doesn't implicitly ignore high-order bits when doing 1615 // addressing, we need to make sure we zext / sext based on 1616 // the platform's expectations. 1617 // 1618 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1619 1620 // Cast the pointer to intptr_t. 1621 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1622 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1623 1624 // If that's 64 bits, we're done. 1625 if (IntPtrTy->getBitWidth() == 64) 1626 return RValue::get(Result); 1627 1628 // Otherwise, ask the codegen data what to do. 1629 if (getTargetHooks().extendPointerWithSExt()) 1630 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1631 else 1632 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1633 } 1634 case Builtin::BI__builtin_setjmp: { 1635 // Buffer is a void**. 1636 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1637 1638 // Store the frame pointer to the setjmp buffer. 1639 Value *FrameAddr = 1640 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1641 ConstantInt::get(Int32Ty, 0)); 1642 Builder.CreateStore(FrameAddr, Buf); 1643 1644 // Store the stack pointer to the setjmp buffer. 
1645 Value *StackAddr = 1646 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1647 Address StackSaveSlot = 1648 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1649 Builder.CreateStore(StackAddr, StackSaveSlot); 1650 1651 // Call LLVM's EH setjmp, which is lightweight. 1652 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1653 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1654 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1655 } 1656 case Builtin::BI__builtin_longjmp: { 1657 Value *Buf = EmitScalarExpr(E->getArg(0)); 1658 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1659 1660 // Call LLVM's EH longjmp, which is lightweight. 1661 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1662 1663 // longjmp doesn't return; mark this as unreachable. 1664 Builder.CreateUnreachable(); 1665 1666 // We do need to preserve an insertion point. 1667 EmitBlock(createBasicBlock("longjmp.cont")); 1668 1669 return RValue::get(nullptr); 1670 } 1671 case Builtin::BI__sync_fetch_and_add: 1672 case Builtin::BI__sync_fetch_and_sub: 1673 case Builtin::BI__sync_fetch_and_or: 1674 case Builtin::BI__sync_fetch_and_and: 1675 case Builtin::BI__sync_fetch_and_xor: 1676 case Builtin::BI__sync_fetch_and_nand: 1677 case Builtin::BI__sync_add_and_fetch: 1678 case Builtin::BI__sync_sub_and_fetch: 1679 case Builtin::BI__sync_and_and_fetch: 1680 case Builtin::BI__sync_or_and_fetch: 1681 case Builtin::BI__sync_xor_and_fetch: 1682 case Builtin::BI__sync_nand_and_fetch: 1683 case Builtin::BI__sync_val_compare_and_swap: 1684 case Builtin::BI__sync_bool_compare_and_swap: 1685 case Builtin::BI__sync_lock_test_and_set: 1686 case Builtin::BI__sync_lock_release: 1687 case Builtin::BI__sync_swap: 1688 llvm_unreachable("Shouldn't make it through sema"); 1689 case Builtin::BI__sync_fetch_and_add_1: 1690 case Builtin::BI__sync_fetch_and_add_2: 1691 case Builtin::BI__sync_fetch_and_add_4: 1692 case Builtin::BI__sync_fetch_and_add_8: 1693 case 
Builtin::BI__sync_fetch_and_add_16: 1694 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1695 case Builtin::BI__sync_fetch_and_sub_1: 1696 case Builtin::BI__sync_fetch_and_sub_2: 1697 case Builtin::BI__sync_fetch_and_sub_4: 1698 case Builtin::BI__sync_fetch_and_sub_8: 1699 case Builtin::BI__sync_fetch_and_sub_16: 1700 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1701 case Builtin::BI__sync_fetch_and_or_1: 1702 case Builtin::BI__sync_fetch_and_or_2: 1703 case Builtin::BI__sync_fetch_and_or_4: 1704 case Builtin::BI__sync_fetch_and_or_8: 1705 case Builtin::BI__sync_fetch_and_or_16: 1706 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1707 case Builtin::BI__sync_fetch_and_and_1: 1708 case Builtin::BI__sync_fetch_and_and_2: 1709 case Builtin::BI__sync_fetch_and_and_4: 1710 case Builtin::BI__sync_fetch_and_and_8: 1711 case Builtin::BI__sync_fetch_and_and_16: 1712 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1713 case Builtin::BI__sync_fetch_and_xor_1: 1714 case Builtin::BI__sync_fetch_and_xor_2: 1715 case Builtin::BI__sync_fetch_and_xor_4: 1716 case Builtin::BI__sync_fetch_and_xor_8: 1717 case Builtin::BI__sync_fetch_and_xor_16: 1718 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1719 case Builtin::BI__sync_fetch_and_nand_1: 1720 case Builtin::BI__sync_fetch_and_nand_2: 1721 case Builtin::BI__sync_fetch_and_nand_4: 1722 case Builtin::BI__sync_fetch_and_nand_8: 1723 case Builtin::BI__sync_fetch_and_nand_16: 1724 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1725 1726 // Clang extensions: not overloaded yet. 
1727 case Builtin::BI__sync_fetch_and_min: 1728 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1729 case Builtin::BI__sync_fetch_and_max: 1730 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1731 case Builtin::BI__sync_fetch_and_umin: 1732 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1733 case Builtin::BI__sync_fetch_and_umax: 1734 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1735 1736 case Builtin::BI__sync_add_and_fetch_1: 1737 case Builtin::BI__sync_add_and_fetch_2: 1738 case Builtin::BI__sync_add_and_fetch_4: 1739 case Builtin::BI__sync_add_and_fetch_8: 1740 case Builtin::BI__sync_add_and_fetch_16: 1741 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1742 llvm::Instruction::Add); 1743 case Builtin::BI__sync_sub_and_fetch_1: 1744 case Builtin::BI__sync_sub_and_fetch_2: 1745 case Builtin::BI__sync_sub_and_fetch_4: 1746 case Builtin::BI__sync_sub_and_fetch_8: 1747 case Builtin::BI__sync_sub_and_fetch_16: 1748 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1749 llvm::Instruction::Sub); 1750 case Builtin::BI__sync_and_and_fetch_1: 1751 case Builtin::BI__sync_and_and_fetch_2: 1752 case Builtin::BI__sync_and_and_fetch_4: 1753 case Builtin::BI__sync_and_and_fetch_8: 1754 case Builtin::BI__sync_and_and_fetch_16: 1755 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1756 llvm::Instruction::And); 1757 case Builtin::BI__sync_or_and_fetch_1: 1758 case Builtin::BI__sync_or_and_fetch_2: 1759 case Builtin::BI__sync_or_and_fetch_4: 1760 case Builtin::BI__sync_or_and_fetch_8: 1761 case Builtin::BI__sync_or_and_fetch_16: 1762 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1763 llvm::Instruction::Or); 1764 case Builtin::BI__sync_xor_and_fetch_1: 1765 case Builtin::BI__sync_xor_and_fetch_2: 1766 case Builtin::BI__sync_xor_and_fetch_4: 1767 case Builtin::BI__sync_xor_and_fetch_8: 1768 case Builtin::BI__sync_xor_and_fetch_16: 1769 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1770 llvm::Instruction::Xor); 1771 case Builtin::BI__sync_nand_and_fetch_1: 1772 case Builtin::BI__sync_nand_and_fetch_2: 1773 case Builtin::BI__sync_nand_and_fetch_4: 1774 case Builtin::BI__sync_nand_and_fetch_8: 1775 case Builtin::BI__sync_nand_and_fetch_16: 1776 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1777 llvm::Instruction::And, true); 1778 1779 case Builtin::BI__sync_val_compare_and_swap_1: 1780 case Builtin::BI__sync_val_compare_and_swap_2: 1781 case Builtin::BI__sync_val_compare_and_swap_4: 1782 case Builtin::BI__sync_val_compare_and_swap_8: 1783 case Builtin::BI__sync_val_compare_and_swap_16: 1784 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1785 1786 case Builtin::BI__sync_bool_compare_and_swap_1: 1787 case Builtin::BI__sync_bool_compare_and_swap_2: 1788 case Builtin::BI__sync_bool_compare_and_swap_4: 1789 case Builtin::BI__sync_bool_compare_and_swap_8: 1790 case Builtin::BI__sync_bool_compare_and_swap_16: 1791 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1792 1793 case Builtin::BI__sync_swap_1: 1794 case Builtin::BI__sync_swap_2: 1795 case Builtin::BI__sync_swap_4: 1796 case Builtin::BI__sync_swap_8: 1797 case Builtin::BI__sync_swap_16: 1798 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1799 1800 case Builtin::BI__sync_lock_test_and_set_1: 1801 case Builtin::BI__sync_lock_test_and_set_2: 1802 case Builtin::BI__sync_lock_test_and_set_4: 1803 case Builtin::BI__sync_lock_test_and_set_8: 1804 case Builtin::BI__sync_lock_test_and_set_16: 1805 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1806 1807 case Builtin::BI__sync_lock_release_1: 1808 case Builtin::BI__sync_lock_release_2: 1809 case Builtin::BI__sync_lock_release_4: 1810 case Builtin::BI__sync_lock_release_8: 1811 case Builtin::BI__sync_lock_release_16: { 1812 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1813 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 1814 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1815 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1816 StoreSize.getQuantity() * 8); 1817 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1818 llvm::StoreInst *Store = 1819 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1820 StoreSize); 1821 Store->setAtomic(llvm::AtomicOrdering::Release); 1822 return RValue::get(nullptr); 1823 } 1824 1825 case Builtin::BI__sync_synchronize: { 1826 // We assume this is supposed to correspond to a C++0x-style 1827 // sequentially-consistent fence (i.e. this is only usable for 1828 // synchonization, not device I/O or anything like that). This intrinsic 1829 // is really badly designed in the sense that in theory, there isn't 1830 // any way to safely use it... but in practice, it mostly works 1831 // to use it with non-atomic loads and stores to get acquire/release 1832 // semantics. 1833 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1834 return RValue::get(nullptr); 1835 } 1836 1837 case Builtin::BI__builtin_nontemporal_load: 1838 return RValue::get(EmitNontemporalLoad(*this, E)); 1839 case Builtin::BI__builtin_nontemporal_store: 1840 return RValue::get(EmitNontemporalStore(*this, E)); 1841 case Builtin::BI__c11_atomic_is_lock_free: 1842 case Builtin::BI__atomic_is_lock_free: { 1843 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1844 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1845 // _Atomic(T) is always properly-aligned. 
1846 const char *LibCallName = "__atomic_is_lock_free"; 1847 CallArgList Args; 1848 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1849 getContext().getSizeType()); 1850 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1851 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1852 getContext().VoidPtrTy); 1853 else 1854 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1855 getContext().VoidPtrTy); 1856 const CGFunctionInfo &FuncInfo = 1857 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1858 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1859 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1860 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 1861 ReturnValueSlot(), Args); 1862 } 1863 1864 case Builtin::BI__atomic_test_and_set: { 1865 // Look at the argument type to determine whether this is a volatile 1866 // operation. The parameter type is always volatile. 1867 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1868 bool Volatile = 1869 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1870 1871 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1872 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1873 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1874 Value *NewVal = Builder.getInt8(1); 1875 Value *Order = EmitScalarExpr(E->getArg(1)); 1876 if (isa<llvm::ConstantInt>(Order)) { 1877 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1878 AtomicRMWInst *Result = nullptr; 1879 switch (ord) { 1880 case 0: // memory_order_relaxed 1881 default: // invalid order 1882 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1883 llvm::AtomicOrdering::Monotonic); 1884 break; 1885 case 1: // memory_order_consume 1886 case 2: // memory_order_acquire 1887 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1888 llvm::AtomicOrdering::Acquire); 1889 break; 1890 case 3: // memory_order_release 1891 
Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1892 llvm::AtomicOrdering::Release); 1893 break; 1894 case 4: // memory_order_acq_rel 1895 1896 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1897 llvm::AtomicOrdering::AcquireRelease); 1898 break; 1899 case 5: // memory_order_seq_cst 1900 Result = Builder.CreateAtomicRMW( 1901 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1902 llvm::AtomicOrdering::SequentiallyConsistent); 1903 break; 1904 } 1905 Result->setVolatile(Volatile); 1906 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1907 } 1908 1909 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1910 1911 llvm::BasicBlock *BBs[5] = { 1912 createBasicBlock("monotonic", CurFn), 1913 createBasicBlock("acquire", CurFn), 1914 createBasicBlock("release", CurFn), 1915 createBasicBlock("acqrel", CurFn), 1916 createBasicBlock("seqcst", CurFn) 1917 }; 1918 llvm::AtomicOrdering Orders[5] = { 1919 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1920 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1921 llvm::AtomicOrdering::SequentiallyConsistent}; 1922 1923 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1924 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1925 1926 Builder.SetInsertPoint(ContBB); 1927 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1928 1929 for (unsigned i = 0; i < 5; ++i) { 1930 Builder.SetInsertPoint(BBs[i]); 1931 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1932 Ptr, NewVal, Orders[i]); 1933 RMW->setVolatile(Volatile); 1934 Result->addIncoming(RMW, BBs[i]); 1935 Builder.CreateBr(ContBB); 1936 } 1937 1938 SI->addCase(Builder.getInt32(0), BBs[0]); 1939 SI->addCase(Builder.getInt32(1), BBs[1]); 1940 SI->addCase(Builder.getInt32(2), BBs[1]); 1941 SI->addCase(Builder.getInt32(3), BBs[2]); 1942 SI->addCase(Builder.getInt32(4), BBs[3]); 1943 SI->addCase(Builder.getInt32(5), 
BBs[4]); 1944 1945 Builder.SetInsertPoint(ContBB); 1946 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1947 } 1948 1949 case Builtin::BI__atomic_clear: { 1950 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1951 bool Volatile = 1952 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1953 1954 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1955 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1956 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1957 Value *NewVal = Builder.getInt8(0); 1958 Value *Order = EmitScalarExpr(E->getArg(1)); 1959 if (isa<llvm::ConstantInt>(Order)) { 1960 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1961 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1962 switch (ord) { 1963 case 0: // memory_order_relaxed 1964 default: // invalid order 1965 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1966 break; 1967 case 3: // memory_order_release 1968 Store->setOrdering(llvm::AtomicOrdering::Release); 1969 break; 1970 case 5: // memory_order_seq_cst 1971 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1972 break; 1973 } 1974 return RValue::get(nullptr); 1975 } 1976 1977 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1978 1979 llvm::BasicBlock *BBs[3] = { 1980 createBasicBlock("monotonic", CurFn), 1981 createBasicBlock("release", CurFn), 1982 createBasicBlock("seqcst", CurFn) 1983 }; 1984 llvm::AtomicOrdering Orders[3] = { 1985 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1986 llvm::AtomicOrdering::SequentiallyConsistent}; 1987 1988 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1989 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1990 1991 for (unsigned i = 0; i < 3; ++i) { 1992 Builder.SetInsertPoint(BBs[i]); 1993 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1994 Store->setOrdering(Orders[i]); 1995 
Builder.CreateBr(ContBB); 1996 } 1997 1998 SI->addCase(Builder.getInt32(0), BBs[0]); 1999 SI->addCase(Builder.getInt32(3), BBs[1]); 2000 SI->addCase(Builder.getInt32(5), BBs[2]); 2001 2002 Builder.SetInsertPoint(ContBB); 2003 return RValue::get(nullptr); 2004 } 2005 2006 case Builtin::BI__atomic_thread_fence: 2007 case Builtin::BI__atomic_signal_fence: 2008 case Builtin::BI__c11_atomic_thread_fence: 2009 case Builtin::BI__c11_atomic_signal_fence: { 2010 llvm::SyncScope::ID SSID; 2011 if (BuiltinID == Builtin::BI__atomic_signal_fence || 2012 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 2013 SSID = llvm::SyncScope::SingleThread; 2014 else 2015 SSID = llvm::SyncScope::System; 2016 Value *Order = EmitScalarExpr(E->getArg(0)); 2017 if (isa<llvm::ConstantInt>(Order)) { 2018 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 2019 switch (ord) { 2020 case 0: // memory_order_relaxed 2021 default: // invalid order 2022 break; 2023 case 1: // memory_order_consume 2024 case 2: // memory_order_acquire 2025 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 2026 break; 2027 case 3: // memory_order_release 2028 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 2029 break; 2030 case 4: // memory_order_acq_rel 2031 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 2032 break; 2033 case 5: // memory_order_seq_cst 2034 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 2035 break; 2036 } 2037 return RValue::get(nullptr); 2038 } 2039 2040 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 2041 AcquireBB = createBasicBlock("acquire", CurFn); 2042 ReleaseBB = createBasicBlock("release", CurFn); 2043 AcqRelBB = createBasicBlock("acqrel", CurFn); 2044 SeqCstBB = createBasicBlock("seqcst", CurFn); 2045 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 2046 2047 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 2048 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 
2049 2050 Builder.SetInsertPoint(AcquireBB); 2051 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 2052 Builder.CreateBr(ContBB); 2053 SI->addCase(Builder.getInt32(1), AcquireBB); 2054 SI->addCase(Builder.getInt32(2), AcquireBB); 2055 2056 Builder.SetInsertPoint(ReleaseBB); 2057 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 2058 Builder.CreateBr(ContBB); 2059 SI->addCase(Builder.getInt32(3), ReleaseBB); 2060 2061 Builder.SetInsertPoint(AcqRelBB); 2062 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 2063 Builder.CreateBr(ContBB); 2064 SI->addCase(Builder.getInt32(4), AcqRelBB); 2065 2066 Builder.SetInsertPoint(SeqCstBB); 2067 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 2068 Builder.CreateBr(ContBB); 2069 SI->addCase(Builder.getInt32(5), SeqCstBB); 2070 2071 Builder.SetInsertPoint(ContBB); 2072 return RValue::get(nullptr); 2073 } 2074 2075 // Library functions with special handling. 2076 case Builtin::BIsqrt: 2077 case Builtin::BIsqrtf: 2078 case Builtin::BIsqrtl: { 2079 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 2080 // in finite- or unsafe-math mode (the intrinsic has different semantics 2081 // for handling negative numbers compared to the library function, so 2082 // -fmath-errno=0 is not enough). 2083 if (!FD->hasAttr<ConstAttr>()) 2084 break; 2085 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 2086 CGM.getCodeGenOpts().NoNaNsFPMath)) 2087 break; 2088 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2089 llvm::Type *ArgType = Arg0->getType(); 2090 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 2091 return RValue::get(Builder.CreateCall(F, Arg0)); 2092 } 2093 2094 case Builtin::BI__builtin_pow: 2095 case Builtin::BI__builtin_powf: 2096 case Builtin::BI__builtin_powl: 2097 case Builtin::BIpow: 2098 case Builtin::BIpowf: 2099 case Builtin::BIpowl: { 2100 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
2101 if (!FD->hasAttr<ConstAttr>()) 2102 break; 2103 Value *Base = EmitScalarExpr(E->getArg(0)); 2104 Value *Exponent = EmitScalarExpr(E->getArg(1)); 2105 llvm::Type *ArgType = Base->getType(); 2106 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 2107 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 2108 } 2109 2110 case Builtin::BIfma: 2111 case Builtin::BIfmaf: 2112 case Builtin::BIfmal: 2113 case Builtin::BI__builtin_fma: 2114 case Builtin::BI__builtin_fmaf: 2115 case Builtin::BI__builtin_fmal: { 2116 // Rewrite fma to intrinsic. 2117 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 2118 llvm::Type *ArgType = FirstArg->getType(); 2119 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 2120 return RValue::get( 2121 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 2122 EmitScalarExpr(E->getArg(2))})); 2123 } 2124 2125 case Builtin::BI__builtin_signbit: 2126 case Builtin::BI__builtin_signbitf: 2127 case Builtin::BI__builtin_signbitl: { 2128 return RValue::get( 2129 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 2130 ConvertType(E->getType()))); 2131 } 2132 case Builtin::BI__annotation: { 2133 // Re-encode each wide string to UTF8 and make an MDString. 2134 SmallVector<Metadata *, 1> Strings; 2135 for (const Expr *Arg : E->arguments()) { 2136 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts()); 2137 assert(Str->getCharByteWidth() == 2); 2138 StringRef WideBytes = Str->getBytes(); 2139 std::string StrUtf8; 2140 if (!convertUTF16ToUTF8String( 2141 makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { 2142 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); 2143 continue; 2144 } 2145 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); 2146 } 2147 2148 // Build and MDTuple of MDStrings and emit the intrinsic call. 
2149 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); 2150 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); 2151 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); 2152 return RValue::getIgnored(); 2153 } 2154 case Builtin::BI__builtin_annotation: { 2155 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 2156 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 2157 AnnVal->getType()); 2158 2159 // Get the annotation string, go through casts. Sema requires this to be a 2160 // non-wide string literal, potentially casted, so the cast<> is safe. 2161 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 2162 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 2163 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 2164 } 2165 case Builtin::BI__builtin_addcb: 2166 case Builtin::BI__builtin_addcs: 2167 case Builtin::BI__builtin_addc: 2168 case Builtin::BI__builtin_addcl: 2169 case Builtin::BI__builtin_addcll: 2170 case Builtin::BI__builtin_subcb: 2171 case Builtin::BI__builtin_subcs: 2172 case Builtin::BI__builtin_subc: 2173 case Builtin::BI__builtin_subcl: 2174 case Builtin::BI__builtin_subcll: { 2175 2176 // We translate all of these builtins from expressions of the form: 2177 // int x = ..., y = ..., carryin = ..., carryout, result; 2178 // result = __builtin_addc(x, y, carryin, &carryout); 2179 // 2180 // to LLVM IR of the form: 2181 // 2182 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 2183 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 2184 // %carry1 = extractvalue {i32, i1} %tmp1, 1 2185 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 2186 // i32 %carryin) 2187 // %result = extractvalue {i32, i1} %tmp2, 0 2188 // %carry2 = extractvalue {i32, i1} %tmp2, 1 2189 // %tmp3 = or i1 %carry1, %carry2 2190 // %tmp4 = zext i1 %tmp3 to i32 2191 // store i32 %tmp4, i32* %carryout 2192 2193 // Scalarize our 
inputs. 2194 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2195 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2196 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 2197 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 2198 2199 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 2200 llvm::Intrinsic::ID IntrinsicId; 2201 switch (BuiltinID) { 2202 default: llvm_unreachable("Unknown multiprecision builtin id."); 2203 case Builtin::BI__builtin_addcb: 2204 case Builtin::BI__builtin_addcs: 2205 case Builtin::BI__builtin_addc: 2206 case Builtin::BI__builtin_addcl: 2207 case Builtin::BI__builtin_addcll: 2208 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2209 break; 2210 case Builtin::BI__builtin_subcb: 2211 case Builtin::BI__builtin_subcs: 2212 case Builtin::BI__builtin_subc: 2213 case Builtin::BI__builtin_subcl: 2214 case Builtin::BI__builtin_subcll: 2215 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2216 break; 2217 } 2218 2219 // Construct our resulting LLVM IR expression. 
2220 llvm::Value *Carry1; 2221 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 2222 X, Y, Carry1); 2223 llvm::Value *Carry2; 2224 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 2225 Sum1, Carryin, Carry2); 2226 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 2227 X->getType()); 2228 Builder.CreateStore(CarryOut, CarryOutPtr); 2229 return RValue::get(Sum2); 2230 } 2231 2232 case Builtin::BI__builtin_add_overflow: 2233 case Builtin::BI__builtin_sub_overflow: 2234 case Builtin::BI__builtin_mul_overflow: { 2235 const clang::Expr *LeftArg = E->getArg(0); 2236 const clang::Expr *RightArg = E->getArg(1); 2237 const clang::Expr *ResultArg = E->getArg(2); 2238 2239 clang::QualType ResultQTy = 2240 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 2241 2242 WidthAndSignedness LeftInfo = 2243 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 2244 WidthAndSignedness RightInfo = 2245 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 2246 WidthAndSignedness ResultInfo = 2247 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 2248 WidthAndSignedness EncompassingInfo = 2249 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 2250 2251 llvm::Type *EncompassingLLVMTy = 2252 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 2253 2254 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 2255 2256 llvm::Intrinsic::ID IntrinsicId; 2257 switch (BuiltinID) { 2258 default: 2259 llvm_unreachable("Unknown overflow builtin id."); 2260 case Builtin::BI__builtin_add_overflow: 2261 IntrinsicId = EncompassingInfo.Signed 2262 ? llvm::Intrinsic::sadd_with_overflow 2263 : llvm::Intrinsic::uadd_with_overflow; 2264 break; 2265 case Builtin::BI__builtin_sub_overflow: 2266 IntrinsicId = EncompassingInfo.Signed 2267 ? 
llvm::Intrinsic::ssub_with_overflow 2268 : llvm::Intrinsic::usub_with_overflow; 2269 break; 2270 case Builtin::BI__builtin_mul_overflow: 2271 IntrinsicId = EncompassingInfo.Signed 2272 ? llvm::Intrinsic::smul_with_overflow 2273 : llvm::Intrinsic::umul_with_overflow; 2274 break; 2275 } 2276 2277 llvm::Value *Left = EmitScalarExpr(LeftArg); 2278 llvm::Value *Right = EmitScalarExpr(RightArg); 2279 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 2280 2281 // Extend each operand to the encompassing type. 2282 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 2283 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 2284 2285 // Perform the operation on the extended values. 2286 llvm::Value *Overflow, *Result; 2287 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 2288 2289 if (EncompassingInfo.Width > ResultInfo.Width) { 2290 // The encompassing type is wider than the result type, so we need to 2291 // truncate it. 2292 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 2293 2294 // To see if the truncation caused an overflow, we will extend 2295 // the result and then compare it to the original result. 2296 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 2297 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 2298 llvm::Value *TruncationOverflow = 2299 Builder.CreateICmpNE(Result, ResultTruncExt); 2300 2301 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 2302 Result = ResultTrunc; 2303 } 2304 2305 // Finally, store the result using the pointer. 
2306 bool isVolatile = 2307 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2308 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 2309 2310 return RValue::get(Overflow); 2311 } 2312 2313 case Builtin::BI__builtin_uadd_overflow: 2314 case Builtin::BI__builtin_uaddl_overflow: 2315 case Builtin::BI__builtin_uaddll_overflow: 2316 case Builtin::BI__builtin_usub_overflow: 2317 case Builtin::BI__builtin_usubl_overflow: 2318 case Builtin::BI__builtin_usubll_overflow: 2319 case Builtin::BI__builtin_umul_overflow: 2320 case Builtin::BI__builtin_umull_overflow: 2321 case Builtin::BI__builtin_umulll_overflow: 2322 case Builtin::BI__builtin_sadd_overflow: 2323 case Builtin::BI__builtin_saddl_overflow: 2324 case Builtin::BI__builtin_saddll_overflow: 2325 case Builtin::BI__builtin_ssub_overflow: 2326 case Builtin::BI__builtin_ssubl_overflow: 2327 case Builtin::BI__builtin_ssubll_overflow: 2328 case Builtin::BI__builtin_smul_overflow: 2329 case Builtin::BI__builtin_smull_overflow: 2330 case Builtin::BI__builtin_smulll_overflow: { 2331 2332 // We translate all of these builtins directly to the relevant llvm IR node. 2333 2334 // Scalarize our inputs. 
2335 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2336 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2337 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2338 2339 // Decide which of the overflow intrinsics we are lowering to: 2340 llvm::Intrinsic::ID IntrinsicId; 2341 switch (BuiltinID) { 2342 default: llvm_unreachable("Unknown overflow builtin id."); 2343 case Builtin::BI__builtin_uadd_overflow: 2344 case Builtin::BI__builtin_uaddl_overflow: 2345 case Builtin::BI__builtin_uaddll_overflow: 2346 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2347 break; 2348 case Builtin::BI__builtin_usub_overflow: 2349 case Builtin::BI__builtin_usubl_overflow: 2350 case Builtin::BI__builtin_usubll_overflow: 2351 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2352 break; 2353 case Builtin::BI__builtin_umul_overflow: 2354 case Builtin::BI__builtin_umull_overflow: 2355 case Builtin::BI__builtin_umulll_overflow: 2356 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2357 break; 2358 case Builtin::BI__builtin_sadd_overflow: 2359 case Builtin::BI__builtin_saddl_overflow: 2360 case Builtin::BI__builtin_saddll_overflow: 2361 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2362 break; 2363 case Builtin::BI__builtin_ssub_overflow: 2364 case Builtin::BI__builtin_ssubl_overflow: 2365 case Builtin::BI__builtin_ssubll_overflow: 2366 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2367 break; 2368 case Builtin::BI__builtin_smul_overflow: 2369 case Builtin::BI__builtin_smull_overflow: 2370 case Builtin::BI__builtin_smulll_overflow: 2371 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2372 break; 2373 } 2374 2375 2376 llvm::Value *Carry; 2377 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2378 Builder.CreateStore(Sum, SumOutPtr); 2379 2380 return RValue::get(Carry); 2381 } 2382 case Builtin::BI__builtin_addressof: 2383 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2384 case Builtin::BI__builtin_operator_new: 2385 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2386 E->getArg(0), false); 2387 case Builtin::BI__builtin_operator_delete: 2388 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2389 E->getArg(0), true); 2390 case Builtin::BI__noop: 2391 // __noop always evaluates to an integer literal zero. 2392 return RValue::get(ConstantInt::get(IntTy, 0)); 2393 case Builtin::BI__builtin_call_with_static_chain: { 2394 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2395 const Expr *Chain = E->getArg(1); 2396 return EmitCall(Call->getCallee()->getType(), 2397 EmitCallee(Call->getCallee()), Call, ReturnValue, 2398 EmitScalarExpr(Chain)); 2399 } 2400 case Builtin::BI_InterlockedExchange8: 2401 case Builtin::BI_InterlockedExchange16: 2402 case Builtin::BI_InterlockedExchange: 2403 case Builtin::BI_InterlockedExchangePointer: 2404 return RValue::get( 2405 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2406 case Builtin::BI_InterlockedCompareExchangePointer: { 2407 llvm::Type *RTy; 2408 llvm::IntegerType *IntType = 2409 IntegerType::get(getLLVMContext(), 2410 getContext().getTypeSize(E->getType())); 2411 llvm::Type *IntPtrType = IntType->getPointerTo(); 2412 2413 llvm::Value *Destination = 2414 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2415 2416 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2417 RTy = Exchange->getType(); 2418 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2419 2420 llvm::Value *Comparand = 2421 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2422 2423 auto Result = 2424 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2425 AtomicOrdering::SequentiallyConsistent, 2426 AtomicOrdering::SequentiallyConsistent); 2427 Result->setVolatile(true); 2428 2429 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2430 0), 2431 RTy)); 2432 } 2433 case Builtin::BI_InterlockedCompareExchange8: 2434 case 
Builtin::BI_InterlockedCompareExchange16: 2435 case Builtin::BI_InterlockedCompareExchange: 2436 case Builtin::BI_InterlockedCompareExchange64: { 2437 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2438 EmitScalarExpr(E->getArg(0)), 2439 EmitScalarExpr(E->getArg(2)), 2440 EmitScalarExpr(E->getArg(1)), 2441 AtomicOrdering::SequentiallyConsistent, 2442 AtomicOrdering::SequentiallyConsistent); 2443 CXI->setVolatile(true); 2444 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2445 } 2446 case Builtin::BI_InterlockedIncrement16: 2447 case Builtin::BI_InterlockedIncrement: 2448 return RValue::get( 2449 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2450 case Builtin::BI_InterlockedDecrement16: 2451 case Builtin::BI_InterlockedDecrement: 2452 return RValue::get( 2453 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2454 case Builtin::BI_InterlockedAnd8: 2455 case Builtin::BI_InterlockedAnd16: 2456 case Builtin::BI_InterlockedAnd: 2457 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2458 case Builtin::BI_InterlockedExchangeAdd8: 2459 case Builtin::BI_InterlockedExchangeAdd16: 2460 case Builtin::BI_InterlockedExchangeAdd: 2461 return RValue::get( 2462 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2463 case Builtin::BI_InterlockedExchangeSub8: 2464 case Builtin::BI_InterlockedExchangeSub16: 2465 case Builtin::BI_InterlockedExchangeSub: 2466 return RValue::get( 2467 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2468 case Builtin::BI_InterlockedOr8: 2469 case Builtin::BI_InterlockedOr16: 2470 case Builtin::BI_InterlockedOr: 2471 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2472 case Builtin::BI_InterlockedXor8: 2473 case Builtin::BI_InterlockedXor16: 2474 case Builtin::BI_InterlockedXor: 2475 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2476 case Builtin::BI_interlockedbittestandset: 2477 return RValue::get( 2478 
EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); 2479 2480 case Builtin::BI__exception_code: 2481 case Builtin::BI_exception_code: 2482 return RValue::get(EmitSEHExceptionCode()); 2483 case Builtin::BI__exception_info: 2484 case Builtin::BI_exception_info: 2485 return RValue::get(EmitSEHExceptionInfo()); 2486 case Builtin::BI__abnormal_termination: 2487 case Builtin::BI_abnormal_termination: 2488 return RValue::get(EmitSEHAbnormalTermination()); 2489 case Builtin::BI_setjmpex: { 2490 if (getTarget().getTriple().isOSMSVCRT()) { 2491 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2492 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2493 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2494 llvm::Attribute::ReturnsTwice); 2495 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2496 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2497 "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); 2498 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2499 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2500 llvm::Value *FrameAddr = 2501 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2502 ConstantInt::get(Int32Ty, 0)); 2503 llvm::Value *Args[] = {Buf, FrameAddr}; 2504 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2505 CS.setAttributes(ReturnsTwiceAttr); 2506 return RValue::get(CS.getInstruction()); 2507 } 2508 break; 2509 } 2510 case Builtin::BI_setjmp: { 2511 if (getTarget().getTriple().isOSMSVCRT()) { 2512 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2513 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2514 llvm::Attribute::ReturnsTwice); 2515 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2516 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2517 llvm::CallSite CS; 2518 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2519 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2520 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2521 llvm::FunctionType::get(IntTy, 
ArgTypes, /*isVarArg=*/true), 2522 "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); 2523 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2524 llvm::Value *Args[] = {Buf, Count}; 2525 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2526 } else { 2527 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2528 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2529 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2530 "_setjmp", ReturnsTwiceAttr, /*Local=*/true); 2531 llvm::Value *FrameAddr = 2532 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2533 ConstantInt::get(Int32Ty, 0)); 2534 llvm::Value *Args[] = {Buf, FrameAddr}; 2535 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2536 } 2537 CS.setAttributes(ReturnsTwiceAttr); 2538 return RValue::get(CS.getInstruction()); 2539 } 2540 break; 2541 } 2542 2543 case Builtin::BI__GetExceptionInfo: { 2544 if (llvm::GlobalVariable *GV = 2545 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2546 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2547 break; 2548 } 2549 2550 case Builtin::BI__fastfail: 2551 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 2552 2553 case Builtin::BI__builtin_coro_size: { 2554 auto & Context = getContext(); 2555 auto SizeTy = Context.getSizeType(); 2556 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 2557 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2558 return RValue::get(Builder.CreateCall(F)); 2559 } 2560 2561 case Builtin::BI__builtin_coro_id: 2562 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2563 case Builtin::BI__builtin_coro_promise: 2564 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2565 case Builtin::BI__builtin_coro_resume: 2566 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2567 case Builtin::BI__builtin_coro_frame: 2568 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2569 case Builtin::BI__builtin_coro_free: 2570 return EmitCoroutineIntrinsic(E, 
Intrinsic::coro_free); 2571 case Builtin::BI__builtin_coro_destroy: 2572 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2573 case Builtin::BI__builtin_coro_done: 2574 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2575 case Builtin::BI__builtin_coro_alloc: 2576 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2577 case Builtin::BI__builtin_coro_begin: 2578 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2579 case Builtin::BI__builtin_coro_end: 2580 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2581 case Builtin::BI__builtin_coro_suspend: 2582 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2583 case Builtin::BI__builtin_coro_param: 2584 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2585 2586 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2587 case Builtin::BIread_pipe: 2588 case Builtin::BIwrite_pipe: { 2589 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2590 *Arg1 = EmitScalarExpr(E->getArg(1)); 2591 CGOpenCLRuntime OpenCLRT(CGM); 2592 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2593 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2594 2595 // Type of the generic packet parameter. 2596 unsigned GenericAS = 2597 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2598 llvm::Type *I8PTy = llvm::PointerType::get( 2599 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2600 2601 // Testing which overloaded version we should generate the call for. 2602 if (2U == E->getNumArgs()) { 2603 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2604 : "__write_pipe_2"; 2605 // Creating a generic function type to be able to call with any builtin or 2606 // user defined type. 
2607 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2608 llvm::FunctionType *FTy = llvm::FunctionType::get( 2609 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2610 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2611 return RValue::get( 2612 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2613 {Arg0, BCast, PacketSize, PacketAlign})); 2614 } else { 2615 assert(4 == E->getNumArgs() && 2616 "Illegal number of parameters to pipe function"); 2617 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2618 : "__write_pipe_4"; 2619 2620 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2621 Int32Ty, Int32Ty}; 2622 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2623 *Arg3 = EmitScalarExpr(E->getArg(3)); 2624 llvm::FunctionType *FTy = llvm::FunctionType::get( 2625 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2626 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2627 // We know the third argument is an integer type, but we may need to cast 2628 // it to i32. 2629 if (Arg2->getType() != Int32Ty) 2630 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2631 return RValue::get(Builder.CreateCall( 2632 CGM.CreateRuntimeFunction(FTy, Name), 2633 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2634 } 2635 } 2636 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2637 // functions 2638 case Builtin::BIreserve_read_pipe: 2639 case Builtin::BIreserve_write_pipe: 2640 case Builtin::BIwork_group_reserve_read_pipe: 2641 case Builtin::BIwork_group_reserve_write_pipe: 2642 case Builtin::BIsub_group_reserve_read_pipe: 2643 case Builtin::BIsub_group_reserve_write_pipe: { 2644 // Composing the mangled name for the function. 
2645 const char *Name; 2646 if (BuiltinID == Builtin::BIreserve_read_pipe) 2647 Name = "__reserve_read_pipe"; 2648 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2649 Name = "__reserve_write_pipe"; 2650 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2651 Name = "__work_group_reserve_read_pipe"; 2652 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2653 Name = "__work_group_reserve_write_pipe"; 2654 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2655 Name = "__sub_group_reserve_read_pipe"; 2656 else 2657 Name = "__sub_group_reserve_write_pipe"; 2658 2659 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2660 *Arg1 = EmitScalarExpr(E->getArg(1)); 2661 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2662 CGOpenCLRuntime OpenCLRT(CGM); 2663 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2664 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2665 2666 // Building the generic function prototype. 2667 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2668 llvm::FunctionType *FTy = llvm::FunctionType::get( 2669 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2670 // We know the second argument is an integer type, but we may need to cast 2671 // it to i32. 
2672 if (Arg1->getType() != Int32Ty) 2673 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2674 return RValue::get( 2675 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2676 {Arg0, Arg1, PacketSize, PacketAlign})); 2677 } 2678 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2679 // functions 2680 case Builtin::BIcommit_read_pipe: 2681 case Builtin::BIcommit_write_pipe: 2682 case Builtin::BIwork_group_commit_read_pipe: 2683 case Builtin::BIwork_group_commit_write_pipe: 2684 case Builtin::BIsub_group_commit_read_pipe: 2685 case Builtin::BIsub_group_commit_write_pipe: { 2686 const char *Name; 2687 if (BuiltinID == Builtin::BIcommit_read_pipe) 2688 Name = "__commit_read_pipe"; 2689 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2690 Name = "__commit_write_pipe"; 2691 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2692 Name = "__work_group_commit_read_pipe"; 2693 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2694 Name = "__work_group_commit_write_pipe"; 2695 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2696 Name = "__sub_group_commit_read_pipe"; 2697 else 2698 Name = "__sub_group_commit_write_pipe"; 2699 2700 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2701 *Arg1 = EmitScalarExpr(E->getArg(1)); 2702 CGOpenCLRuntime OpenCLRT(CGM); 2703 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2704 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2705 2706 // Building the generic function prototype. 
2707 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2708 llvm::FunctionType *FTy = 2709 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2710 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2711 2712 return RValue::get( 2713 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2714 {Arg0, Arg1, PacketSize, PacketAlign})); 2715 } 2716 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2717 case Builtin::BIget_pipe_num_packets: 2718 case Builtin::BIget_pipe_max_packets: { 2719 const char *Name; 2720 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2721 Name = "__get_pipe_num_packets"; 2722 else 2723 Name = "__get_pipe_max_packets"; 2724 2725 // Building the generic function prototype. 2726 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2727 CGOpenCLRuntime OpenCLRT(CGM); 2728 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2729 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2730 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2731 llvm::FunctionType *FTy = llvm::FunctionType::get( 2732 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2733 2734 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2735 {Arg0, PacketSize, PacketAlign})); 2736 } 2737 2738 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2739 case Builtin::BIto_global: 2740 case Builtin::BIto_local: 2741 case Builtin::BIto_private: { 2742 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2743 auto NewArgT = llvm::PointerType::get(Int8Ty, 2744 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2745 auto NewRetT = llvm::PointerType::get(Int8Ty, 2746 CGM.getContext().getTargetAddressSpace( 2747 E->getType()->getPointeeType().getAddressSpace())); 2748 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2749 llvm::Value *NewArg; 2750 if (Arg0->getType()->getPointerAddressSpace() != 2751 NewArgT->getPointerAddressSpace()) 2752 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2753 else 2754 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2755 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2756 auto NewCall = 2757 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2758 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2759 ConvertType(E->getType()))); 2760 } 2761 2762 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2763 // It contains four different overload formats specified in Table 6.13.17.1. 
2764 case Builtin::BIenqueue_kernel: { 2765 StringRef Name; // Generated function call name 2766 unsigned NumArgs = E->getNumArgs(); 2767 2768 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2769 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2770 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2771 2772 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2773 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2774 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 2775 llvm::Value *Range = NDRangeL.getAddress().getPointer(); 2776 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 2777 2778 if (NumArgs == 4) { 2779 // The most basic form of the call with parameters: 2780 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2781 Name = "__enqueue_kernel_basic"; 2782 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; 2783 llvm::FunctionType *FTy = llvm::FunctionType::get( 2784 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2785 2786 llvm::Value *Block = Builder.CreatePointerCast( 2787 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2788 2789 AttrBuilder B; 2790 B.addAttribute(Attribute::ByVal); 2791 llvm::AttributeList ByValAttrSet = 2792 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 2793 2794 auto RTCall = 2795 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 2796 {Queue, Flags, Range, Block}); 2797 RTCall->setAttributes(ByValAttrSet); 2798 return RValue::get(RTCall); 2799 } 2800 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2801 2802 // Create a temporary array to hold the sizes of local pointer arguments 2803 // for the block. \p First is the position of the first size argument. 
2804 auto CreateArrayForSizeVar = [=](unsigned First) { 2805 auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); 2806 auto *Arr = Builder.CreateAlloca(AT); 2807 llvm::Value *Ptr; 2808 // Each of the following arguments specifies the size of the corresponding 2809 // argument passed to the enqueued block. 2810 auto *Zero = llvm::ConstantInt::get(IntTy, 0); 2811 for (unsigned I = First; I < NumArgs; ++I) { 2812 auto *Index = llvm::ConstantInt::get(IntTy, I - First); 2813 auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); 2814 if (I == First) 2815 Ptr = GEP; 2816 auto *V = 2817 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); 2818 Builder.CreateAlignedStore( 2819 V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); 2820 } 2821 return Ptr; 2822 }; 2823 2824 // Could have events and/or vaargs. 2825 if (E->getArg(3)->getType()->isBlockPointerType()) { 2826 // No events passed, but has variadic arguments. 2827 Name = "__enqueue_kernel_vaargs"; 2828 auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)), 2829 GenericVoidPtrTy); 2830 auto *PtrToSizeArray = CreateArrayForSizeVar(4); 2831 2832 // Create a vector of the arguments, as well as a constant value to 2833 // express to the runtime the number of variadic arguments. 2834 std::vector<llvm::Value *> Args = {Queue, 2835 Flags, 2836 Range, 2837 Block, 2838 ConstantInt::get(IntTy, NumArgs - 4), 2839 PtrToSizeArray}; 2840 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, 2841 RangeTy, GenericVoidPtrTy, 2842 IntTy, PtrToSizeArray->getType()}; 2843 2844 llvm::FunctionType *FTy = llvm::FunctionType::get( 2845 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2846 return RValue::get( 2847 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2848 llvm::ArrayRef<llvm::Value *>(Args))); 2849 } 2850 // Any calls now have event arguments passed. 
2851 if (NumArgs >= 7) { 2852 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2853 llvm::Type *EventPtrTy = EventTy->getPointerTo( 2854 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2855 2856 llvm::Value *NumEvents = 2857 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 2858 llvm::Value *EventList = 2859 E->getArg(4)->getType()->isArrayType() 2860 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2861 : EmitScalarExpr(E->getArg(4)); 2862 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2863 // Convert to generic address space. 2864 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 2865 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 2866 llvm::Value *Block = Builder.CreatePointerCast( 2867 EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); 2868 2869 std::vector<llvm::Type *> ArgTys = { 2870 QueueTy, Int32Ty, RangeTy, Int32Ty, 2871 EventPtrTy, EventPtrTy, GenericVoidPtrTy}; 2872 2873 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2874 EventList, ClkEvent, Block}; 2875 2876 if (NumArgs == 7) { 2877 // Has events but no variadics. 2878 Name = "__enqueue_kernel_basic_events"; 2879 llvm::FunctionType *FTy = llvm::FunctionType::get( 2880 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2881 return RValue::get( 2882 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2883 llvm::ArrayRef<llvm::Value *>(Args))); 2884 } 2885 // Has event info and variadics 2886 // Pass the number of variadics to the runtime function too. 
2887 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2888 ArgTys.push_back(Int32Ty); 2889 Name = "__enqueue_kernel_events_vaargs"; 2890 2891 auto *PtrToSizeArray = CreateArrayForSizeVar(7); 2892 Args.push_back(PtrToSizeArray); 2893 ArgTys.push_back(PtrToSizeArray->getType()); 2894 2895 llvm::FunctionType *FTy = llvm::FunctionType::get( 2896 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2897 return RValue::get( 2898 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2899 llvm::ArrayRef<llvm::Value *>(Args))); 2900 } 2901 LLVM_FALLTHROUGH; 2902 } 2903 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2904 // parameter. 2905 case Builtin::BIget_kernel_work_group_size: { 2906 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2907 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2908 Value *Arg = EmitScalarExpr(E->getArg(0)); 2909 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2910 return RValue::get(Builder.CreateCall( 2911 CGM.CreateRuntimeFunction( 2912 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2913 "__get_kernel_work_group_size_impl"), 2914 Arg)); 2915 } 2916 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2917 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2918 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2919 Value *Arg = EmitScalarExpr(E->getArg(0)); 2920 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2921 return RValue::get(Builder.CreateCall( 2922 CGM.CreateRuntimeFunction( 2923 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2924 "__get_kernel_preferred_work_group_multiple_impl"), 2925 Arg)); 2926 } 2927 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: 2928 case Builtin::BIget_kernel_sub_group_count_for_ndrange: { 2929 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2930 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2931 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); 2932 llvm::Value 
*NDRange = NDRangeL.getAddress().getPointer(); 2933 Value *Block = EmitScalarExpr(E->getArg(1)); 2934 Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy); 2935 const char *Name = 2936 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange 2937 ? "__get_kernel_max_sub_group_size_for_ndrange_impl" 2938 : "__get_kernel_sub_group_count_for_ndrange_impl"; 2939 return RValue::get(Builder.CreateCall( 2940 CGM.CreateRuntimeFunction( 2941 llvm::FunctionType::get( 2942 IntTy, {NDRange->getType(), GenericVoidPtrTy}, false), 2943 Name), 2944 {NDRange, Block})); 2945 } 2946 2947 case Builtin::BI__builtin_store_half: 2948 case Builtin::BI__builtin_store_halff: { 2949 Value *Val = EmitScalarExpr(E->getArg(0)); 2950 Address Address = EmitPointerWithAlignment(E->getArg(1)); 2951 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); 2952 return RValue::get(Builder.CreateStore(HalfVal, Address)); 2953 } 2954 case Builtin::BI__builtin_load_half: { 2955 Address Address = EmitPointerWithAlignment(E->getArg(0)); 2956 Value *HalfVal = Builder.CreateLoad(Address); 2957 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); 2958 } 2959 case Builtin::BI__builtin_load_halff: { 2960 Address Address = EmitPointerWithAlignment(E->getArg(0)); 2961 Value *HalfVal = Builder.CreateLoad(Address); 2962 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); 2963 } 2964 case Builtin::BIprintf: 2965 if (getTarget().getTriple().isNVPTX()) 2966 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 2967 break; 2968 case Builtin::BI__builtin_canonicalize: 2969 case Builtin::BI__builtin_canonicalizef: 2970 case Builtin::BI__builtin_canonicalizel: 2971 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2972 2973 case Builtin::BI__builtin_thread_pointer: { 2974 if (!getContext().getTargetInfo().isTLSSupported()) 2975 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2976 // Fall through - it's already mapped to the 
intrinsic by GCCBuiltin. 2977 break; 2978 } 2979 case Builtin::BI__builtin_os_log_format: 2980 return emitBuiltinOSLogFormat(*E); 2981 2982 case Builtin::BI__builtin_os_log_format_buffer_size: { 2983 analyze_os_log::OSLogBufferLayout Layout; 2984 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2985 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 2986 Layout.size().getQuantity())); 2987 } 2988 2989 case Builtin::BI__xray_customevent: { 2990 if (!ShouldXRayInstrumentFunction()) 2991 return RValue::getIgnored(); 2992 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { 2993 if (XRayAttr->neverXRayInstrument()) 2994 return RValue::getIgnored(); 2995 } 2996 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 2997 auto FTy = F->getFunctionType(); 2998 auto Arg0 = E->getArg(0); 2999 auto Arg0Val = EmitScalarExpr(Arg0); 3000 auto Arg0Ty = Arg0->getType(); 3001 auto PTy0 = FTy->getParamType(0); 3002 if (PTy0 != Arg0Val->getType()) { 3003 if (Arg0Ty->isArrayType()) 3004 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 3005 else 3006 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 3007 } 3008 auto Arg1 = EmitScalarExpr(E->getArg(1)); 3009 auto PTy1 = FTy->getParamType(1); 3010 if (PTy1 != Arg1->getType()) 3011 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 3012 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 3013 } 3014 3015 case Builtin::BI__builtin_ms_va_start: 3016 case Builtin::BI__builtin_ms_va_end: 3017 return RValue::get( 3018 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 3019 BuiltinID == Builtin::BI__builtin_ms_va_start)); 3020 3021 case Builtin::BI__builtin_ms_va_copy: { 3022 // Lower this manually. We can't reliably determine whether or not any 3023 // given va_copy() is for a Win64 va_list from the calling convention 3024 // alone, because it's legal to do this from a System V ABI function. 
3025 // With opaque pointer types, we won't have enough information in LLVM 3026 // IR to determine this from the argument types, either. Best to do it 3027 // now, while we have enough information. 3028 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 3029 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 3030 3031 llvm::Type *BPP = Int8PtrPtrTy; 3032 3033 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 3034 DestAddr.getAlignment()); 3035 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 3036 SrcAddr.getAlignment()); 3037 3038 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 3039 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); 3040 } 3041 } 3042 3043 // If this is an alias for a lib function (e.g. __builtin_sin), emit 3044 // the call using the normal call path, but using the unmangled 3045 // version of the function name. 3046 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 3047 return emitLibraryCall(*this, FD, E, 3048 CGM.getBuiltinLibFunction(FD, BuiltinID)); 3049 3050 // If this is a predefined lib function (e.g. malloc), emit the call 3051 // using exactly the normal call path. 3052 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 3053 return emitLibraryCall(*this, FD, E, 3054 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 3055 3056 // Check that a call to a target specific builtin has the correct target 3057 // features. 3058 // This is down here to avoid non-target specific builtins, however, if 3059 // generic builtins start to require generic target features then we 3060 // can move this up to the beginning of the function. 3061 checkTargetFeatures(E, FD); 3062 3063 // See if we have a target specific intrinsic. 
3064 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 3065 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 3066 StringRef Prefix = 3067 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 3068 if (!Prefix.empty()) { 3069 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 3070 // NOTE we dont need to perform a compatibility flag check here since the 3071 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 3072 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 3073 if (IntrinsicID == Intrinsic::not_intrinsic) 3074 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 3075 } 3076 3077 if (IntrinsicID != Intrinsic::not_intrinsic) { 3078 SmallVector<Value*, 16> Args; 3079 3080 // Find out if any arguments are required to be integer constant 3081 // expressions. 3082 unsigned ICEArguments = 0; 3083 ASTContext::GetBuiltinTypeError Error; 3084 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 3085 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 3086 3087 Function *F = CGM.getIntrinsic(IntrinsicID); 3088 llvm::FunctionType *FTy = F->getFunctionType(); 3089 3090 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 3091 Value *ArgValue; 3092 // If this is a normal argument, just emit it as a scalar. 3093 if ((ICEArguments & (1 << i)) == 0) { 3094 ArgValue = EmitScalarExpr(E->getArg(i)); 3095 } else { 3096 // If this is required to be a constant, constant fold it so that we 3097 // know that the generated intrinsic gets a ConstantInt. 3098 llvm::APSInt Result; 3099 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 3100 assert(IsConst && "Constant arg isn't actually constant?"); 3101 (void)IsConst; 3102 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 3103 } 3104 3105 // If the intrinsic arg type is different from the builtin arg type 3106 // we need to do a bit cast. 
3107 llvm::Type *PTy = FTy->getParamType(i); 3108 if (PTy != ArgValue->getType()) { 3109 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 3110 "Must be able to losslessly bit cast to param"); 3111 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 3112 } 3113 3114 Args.push_back(ArgValue); 3115 } 3116 3117 Value *V = Builder.CreateCall(F, Args); 3118 QualType BuiltinRetType = E->getType(); 3119 3120 llvm::Type *RetTy = VoidTy; 3121 if (!BuiltinRetType->isVoidType()) 3122 RetTy = ConvertType(BuiltinRetType); 3123 3124 if (RetTy != V->getType()) { 3125 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 3126 "Must be able to losslessly bit cast result type"); 3127 V = Builder.CreateBitCast(V, RetTy); 3128 } 3129 3130 return RValue::get(V); 3131 } 3132 3133 // See if we have a target specific builtin that needs to be lowered. 3134 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 3135 return RValue::get(V); 3136 3137 ErrorUnsupported(E, "builtin function"); 3138 3139 // Unknown builtin, for now just dump it out and return undef. 
3140 return GetUndefRValue(E->getType()); 3141 } 3142 3143 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 3144 unsigned BuiltinID, const CallExpr *E, 3145 llvm::Triple::ArchType Arch) { 3146 switch (Arch) { 3147 case llvm::Triple::arm: 3148 case llvm::Triple::armeb: 3149 case llvm::Triple::thumb: 3150 case llvm::Triple::thumbeb: 3151 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 3152 case llvm::Triple::aarch64: 3153 case llvm::Triple::aarch64_be: 3154 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 3155 case llvm::Triple::x86: 3156 case llvm::Triple::x86_64: 3157 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 3158 case llvm::Triple::ppc: 3159 case llvm::Triple::ppc64: 3160 case llvm::Triple::ppc64le: 3161 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 3162 case llvm::Triple::r600: 3163 case llvm::Triple::amdgcn: 3164 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 3165 case llvm::Triple::systemz: 3166 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 3167 case llvm::Triple::nvptx: 3168 case llvm::Triple::nvptx64: 3169 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 3170 case llvm::Triple::wasm32: 3171 case llvm::Triple::wasm64: 3172 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 3173 default: 3174 return nullptr; 3175 } 3176 } 3177 3178 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 3179 const CallExpr *E) { 3180 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 3181 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 3182 return EmitTargetArchBuiltinExpr( 3183 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 3184 getContext().getAuxTargetInfo()->getTriple().getArch()); 3185 } 3186 3187 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 3188 getTarget().getTriple().getArch()); 3189 } 3190 3191 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 3192 NeonTypeFlags TypeFlags, 3193 bool V1Ty=false) { 3194 int IsQuad = TypeFlags.isQuad(); 3195 switch (TypeFlags.getEltType()) { 
3196 case NeonTypeFlags::Int8: 3197 case NeonTypeFlags::Poly8: 3198 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 3199 case NeonTypeFlags::Int16: 3200 case NeonTypeFlags::Poly16: 3201 case NeonTypeFlags::Float16: 3202 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 3203 case NeonTypeFlags::Int32: 3204 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 3205 case NeonTypeFlags::Int64: 3206 case NeonTypeFlags::Poly64: 3207 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 3208 case NeonTypeFlags::Poly128: 3209 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 3210 // There is a lot of i128 and f128 API missing. 3211 // so we use v16i8 to represent poly128 and get pattern matched. 3212 return llvm::VectorType::get(CGF->Int8Ty, 16); 3213 case NeonTypeFlags::Float32: 3214 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 3215 case NeonTypeFlags::Float64: 3216 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 3217 } 3218 llvm_unreachable("Unknown vector element type!"); 3219 } 3220 3221 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 3222 NeonTypeFlags IntTypeFlags) { 3223 int IsQuad = IntTypeFlags.isQuad(); 3224 switch (IntTypeFlags.getEltType()) { 3225 case NeonTypeFlags::Int32: 3226 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 3227 case NeonTypeFlags::Int64: 3228 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 3229 default: 3230 llvm_unreachable("Type can't be converted to floating-point!"); 3231 } 3232 } 3233 3234 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 3235 unsigned nElts = V->getType()->getVectorNumElements(); 3236 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 3237 return Builder.CreateShuffleVector(V, V, SV, "lane"); 3238 } 3239 3240 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 3241 const char *name, 3242 unsigned shift, bool rightshift) { 3243 unsigned j = 0; 3244 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3245 ai != ae; ++ai, ++j) 3246 if (shift > 0 && shift == j) 3247 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 3248 else 3249 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 3250 3251 return Builder.CreateCall(F, Ops, name); 3252 } 3253 3254 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 3255 bool neg) { 3256 int SV = cast<ConstantInt>(V)->getSExtValue(); 3257 return ConstantInt::get(Ty, neg ? -SV : SV); 3258 } 3259 3260 // \brief Right-shift a vector by a constant. 
3261 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 3262 llvm::Type *Ty, bool usgn, 3263 const char *name) { 3264 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3265 3266 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 3267 int EltSize = VTy->getScalarSizeInBits(); 3268 3269 Vec = Builder.CreateBitCast(Vec, Ty); 3270 3271 // lshr/ashr are undefined when the shift amount is equal to the vector 3272 // element size. 3273 if (ShiftAmt == EltSize) { 3274 if (usgn) { 3275 // Right-shifting an unsigned value by its size yields 0. 3276 return llvm::ConstantAggregateZero::get(VTy); 3277 } else { 3278 // Right-shifting a signed value by its size is equivalent 3279 // to a shift of size-1. 3280 --ShiftAmt; 3281 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 3282 } 3283 } 3284 3285 Shift = EmitNeonShiftVector(Shift, Ty, false); 3286 if (usgn) 3287 return Builder.CreateLShr(Vec, Shift, name); 3288 else 3289 return Builder.CreateAShr(Vec, Shift, name); 3290 } 3291 3292 enum { 3293 AddRetType = (1 << 0), 3294 Add1ArgType = (1 << 1), 3295 Add2ArgTypes = (1 << 2), 3296 3297 VectorizeRetType = (1 << 3), 3298 VectorizeArgTypes = (1 << 4), 3299 3300 InventFloatType = (1 << 5), 3301 UnsignedAlts = (1 << 6), 3302 3303 Use64BitVectors = (1 << 7), 3304 Use128BitVectors = (1 << 8), 3305 3306 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 3307 VectorRet = AddRetType | VectorizeRetType, 3308 VectorRetGetArgs01 = 3309 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 3310 FpCmpzModifiers = 3311 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 3312 }; 3313 3314 namespace { 3315 struct NeonIntrinsicInfo { 3316 const char *NameHint; 3317 unsigned BuiltinID; 3318 unsigned LLVMIntrinsic; 3319 unsigned AltLLVMIntrinsic; 3320 unsigned TypeModifier; 3321 3322 bool operator<(unsigned RHSBuiltinID) const { 3323 return BuiltinID < RHSBuiltinID; 3324 } 3325 bool operator<(const NeonIntrinsicInfo &TE) const { 3326 return 
BuiltinID < TE.BuiltinID; 3327 } 3328 }; 3329 } // end anonymous namespace 3330 3331 #define NEONMAP0(NameBase) \ 3332 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 3333 3334 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 3335 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3336 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 3337 3338 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 3339 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3340 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 3341 TypeModifier } 3342 3343 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 3344 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3345 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3346 NEONMAP1(vabs_v, arm_neon_vabs, 0), 3347 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 3348 NEONMAP0(vaddhn_v), 3349 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 3350 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 3351 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 3352 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 3353 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 3354 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 3355 NEONMAP1(vcage_v, arm_neon_vacge, 0), 3356 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 3357 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 3358 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 3359 NEONMAP1(vcale_v, arm_neon_vacge, 0), 3360 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 3361 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 3362 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 3363 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 3364 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 3365 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3366 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3367 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 3368 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3369 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 3370 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 3371 NEONMAP0(vcvt_f32_v), 3372 NEONMAP2(vcvt_n_f32_v, 
arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3373 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3374 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3375 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3376 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3377 NEONMAP0(vcvt_s32_v), 3378 NEONMAP0(vcvt_s64_v), 3379 NEONMAP0(vcvt_u32_v), 3380 NEONMAP0(vcvt_u64_v), 3381 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 3382 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 3383 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 3384 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 3385 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 3386 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 3387 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 3388 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 3389 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 3390 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 3391 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 3392 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 3393 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 3394 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 3395 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 3396 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 3397 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 3398 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 3399 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 3400 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 3401 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 3402 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 3403 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 3404 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 3405 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 3406 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 3407 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 3408 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 3409 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 3410 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 3411 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 3412 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 3413 NEONMAP0(vcvtq_f32_v), 3414 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, 
arm_neon_vcvtfxs2fp, 0), 3415 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3416 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3417 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3418 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3419 NEONMAP0(vcvtq_s32_v), 3420 NEONMAP0(vcvtq_s64_v), 3421 NEONMAP0(vcvtq_u32_v), 3422 NEONMAP0(vcvtq_u64_v), 3423 NEONMAP0(vext_v), 3424 NEONMAP0(vextq_v), 3425 NEONMAP0(vfma_v), 3426 NEONMAP0(vfmaq_v), 3427 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3428 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3429 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3430 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3431 NEONMAP0(vld1_dup_v), 3432 NEONMAP1(vld1_v, arm_neon_vld1, 0), 3433 NEONMAP0(vld1q_dup_v), 3434 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 3435 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 3436 NEONMAP1(vld2_v, arm_neon_vld2, 0), 3437 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 3438 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 3439 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 3440 NEONMAP1(vld3_v, arm_neon_vld3, 0), 3441 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 3442 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 3443 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 3444 NEONMAP1(vld4_v, arm_neon_vld4, 0), 3445 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 3446 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 3447 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3448 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 3449 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 3450 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3451 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3452 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 3453 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 3454 NEONMAP2(vminq_v, arm_neon_vminu, 
arm_neon_vmins, Add1ArgType | UnsignedAlts), 3455 NEONMAP0(vmovl_v), 3456 NEONMAP0(vmovn_v), 3457 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 3458 NEONMAP0(vmull_v), 3459 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 3460 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3461 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3462 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 3463 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3464 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3465 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 3466 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 3467 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 3468 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 3469 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 3470 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3471 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3472 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 3473 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 3474 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 3475 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 3476 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 3477 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 3478 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 3479 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 3480 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 3481 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 3482 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 3483 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3484 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3485 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, 
arm_neon_vqshifts, UnsignedAlts), 3486 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3487 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3488 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3489 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 3490 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 3491 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3492 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3493 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 3494 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3495 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3496 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 3497 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 3498 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3499 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3500 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 3501 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 3502 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 3503 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 3504 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 3505 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 3506 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 3507 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 3508 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 3509 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 3510 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 3511 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 3512 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3513 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3514 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3515 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, 
arm_neon_vrshifts, UnsignedAlts), 3516 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 3517 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 3518 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 3519 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 3520 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 3521 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 3522 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 3523 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 3524 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 3525 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 3526 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 3527 NEONMAP0(vshl_n_v), 3528 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 3529 NEONMAP0(vshll_n_v), 3530 NEONMAP0(vshlq_n_v), 3531 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 3532 NEONMAP0(vshr_n_v), 3533 NEONMAP0(vshrn_n_v), 3534 NEONMAP0(vshrq_n_v), 3535 NEONMAP1(vst1_v, arm_neon_vst1, 0), 3536 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 3537 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 3538 NEONMAP1(vst2_v, arm_neon_vst2, 0), 3539 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 3540 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 3541 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 3542 NEONMAP1(vst3_v, arm_neon_vst3, 0), 3543 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 3544 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 3545 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 3546 NEONMAP1(vst4_v, arm_neon_vst4, 0), 3547 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 3548 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 3549 NEONMAP0(vsubhn_v), 3550 NEONMAP0(vtrn_v), 3551 NEONMAP0(vtrnq_v), 3552 NEONMAP0(vtst_v), 3553 NEONMAP0(vtstq_v), 3554 NEONMAP0(vuzp_v), 3555 NEONMAP0(vuzpq_v), 3556 NEONMAP0(vzip_v), 3557 NEONMAP0(vzipq_v) 3558 }; 3559 3560 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 3561 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 3562 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 3563 
NEONMAP0(vaddhn_v), 3564 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 3565 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 3566 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 3567 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 3568 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 3569 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 3570 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 3571 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 3572 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 3573 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 3574 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 3575 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 3576 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 3577 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 3578 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3579 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3580 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 3581 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3582 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 3583 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 3584 NEONMAP0(vcvt_f32_v), 3585 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3586 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3587 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 3588 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 3589 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 3590 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 3591 NEONMAP0(vcvtq_f32_v), 3592 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3593 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 3594 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 3595 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 3596 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 3597 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 3598 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 3599 NEONMAP0(vext_v), 3600 NEONMAP0(vextq_v), 3601 NEONMAP0(vfma_v), 3602 
NEONMAP0(vfmaq_v), 3603 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 3604 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 3605 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 3606 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 3607 NEONMAP0(vmovl_v), 3608 NEONMAP0(vmovn_v), 3609 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 3610 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 3611 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 3612 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 3613 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 3614 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 3615 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 3616 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 3617 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 3618 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 3619 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 3620 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 3621 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 3622 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 3623 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 3624 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 3625 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 3626 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 3627 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 3628 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 3629 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 3630 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 3631 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, 
aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 3632 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 3633 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 3634 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 3635 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 3636 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 3637 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 3638 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 3639 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 3640 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 3641 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 3642 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 3643 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 3644 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 3645 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 3646 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 3647 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 3648 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 3649 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 3650 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 3651 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 3652 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 3653 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 3654 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 3655 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 3656 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 3657 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 3658 
NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 3659 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 3660 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 3661 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 3662 NEONMAP0(vshl_n_v), 3663 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 3664 NEONMAP0(vshll_n_v), 3665 NEONMAP0(vshlq_n_v), 3666 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 3667 NEONMAP0(vshr_n_v), 3668 NEONMAP0(vshrn_n_v), 3669 NEONMAP0(vshrq_n_v), 3670 NEONMAP0(vsubhn_v), 3671 NEONMAP0(vtst_v), 3672 NEONMAP0(vtstq_v), 3673 }; 3674 3675 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 3676 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 3677 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 3678 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 3679 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3680 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3681 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3682 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3683 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3684 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3685 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3686 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3687 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 3688 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3689 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 3690 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3691 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3692 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3693 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3694 
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3695 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3696 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3697 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3698 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3699 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3700 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3701 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3702 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3703 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3704 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3705 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3706 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3707 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3708 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3709 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3710 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3711 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3712 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3713 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3714 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3715 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3716 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3717 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 3718 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3719 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | 
Add1ArgType), 3720 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3721 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3722 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3723 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3724 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 3725 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3726 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3727 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3728 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3729 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3730 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3731 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3732 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3733 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3734 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3735 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3736 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3737 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3738 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3739 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3740 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3741 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3742 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3743 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3744 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3745 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 3746 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 3747 
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 3748 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3749 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3750 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3751 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3752 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3753 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3754 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3755 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3756 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3757 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3758 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3759 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 3760 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3761 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 3762 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3763 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3764 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 3765 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 3766 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3767 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3768 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 3769 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 3770 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 3771 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 3772 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 3773 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 3774 NEONMAP1(vqmovnd_s64, 
aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 3775 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 3776 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3777 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3778 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3779 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3780 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 3781 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3782 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3783 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3784 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 3785 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3786 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 3787 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 3788 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 3789 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3790 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3791 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 3792 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 3793 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3794 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3795 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 3796 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 3797 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 3798 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 3799 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3800 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | 
Use64BitVectors), 3801 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3802 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 3803 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 3804 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3805 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3806 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3807 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3808 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3809 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3810 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 3811 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 3812 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3813 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3814 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3815 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3816 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 3817 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 3818 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 3819 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 3820 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3821 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3822 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 3823 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 3824 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 3825 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3826 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3827 
NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3828 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3829 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 3830 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3831 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3832 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3833 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3834 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 3835 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 3836 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3837 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3838 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 3839 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 3840 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 3841 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 3842 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 3843 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 3844 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 3845 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 3846 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 3847 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 3848 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 3849 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 3850 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 3851 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 3852 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 3853 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 3854 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 3855 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 3856 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 3857 
NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 3858 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3859 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 3860 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3861 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 3862 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 3863 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 3864 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3865 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 3866 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3867 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 3868 }; 3869 3870 #undef NEONMAP0 3871 #undef NEONMAP1 3872 #undef NEONMAP2 3873 3874 static bool NEONSIMDIntrinsicsProvenSorted = false; 3875 3876 static bool AArch64SIMDIntrinsicsProvenSorted = false; 3877 static bool AArch64SISDIntrinsicsProvenSorted = false; 3878 3879 3880 static const NeonIntrinsicInfo * 3881 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 3882 unsigned BuiltinID, bool &MapProvenSorted) { 3883 3884 #ifndef NDEBUG 3885 if (!MapProvenSorted) { 3886 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 3887 MapProvenSorted = true; 3888 } 3889 #endif 3890 3891 const NeonIntrinsicInfo *Builtin = 3892 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 3893 3894 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 3895 return Builtin; 3896 3897 return nullptr; 3898 } 3899 3900 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 3901 unsigned Modifier, 3902 llvm::Type *ArgType, 3903 const CallExpr *E) { 3904 int VectorSize = 0; 3905 if (Modifier & Use64BitVectors) 3906 VectorSize = 64; 3907 else if (Modifier & Use128BitVectors) 3908 VectorSize = 128; 3909 3910 // Return type. 
3911 SmallVector<llvm::Type *, 3> Tys; 3912 if (Modifier & AddRetType) { 3913 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 3914 if (Modifier & VectorizeRetType) 3915 Ty = llvm::VectorType::get( 3916 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 3917 3918 Tys.push_back(Ty); 3919 } 3920 3921 // Arguments. 3922 if (Modifier & VectorizeArgTypes) { 3923 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 3924 ArgType = llvm::VectorType::get(ArgType, Elts); 3925 } 3926 3927 if (Modifier & (Add1ArgType | Add2ArgTypes)) 3928 Tys.push_back(ArgType); 3929 3930 if (Modifier & Add2ArgTypes) 3931 Tys.push_back(ArgType); 3932 3933 if (Modifier & InventFloatType) 3934 Tys.push_back(FloatTy); 3935 3936 return CGM.getIntrinsic(IntrinsicID, Tys); 3937 } 3938 3939 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 3940 const NeonIntrinsicInfo &SISDInfo, 3941 SmallVectorImpl<Value *> &Ops, 3942 const CallExpr *E) { 3943 unsigned BuiltinID = SISDInfo.BuiltinID; 3944 unsigned int Int = SISDInfo.LLVMIntrinsic; 3945 unsigned Modifier = SISDInfo.TypeModifier; 3946 const char *s = SISDInfo.NameHint; 3947 3948 switch (BuiltinID) { 3949 case NEON::BI__builtin_neon_vcled_s64: 3950 case NEON::BI__builtin_neon_vcled_u64: 3951 case NEON::BI__builtin_neon_vcles_f32: 3952 case NEON::BI__builtin_neon_vcled_f64: 3953 case NEON::BI__builtin_neon_vcltd_s64: 3954 case NEON::BI__builtin_neon_vcltd_u64: 3955 case NEON::BI__builtin_neon_vclts_f32: 3956 case NEON::BI__builtin_neon_vcltd_f64: 3957 case NEON::BI__builtin_neon_vcales_f32: 3958 case NEON::BI__builtin_neon_vcaled_f64: 3959 case NEON::BI__builtin_neon_vcalts_f32: 3960 case NEON::BI__builtin_neon_vcaltd_f64: 3961 // Only one direction of comparisons actually exist, cmle is actually a cmge 3962 // with swapped operands. The table gives us the right intrinsic but we 3963 // still need to do the swap. 
3964 std::swap(Ops[0], Ops[1]); 3965 break; 3966 } 3967 3968 assert(Int && "Generic code assumes a valid intrinsic"); 3969 3970 // Determine the type(s) of this overloaded AArch64 intrinsic. 3971 const Expr *Arg = E->getArg(0); 3972 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 3973 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 3974 3975 int j = 0; 3976 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 3977 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3978 ai != ae; ++ai, ++j) { 3979 llvm::Type *ArgTy = ai->getType(); 3980 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 3981 ArgTy->getPrimitiveSizeInBits()) 3982 continue; 3983 3984 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 3985 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 3986 // it before inserting. 3987 Ops[j] = 3988 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 3989 Ops[j] = 3990 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 3991 } 3992 3993 Value *Result = CGF.EmitNeonCall(F, Ops, s); 3994 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 3995 if (ResultType->getPrimitiveSizeInBits() < 3996 Result->getType()->getPrimitiveSizeInBits()) 3997 return CGF.Builder.CreateExtractElement(Result, C0); 3998 3999 return CGF.Builder.CreateBitCast(Result, ResultType, s); 4000 } 4001 4002 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 4003 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 4004 const char *NameHint, unsigned Modifier, const CallExpr *E, 4005 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) { 4006 // Get the last argument, which specifies the vector type. 4007 llvm::APSInt NeonTypeConst; 4008 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 4009 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 4010 return nullptr; 4011 4012 // Determine the type of this overloaded NEON intrinsic. 
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  // VTy is the vector type for the decoded flags; Ty starts out as the same
  // type but is reassigned by some cases below (e.g. to a pointer type).
  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Materializes the alignment of an address as an i32 constant operand.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Int is the intrinsic used by the generic path and by several cases below:
  // AltLLVMIntrinsic when the UnsignedAlts modifier is set and the element
  // type is not unsigned, LLVMIntrinsic otherwise.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  // Cases that `return` emit the whole builtin themselves; cases that `break`
  // (and the default) only adjust Ops and continue to the generic emission
  // after the switch.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point element types use the generic fabs intrinsic; all others
    // use the intrinsic supplied by the caller.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // Emitted as the vcage/vcagt forms below with the operands exchanged.
    std::swap(Ops[0], Ops[1]);
    LLVM_FALLTHROUGH;
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on both VTy and a float/double vector with
    // the same element count (float for 32-bit elements, double otherwise).
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer-to-float conversion is emitted as a plain uitofp/sitofp to the
    // Float32 vector type of the same (quad or non-quad) width.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    // Chosen from signedness alone; the UnsignedAlts modifier is not
    // consulted here.
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float-to-integer conversion is emitted as a plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shuffle of Ops[0] and Ops[1] whose mask is the run of
    // consecutive lane indices CV, CV+1, ..., one per result element.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    // The alignment of PtrOp0 is passed as a trailing i32 operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // The intrinsic is called on Ops[1] with PtrOp1's alignment, and its
    // result is then stored through Ops[0], cast to a pointer to the result
    // type.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element through PtrOp0, insert it into lane 0 of an undef
    // vector, and splat that vector.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Cast every operand from index 2 up to, but not including, the last one
    // to the vector type; the last operand is left untouched.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination for the result, so the call takes Ops[1..].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widening move: extend from the truncated-element vector type to Ty,
    // zero-extending for unsigned and sign-extending otherwise.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrowing move: truncate from the extended-element vector type to Ty.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    // A polynomial element type overrides the signed/unsigned choice above.
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two calls: LLVMIntrinsic is applied to every operand after the first,
    // then AltLLVMIntrinsic combines Ops[0] with that intermediate result.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point types use LLVMIntrinsic; all other types use the
    // alternative.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Emitted as a plain IR shl by the shift vector built from Ops[1].
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Widen from the truncated-element type (zext or sext by signedness),
    // then shift left in the wide type.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the extended-element type (logical for unsigned,
    // arithmetic otherwise), then truncate to the narrow result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    // The alignment of PtrOp0 is passed as a trailing i32 operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is treated as a pointer to two consecutive result vectors;
    // each is a shuffle of Ops[1] and Ops[2] stored to slot vi.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Mask pairs lane i+vi of the first input with the same lane of the
      // second input (offset by e), for every even i.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    // The value returned is the second store instruction.
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (Ops[0] & Ops[1]) != 0 per lane, sign-extended back to Ty.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same two-result store scheme as vtrn, with a different shuffle mask.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // vi == 0 selects the even lanes of the concatenated inputs, vi == 1
      // the odd lanes.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same two-result store scheme as vtrn, with a different shuffle mask.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Interleaves lanes of the two inputs: vi == 0 draws from the low half
      // of each input, vi == 1 from the high half.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by the default case and by cases that `break`.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
4420 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4421 4422 Value *Result = EmitNeonCall(F, Ops, NameHint); 4423 llvm::Type *ResultType = ConvertType(E->getType()); 4424 // AArch64 intrinsic one-element vector type cast to 4425 // scalar type expected by the builtin 4426 return Builder.CreateBitCast(Result, ResultType, NameHint); 4427 } 4428 4429 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 4430 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 4431 const CmpInst::Predicate Ip, const Twine &Name) { 4432 llvm::Type *OTy = Op->getType(); 4433 4434 // FIXME: this is utterly horrific. We should not be looking at previous 4435 // codegen context to find out what needs doing. Unfortunately TableGen 4436 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 4437 // (etc). 4438 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 4439 OTy = BI->getOperand(0)->getType(); 4440 4441 Op = Builder.CreateBitCast(Op, OTy); 4442 if (OTy->getScalarType()->isFloatingPointTy()) { 4443 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 4444 } else { 4445 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 4446 } 4447 return Builder.CreateSExt(Op, Ty, Name); 4448 } 4449 4450 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 4451 Value *ExtOp, Value *IndexOp, 4452 llvm::Type *ResTy, unsigned IntID, 4453 const char *Name) { 4454 SmallVector<Value *, 2> TblOps; 4455 if (ExtOp) 4456 TblOps.push_back(ExtOp); 4457 4458 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 4459 SmallVector<uint32_t, 16> Indices; 4460 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 4461 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 4462 Indices.push_back(2*i); 4463 Indices.push_back(2*i+1); 4464 } 4465 4466 int PairPos = 0, End = Ops.size() - 1; 4467 while (PairPos < End) { 4468 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4469 
Ops[PairPos+1], Indices, 4470 Name)); 4471 PairPos += 2; 4472 } 4473 4474 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 4475 // of the 128-bit lookup table with zero. 4476 if (PairPos == End) { 4477 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 4478 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4479 ZeroTbl, Indices, Name)); 4480 } 4481 4482 Function *TblF; 4483 TblOps.push_back(IndexOp); 4484 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 4485 4486 return CGF.EmitNeonCall(TblF, TblOps, Name); 4487 } 4488 4489 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 4490 unsigned Value; 4491 switch (BuiltinID) { 4492 default: 4493 return nullptr; 4494 case ARM::BI__builtin_arm_nop: 4495 Value = 0; 4496 break; 4497 case ARM::BI__builtin_arm_yield: 4498 case ARM::BI__yield: 4499 Value = 1; 4500 break; 4501 case ARM::BI__builtin_arm_wfe: 4502 case ARM::BI__wfe: 4503 Value = 2; 4504 break; 4505 case ARM::BI__builtin_arm_wfi: 4506 case ARM::BI__wfi: 4507 Value = 3; 4508 break; 4509 case ARM::BI__builtin_arm_sev: 4510 case ARM::BI__sev: 4511 Value = 4; 4512 break; 4513 case ARM::BI__builtin_arm_sevl: 4514 case ARM::BI__sevl: 4515 Value = 5; 4516 break; 4517 } 4518 4519 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 4520 llvm::ConstantInt::get(Int32Ty, Value)); 4521 } 4522 4523 // Generates the IR for the read/write special register builtin, 4524 // ValueType is the type of the value that is to be written or read, 4525 // RegisterType is the type of the register being written to or read from. 4526 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 4527 const CallExpr *E, 4528 llvm::Type *RegisterType, 4529 llvm::Type *ValueType, 4530 bool IsRead, 4531 StringRef SysReg = "") { 4532 // write and register intrinsics only support 32 and 64 bit operations. 
4533 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4534 && "Unsupported size for register."); 4535 4536 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4537 CodeGen::CodeGenModule &CGM = CGF.CGM; 4538 LLVMContext &Context = CGM.getLLVMContext(); 4539 4540 if (SysReg.empty()) { 4541 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4542 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 4543 } 4544 4545 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4546 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4547 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4548 4549 llvm::Type *Types[] = { RegisterType }; 4550 4551 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4552 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4553 && "Can't fit 64-bit value in 32-bit register"); 4554 4555 if (IsRead) { 4556 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4557 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4558 4559 if (MixedTypes) 4560 // Read into 64 bit register and then truncate result to 32 bit. 4561 return Builder.CreateTrunc(Call, ValueType); 4562 4563 if (ValueType->isPointerTy()) 4564 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4565 return Builder.CreateIntToPtr(Call, ValueType); 4566 4567 return Call; 4568 } 4569 4570 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4571 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4572 if (MixedTypes) { 4573 // Extend 32 bit write value to 64 bit to pass to write. 4574 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4575 return Builder.CreateCall(F, { Metadata, ArgValue }); 4576 } 4577 4578 if (ValueType->isPointerTy()) { 4579 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
4580 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4581 return Builder.CreateCall(F, { Metadata, ArgValue }); 4582 } 4583 4584 return Builder.CreateCall(F, { Metadata, ArgValue }); 4585 } 4586 4587 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4588 /// argument that specifies the vector type. 4589 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4590 switch (BuiltinID) { 4591 default: break; 4592 case NEON::BI__builtin_neon_vget_lane_i8: 4593 case NEON::BI__builtin_neon_vget_lane_i16: 4594 case NEON::BI__builtin_neon_vget_lane_i32: 4595 case NEON::BI__builtin_neon_vget_lane_i64: 4596 case NEON::BI__builtin_neon_vget_lane_f32: 4597 case NEON::BI__builtin_neon_vgetq_lane_i8: 4598 case NEON::BI__builtin_neon_vgetq_lane_i16: 4599 case NEON::BI__builtin_neon_vgetq_lane_i32: 4600 case NEON::BI__builtin_neon_vgetq_lane_i64: 4601 case NEON::BI__builtin_neon_vgetq_lane_f32: 4602 case NEON::BI__builtin_neon_vset_lane_i8: 4603 case NEON::BI__builtin_neon_vset_lane_i16: 4604 case NEON::BI__builtin_neon_vset_lane_i32: 4605 case NEON::BI__builtin_neon_vset_lane_i64: 4606 case NEON::BI__builtin_neon_vset_lane_f32: 4607 case NEON::BI__builtin_neon_vsetq_lane_i8: 4608 case NEON::BI__builtin_neon_vsetq_lane_i16: 4609 case NEON::BI__builtin_neon_vsetq_lane_i32: 4610 case NEON::BI__builtin_neon_vsetq_lane_i64: 4611 case NEON::BI__builtin_neon_vsetq_lane_f32: 4612 case NEON::BI__builtin_neon_vsha1h_u32: 4613 case NEON::BI__builtin_neon_vsha1cq_u32: 4614 case NEON::BI__builtin_neon_vsha1pq_u32: 4615 case NEON::BI__builtin_neon_vsha1mq_u32: 4616 case ARM::BI_MoveToCoprocessor: 4617 case ARM::BI_MoveToCoprocessor2: 4618 return false; 4619 } 4620 return true; 4621 } 4622 4623 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4624 const CallExpr *E) { 4625 if (auto Hint = GetValueForARMHint(BuiltinID)) 4626 return Hint; 4627 4628 if (BuiltinID == ARM::BI__emit) { 4629 bool IsThumb = getTarget().getTriple().getArch() == 
llvm::Triple::thumb; 4630 llvm::FunctionType *FTy = 4631 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 4632 4633 APSInt Value; 4634 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 4635 llvm_unreachable("Sema will ensure that the parameter is constant"); 4636 4637 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 4638 4639 llvm::InlineAsm *Emit = 4640 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 4641 /*SideEffects=*/true) 4642 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 4643 /*SideEffects=*/true); 4644 4645 return Builder.CreateCall(Emit); 4646 } 4647 4648 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 4649 Value *Option = EmitScalarExpr(E->getArg(0)); 4650 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 4651 } 4652 4653 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 4654 Value *Address = EmitScalarExpr(E->getArg(0)); 4655 Value *RW = EmitScalarExpr(E->getArg(1)); 4656 Value *IsData = EmitScalarExpr(E->getArg(2)); 4657 4658 // Locality is not supported on ARM target 4659 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 4660 4661 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4662 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4663 } 4664 4665 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 4666 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4667 return Builder.CreateCall( 4668 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 4669 } 4670 4671 if (BuiltinID == ARM::BI__clear_cache) { 4672 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4673 const FunctionDecl *FD = E->getDirectCallee(); 4674 Value *Ops[2]; 4675 for (unsigned i = 0; i < 2; i++) 4676 Ops[i] = EmitScalarExpr(E->getArg(i)); 4677 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4678 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4679 StringRef Name = FD->getName(); 4680 return 
EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4681 } 4682 4683 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 4684 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 4685 Function *F; 4686 4687 switch (BuiltinID) { 4688 default: llvm_unreachable("unexpected builtin"); 4689 case ARM::BI__builtin_arm_mcrr: 4690 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 4691 break; 4692 case ARM::BI__builtin_arm_mcrr2: 4693 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 4694 break; 4695 } 4696 4697 // MCRR{2} instruction has 5 operands but 4698 // the intrinsic has 4 because Rt and Rt2 4699 // are represented as a single unsigned 64 4700 // bit integer in the intrinsic definition 4701 // but internally it's represented as 2 32 4702 // bit integers. 4703 4704 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4705 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4706 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 4707 Value *CRm = EmitScalarExpr(E->getArg(3)); 4708 4709 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4710 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 4711 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 4712 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 4713 4714 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 4715 } 4716 4717 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 4718 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 4719 Function *F; 4720 4721 switch (BuiltinID) { 4722 default: llvm_unreachable("unexpected builtin"); 4723 case ARM::BI__builtin_arm_mrrc: 4724 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 4725 break; 4726 case ARM::BI__builtin_arm_mrrc2: 4727 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 4728 break; 4729 } 4730 4731 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4732 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4733 Value *CRm = EmitScalarExpr(E->getArg(2)); 4734 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 4735 4736 // Returns an unsigned 64 bit integer, represented 4737 // as two 32 bit integers. 
4738 4739 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 4740 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 4741 Rt = Builder.CreateZExt(Rt, Int64Ty); 4742 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 4743 4744 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 4745 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 4746 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 4747 4748 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 4749 } 4750 4751 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 4752 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 4753 BuiltinID == ARM::BI__builtin_arm_ldaex) && 4754 getContext().getTypeSize(E->getType()) == 64) || 4755 BuiltinID == ARM::BI__ldrexd) { 4756 Function *F; 4757 4758 switch (BuiltinID) { 4759 default: llvm_unreachable("unexpected builtin"); 4760 case ARM::BI__builtin_arm_ldaex: 4761 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 4762 break; 4763 case ARM::BI__builtin_arm_ldrexd: 4764 case ARM::BI__builtin_arm_ldrex: 4765 case ARM::BI__ldrexd: 4766 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 4767 break; 4768 } 4769 4770 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4771 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4772 "ldrexd"); 4773 4774 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4775 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4776 Val0 = Builder.CreateZExt(Val0, Int64Ty); 4777 Val1 = Builder.CreateZExt(Val1, Int64Ty); 4778 4779 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 4780 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4781 Val = Builder.CreateOr(Val, Val1); 4782 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4783 } 4784 4785 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 4786 BuiltinID == ARM::BI__builtin_arm_ldaex) { 4787 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4788 4789 QualType Ty = E->getType(); 4790 llvm::Type *RealResTy = ConvertType(Ty); 4791 llvm::Type *PtrTy = llvm::IntegerType::get( 
4792 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 4793 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 4794 4795 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 4796 ? Intrinsic::arm_ldaex 4797 : Intrinsic::arm_ldrex, 4798 PtrTy); 4799 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 4800 4801 if (RealResTy->isPointerTy()) 4802 return Builder.CreateIntToPtr(Val, RealResTy); 4803 else { 4804 llvm::Type *IntResTy = llvm::IntegerType::get( 4805 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 4806 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4807 return Builder.CreateBitCast(Val, RealResTy); 4808 } 4809 } 4810 4811 if (BuiltinID == ARM::BI__builtin_arm_strexd || 4812 ((BuiltinID == ARM::BI__builtin_arm_stlex || 4813 BuiltinID == ARM::BI__builtin_arm_strex) && 4814 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 4815 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4816 ? Intrinsic::arm_stlexd 4817 : Intrinsic::arm_strexd); 4818 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 4819 4820 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4821 Value *Val = EmitScalarExpr(E->getArg(0)); 4822 Builder.CreateStore(Val, Tmp); 4823 4824 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4825 Val = Builder.CreateLoad(LdPtr); 4826 4827 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4828 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4829 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 4830 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 4831 } 4832 4833 if (BuiltinID == ARM::BI__builtin_arm_strex || 4834 BuiltinID == ARM::BI__builtin_arm_stlex) { 4835 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4836 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4837 4838 QualType Ty = E->getArg(0)->getType(); 4839 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4840 
getContext().getTypeSize(Ty)); 4841 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4842 4843 if (StoreVal->getType()->isPointerTy()) 4844 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 4845 else { 4846 llvm::Type *IntTy = llvm::IntegerType::get( 4847 getLLVMContext(), 4848 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 4849 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 4850 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 4851 } 4852 4853 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4854 ? Intrinsic::arm_stlex 4855 : Intrinsic::arm_strex, 4856 StoreAddr->getType()); 4857 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 4858 } 4859 4860 switch (BuiltinID) { 4861 case ARM::BI__iso_volatile_load8: 4862 case ARM::BI__iso_volatile_load16: 4863 case ARM::BI__iso_volatile_load32: 4864 case ARM::BI__iso_volatile_load64: { 4865 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4866 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4867 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 4868 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4869 LoadSize.getQuantity() * 8); 4870 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4871 llvm::LoadInst *Load = 4872 Builder.CreateAlignedLoad(Ptr, LoadSize); 4873 Load->setVolatile(true); 4874 return Load; 4875 } 4876 case ARM::BI__iso_volatile_store8: 4877 case ARM::BI__iso_volatile_store16: 4878 case ARM::BI__iso_volatile_store32: 4879 case ARM::BI__iso_volatile_store64: { 4880 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4881 Value *Value = EmitScalarExpr(E->getArg(1)); 4882 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4883 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 4884 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4885 StoreSize.getQuantity() * 8); 4886 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4887 llvm::StoreInst *Store = 4888 
Builder.CreateAlignedStore(Value, Ptr, 4889 StoreSize); 4890 Store->setVolatile(true); 4891 return Store; 4892 } 4893 } 4894 4895 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 4896 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 4897 return Builder.CreateCall(F); 4898 } 4899 4900 // CRC32 4901 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4902 switch (BuiltinID) { 4903 case ARM::BI__builtin_arm_crc32b: 4904 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 4905 case ARM::BI__builtin_arm_crc32cb: 4906 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 4907 case ARM::BI__builtin_arm_crc32h: 4908 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 4909 case ARM::BI__builtin_arm_crc32ch: 4910 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 4911 case ARM::BI__builtin_arm_crc32w: 4912 case ARM::BI__builtin_arm_crc32d: 4913 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 4914 case ARM::BI__builtin_arm_crc32cw: 4915 case ARM::BI__builtin_arm_crc32cd: 4916 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 4917 } 4918 4919 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4920 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4921 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4922 4923 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 4924 // intrinsics, hence we need different codegen for these cases. 
4925 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 4926 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 4927 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4928 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 4929 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 4930 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 4931 4932 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4933 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 4934 return Builder.CreateCall(F, {Res, Arg1b}); 4935 } else { 4936 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 4937 4938 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4939 return Builder.CreateCall(F, {Arg0, Arg1}); 4940 } 4941 } 4942 4943 if (BuiltinID == ARM::BI__builtin_arm_rsr || 4944 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4945 BuiltinID == ARM::BI__builtin_arm_rsrp || 4946 BuiltinID == ARM::BI__builtin_arm_wsr || 4947 BuiltinID == ARM::BI__builtin_arm_wsr64 || 4948 BuiltinID == ARM::BI__builtin_arm_wsrp) { 4949 4950 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 4951 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4952 BuiltinID == ARM::BI__builtin_arm_rsrp; 4953 4954 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 4955 BuiltinID == ARM::BI__builtin_arm_wsrp; 4956 4957 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 4958 BuiltinID == ARM::BI__builtin_arm_wsr64; 4959 4960 llvm::Type *ValueType; 4961 llvm::Type *RegisterType; 4962 if (IsPointerBuiltin) { 4963 ValueType = VoidPtrTy; 4964 RegisterType = Int32Ty; 4965 } else if (Is64Bit) { 4966 ValueType = RegisterType = Int64Ty; 4967 } else { 4968 ValueType = RegisterType = Int32Ty; 4969 } 4970 4971 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 4972 } 4973 4974 // Find out if any arguments are required to be integer constant 4975 // expressions. 
4976 unsigned ICEArguments = 0; 4977 ASTContext::GetBuiltinTypeError Error; 4978 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4979 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4980 4981 auto getAlignmentValue32 = [&](Address addr) -> Value* { 4982 return Builder.getInt32(addr.getAlignment().getQuantity()); 4983 }; 4984 4985 Address PtrOp0 = Address::invalid(); 4986 Address PtrOp1 = Address::invalid(); 4987 SmallVector<Value*, 4> Ops; 4988 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 4989 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 4990 for (unsigned i = 0, e = NumArgs; i != e; i++) { 4991 if (i == 0) { 4992 switch (BuiltinID) { 4993 case NEON::BI__builtin_neon_vld1_v: 4994 case NEON::BI__builtin_neon_vld1q_v: 4995 case NEON::BI__builtin_neon_vld1q_lane_v: 4996 case NEON::BI__builtin_neon_vld1_lane_v: 4997 case NEON::BI__builtin_neon_vld1_dup_v: 4998 case NEON::BI__builtin_neon_vld1q_dup_v: 4999 case NEON::BI__builtin_neon_vst1_v: 5000 case NEON::BI__builtin_neon_vst1q_v: 5001 case NEON::BI__builtin_neon_vst1q_lane_v: 5002 case NEON::BI__builtin_neon_vst1_lane_v: 5003 case NEON::BI__builtin_neon_vst2_v: 5004 case NEON::BI__builtin_neon_vst2q_v: 5005 case NEON::BI__builtin_neon_vst2_lane_v: 5006 case NEON::BI__builtin_neon_vst2q_lane_v: 5007 case NEON::BI__builtin_neon_vst3_v: 5008 case NEON::BI__builtin_neon_vst3q_v: 5009 case NEON::BI__builtin_neon_vst3_lane_v: 5010 case NEON::BI__builtin_neon_vst3q_lane_v: 5011 case NEON::BI__builtin_neon_vst4_v: 5012 case NEON::BI__builtin_neon_vst4q_v: 5013 case NEON::BI__builtin_neon_vst4_lane_v: 5014 case NEON::BI__builtin_neon_vst4q_lane_v: 5015 // Get the alignment for the argument in addition to the value; 5016 // we'll use it later. 
5017 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 5018 Ops.push_back(PtrOp0.getPointer()); 5019 continue; 5020 } 5021 } 5022 if (i == 1) { 5023 switch (BuiltinID) { 5024 case NEON::BI__builtin_neon_vld2_v: 5025 case NEON::BI__builtin_neon_vld2q_v: 5026 case NEON::BI__builtin_neon_vld3_v: 5027 case NEON::BI__builtin_neon_vld3q_v: 5028 case NEON::BI__builtin_neon_vld4_v: 5029 case NEON::BI__builtin_neon_vld4q_v: 5030 case NEON::BI__builtin_neon_vld2_lane_v: 5031 case NEON::BI__builtin_neon_vld2q_lane_v: 5032 case NEON::BI__builtin_neon_vld3_lane_v: 5033 case NEON::BI__builtin_neon_vld3q_lane_v: 5034 case NEON::BI__builtin_neon_vld4_lane_v: 5035 case NEON::BI__builtin_neon_vld4q_lane_v: 5036 case NEON::BI__builtin_neon_vld2_dup_v: 5037 case NEON::BI__builtin_neon_vld3_dup_v: 5038 case NEON::BI__builtin_neon_vld4_dup_v: 5039 // Get the alignment for the argument in addition to the value; 5040 // we'll use it later. 5041 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 5042 Ops.push_back(PtrOp1.getPointer()); 5043 continue; 5044 } 5045 } 5046 5047 if ((ICEArguments & (1 << i)) == 0) { 5048 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5049 } else { 5050 // If this is required to be a constant, constant fold it so that we know 5051 // that the generated intrinsic gets a ConstantInt. 
5052 llvm::APSInt Result; 5053 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5054 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 5055 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5056 } 5057 } 5058 5059 switch (BuiltinID) { 5060 default: break; 5061 5062 case NEON::BI__builtin_neon_vget_lane_i8: 5063 case NEON::BI__builtin_neon_vget_lane_i16: 5064 case NEON::BI__builtin_neon_vget_lane_i32: 5065 case NEON::BI__builtin_neon_vget_lane_i64: 5066 case NEON::BI__builtin_neon_vget_lane_f32: 5067 case NEON::BI__builtin_neon_vgetq_lane_i8: 5068 case NEON::BI__builtin_neon_vgetq_lane_i16: 5069 case NEON::BI__builtin_neon_vgetq_lane_i32: 5070 case NEON::BI__builtin_neon_vgetq_lane_i64: 5071 case NEON::BI__builtin_neon_vgetq_lane_f32: 5072 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 5073 5074 case NEON::BI__builtin_neon_vset_lane_i8: 5075 case NEON::BI__builtin_neon_vset_lane_i16: 5076 case NEON::BI__builtin_neon_vset_lane_i32: 5077 case NEON::BI__builtin_neon_vset_lane_i64: 5078 case NEON::BI__builtin_neon_vset_lane_f32: 5079 case NEON::BI__builtin_neon_vsetq_lane_i8: 5080 case NEON::BI__builtin_neon_vsetq_lane_i16: 5081 case NEON::BI__builtin_neon_vsetq_lane_i32: 5082 case NEON::BI__builtin_neon_vsetq_lane_i64: 5083 case NEON::BI__builtin_neon_vsetq_lane_f32: 5084 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5085 5086 case NEON::BI__builtin_neon_vsha1h_u32: 5087 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 5088 "vsha1h"); 5089 case NEON::BI__builtin_neon_vsha1cq_u32: 5090 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 5091 "vsha1h"); 5092 case NEON::BI__builtin_neon_vsha1pq_u32: 5093 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 5094 "vsha1h"); 5095 case NEON::BI__builtin_neon_vsha1mq_u32: 5096 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 5097 "vsha1h"); 
5098 5099 // The ARM _MoveToCoprocessor builtins put the input register value as 5100 // the first argument, but the LLVM intrinsic expects it as the third one. 5101 case ARM::BI_MoveToCoprocessor: 5102 case ARM::BI_MoveToCoprocessor2: { 5103 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 5104 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 5105 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 5106 Ops[3], Ops[4], Ops[5]}); 5107 } 5108 case ARM::BI_BitScanForward: 5109 case ARM::BI_BitScanForward64: 5110 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 5111 case ARM::BI_BitScanReverse: 5112 case ARM::BI_BitScanReverse64: 5113 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 5114 5115 case ARM::BI_InterlockedAnd64: 5116 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 5117 case ARM::BI_InterlockedExchange64: 5118 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 5119 case ARM::BI_InterlockedExchangeAdd64: 5120 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 5121 case ARM::BI_InterlockedExchangeSub64: 5122 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 5123 case ARM::BI_InterlockedOr64: 5124 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 5125 case ARM::BI_InterlockedXor64: 5126 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 5127 case ARM::BI_InterlockedDecrement64: 5128 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 5129 case ARM::BI_InterlockedIncrement64: 5130 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 5131 } 5132 5133 // Get the last argument, which specifies the vector type. 
5134 assert(HasExtraArg); 5135 llvm::APSInt Result; 5136 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5137 if (!Arg->isIntegerConstantExpr(Result, getContext())) 5138 return nullptr; 5139 5140 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 5141 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 5142 // Determine the overloaded type of this builtin. 5143 llvm::Type *Ty; 5144 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 5145 Ty = FloatTy; 5146 else 5147 Ty = DoubleTy; 5148 5149 // Determine whether this is an unsigned conversion or not. 5150 bool usgn = Result.getZExtValue() == 1; 5151 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 5152 5153 // Call the appropriate intrinsic. 5154 Function *F = CGM.getIntrinsic(Int, Ty); 5155 return Builder.CreateCall(F, Ops, "vcvtr"); 5156 } 5157 5158 // Determine the type of this overloaded NEON intrinsic. 5159 NeonTypeFlags Type(Result.getZExtValue()); 5160 bool usgn = Type.isUnsigned(); 5161 bool rightShift = false; 5162 5163 llvm::VectorType *VTy = GetNeonType(this, Type); 5164 llvm::Type *Ty = VTy; 5165 if (!Ty) 5166 return nullptr; 5167 5168 // Many NEON builtins have identical semantics and uses in ARM and 5169 // AArch64. Emit these in a single function. 5170 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 5171 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5172 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 5173 if (Builtin) 5174 return EmitCommonNeonBuiltinExpr( 5175 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5176 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); 5177 5178 unsigned Int; 5179 switch (BuiltinID) { 5180 default: return nullptr; 5181 case NEON::BI__builtin_neon_vld1q_lane_v: 5182 // Handle 64-bit integer elements as a special case. Use shuffles of 5183 // one-element vectors to avoid poor code for i64 in the backend. 5184 if (VTy->getElementType()->isIntegerTy(64)) { 5185 // Extract the other lane. 
5186 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5187 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 5188 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 5189 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5190 // Load the value as a one-element vector. 5191 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 5192 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5193 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 5194 Value *Align = getAlignmentValue32(PtrOp0); 5195 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 5196 // Combine them. 5197 uint32_t Indices[] = {1 - Lane, Lane}; 5198 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 5199 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 5200 } 5201 // fall through 5202 case NEON::BI__builtin_neon_vld1_lane_v: { 5203 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5204 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 5205 Value *Ld = Builder.CreateLoad(PtrOp0); 5206 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 5207 } 5208 case NEON::BI__builtin_neon_vld2_dup_v: 5209 case NEON::BI__builtin_neon_vld3_dup_v: 5210 case NEON::BI__builtin_neon_vld4_dup_v: { 5211 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
5212 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 5213 switch (BuiltinID) { 5214 case NEON::BI__builtin_neon_vld2_dup_v: 5215 Int = Intrinsic::arm_neon_vld2; 5216 break; 5217 case NEON::BI__builtin_neon_vld3_dup_v: 5218 Int = Intrinsic::arm_neon_vld3; 5219 break; 5220 case NEON::BI__builtin_neon_vld4_dup_v: 5221 Int = Intrinsic::arm_neon_vld4; 5222 break; 5223 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5224 } 5225 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5226 Function *F = CGM.getIntrinsic(Int, Tys); 5227 llvm::Value *Align = getAlignmentValue32(PtrOp1); 5228 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 5229 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5230 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5231 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5232 } 5233 switch (BuiltinID) { 5234 case NEON::BI__builtin_neon_vld2_dup_v: 5235 Int = Intrinsic::arm_neon_vld2lane; 5236 break; 5237 case NEON::BI__builtin_neon_vld3_dup_v: 5238 Int = Intrinsic::arm_neon_vld3lane; 5239 break; 5240 case NEON::BI__builtin_neon_vld4_dup_v: 5241 Int = Intrinsic::arm_neon_vld4lane; 5242 break; 5243 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5244 } 5245 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5246 Function *F = CGM.getIntrinsic(Int, Tys); 5247 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 5248 5249 SmallVector<Value*, 6> Args; 5250 Args.push_back(Ops[1]); 5251 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 5252 5253 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5254 Args.push_back(CI); 5255 Args.push_back(getAlignmentValue32(PtrOp1)); 5256 5257 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 5258 // splat lane 0 to all elts in each vector of the result. 
5259 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5260 Value *Val = Builder.CreateExtractValue(Ops[1], i); 5261 Value *Elt = Builder.CreateBitCast(Val, Ty); 5262 Elt = EmitNeonSplat(Elt, CI); 5263 Elt = Builder.CreateBitCast(Elt, Val->getType()); 5264 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 5265 } 5266 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5267 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5268 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5269 } 5270 case NEON::BI__builtin_neon_vqrshrn_n_v: 5271 Int = 5272 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 5273 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 5274 1, true); 5275 case NEON::BI__builtin_neon_vqrshrun_n_v: 5276 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 5277 Ops, "vqrshrun_n", 1, true); 5278 case NEON::BI__builtin_neon_vqshrn_n_v: 5279 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 5280 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 5281 1, true); 5282 case NEON::BI__builtin_neon_vqshrun_n_v: 5283 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 5284 Ops, "vqshrun_n", 1, true); 5285 case NEON::BI__builtin_neon_vrecpe_v: 5286 case NEON::BI__builtin_neon_vrecpeq_v: 5287 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 5288 Ops, "vrecpe"); 5289 case NEON::BI__builtin_neon_vrshrn_n_v: 5290 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 5291 Ops, "vrshrn_n", 1, true); 5292 case NEON::BI__builtin_neon_vrsra_n_v: 5293 case NEON::BI__builtin_neon_vrsraq_n_v: 5294 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5295 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5296 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 5297 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 5298 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 5299 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 5300 case NEON::BI__builtin_neon_vsri_n_v: 5301 case NEON::BI__builtin_neon_vsriq_n_v: 5302 rightShift = true; 5303 LLVM_FALLTHROUGH; 5304 case NEON::BI__builtin_neon_vsli_n_v: 5305 case NEON::BI__builtin_neon_vsliq_n_v: 5306 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 5307 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 5308 Ops, "vsli_n"); 5309 case NEON::BI__builtin_neon_vsra_n_v: 5310 case NEON::BI__builtin_neon_vsraq_n_v: 5311 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5312 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5313 return Builder.CreateAdd(Ops[0], Ops[1]); 5314 case NEON::BI__builtin_neon_vst1q_lane_v: 5315 // Handle 64-bit integer elements as a special case. Use a shuffle to get 5316 // a one-element vector and avoid poor code for i64 in the backend. 
5317 if (VTy->getElementType()->isIntegerTy(64)) { 5318 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5319 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 5320 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5321 Ops[2] = getAlignmentValue32(PtrOp0); 5322 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 5323 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 5324 Tys), Ops); 5325 } 5326 // fall through 5327 case NEON::BI__builtin_neon_vst1_lane_v: { 5328 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5329 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5330 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5331 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 5332 return St; 5333 } 5334 case NEON::BI__builtin_neon_vtbl1_v: 5335 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 5336 Ops, "vtbl1"); 5337 case NEON::BI__builtin_neon_vtbl2_v: 5338 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 5339 Ops, "vtbl2"); 5340 case NEON::BI__builtin_neon_vtbl3_v: 5341 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 5342 Ops, "vtbl3"); 5343 case NEON::BI__builtin_neon_vtbl4_v: 5344 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 5345 Ops, "vtbl4"); 5346 case NEON::BI__builtin_neon_vtbx1_v: 5347 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 5348 Ops, "vtbx1"); 5349 case NEON::BI__builtin_neon_vtbx2_v: 5350 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 5351 Ops, "vtbx2"); 5352 case NEON::BI__builtin_neon_vtbx3_v: 5353 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 5354 Ops, "vtbx3"); 5355 case NEON::BI__builtin_neon_vtbx4_v: 5356 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 5357 Ops, "vtbx4"); 5358 } 5359 } 5360 5361 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 5362 const CallExpr *E, 5363 SmallVectorImpl<Value *> &Ops) { 
5364 unsigned int Int = 0; 5365 const char *s = nullptr; 5366 5367 switch (BuiltinID) { 5368 default: 5369 return nullptr; 5370 case NEON::BI__builtin_neon_vtbl1_v: 5371 case NEON::BI__builtin_neon_vqtbl1_v: 5372 case NEON::BI__builtin_neon_vqtbl1q_v: 5373 case NEON::BI__builtin_neon_vtbl2_v: 5374 case NEON::BI__builtin_neon_vqtbl2_v: 5375 case NEON::BI__builtin_neon_vqtbl2q_v: 5376 case NEON::BI__builtin_neon_vtbl3_v: 5377 case NEON::BI__builtin_neon_vqtbl3_v: 5378 case NEON::BI__builtin_neon_vqtbl3q_v: 5379 case NEON::BI__builtin_neon_vtbl4_v: 5380 case NEON::BI__builtin_neon_vqtbl4_v: 5381 case NEON::BI__builtin_neon_vqtbl4q_v: 5382 break; 5383 case NEON::BI__builtin_neon_vtbx1_v: 5384 case NEON::BI__builtin_neon_vqtbx1_v: 5385 case NEON::BI__builtin_neon_vqtbx1q_v: 5386 case NEON::BI__builtin_neon_vtbx2_v: 5387 case NEON::BI__builtin_neon_vqtbx2_v: 5388 case NEON::BI__builtin_neon_vqtbx2q_v: 5389 case NEON::BI__builtin_neon_vtbx3_v: 5390 case NEON::BI__builtin_neon_vqtbx3_v: 5391 case NEON::BI__builtin_neon_vqtbx3q_v: 5392 case NEON::BI__builtin_neon_vtbx4_v: 5393 case NEON::BI__builtin_neon_vqtbx4_v: 5394 case NEON::BI__builtin_neon_vqtbx4q_v: 5395 break; 5396 } 5397 5398 assert(E->getNumArgs() >= 3); 5399 5400 // Get the last argument, which specifies the vector type. 5401 llvm::APSInt Result; 5402 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5403 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5404 return nullptr; 5405 5406 // Determine the type of this overloaded NEON intrinsic. 5407 NeonTypeFlags Type(Result.getZExtValue()); 5408 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 5409 if (!Ty) 5410 return nullptr; 5411 5412 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5413 5414 // AArch64 scalar builtins are not overloaded, they do not have an extra 5415 // argument that specifies the vector type, need to handle each case. 
5416 switch (BuiltinID) { 5417 case NEON::BI__builtin_neon_vtbl1_v: { 5418 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 5419 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 5420 "vtbl1"); 5421 } 5422 case NEON::BI__builtin_neon_vtbl2_v: { 5423 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 5424 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 5425 "vtbl1"); 5426 } 5427 case NEON::BI__builtin_neon_vtbl3_v: { 5428 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 5429 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 5430 "vtbl2"); 5431 } 5432 case NEON::BI__builtin_neon_vtbl4_v: { 5433 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 5434 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 5435 "vtbl2"); 5436 } 5437 case NEON::BI__builtin_neon_vtbx1_v: { 5438 Value *TblRes = 5439 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 5440 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 5441 5442 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 5443 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 5444 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5445 5446 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 5447 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5448 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5449 } 5450 case NEON::BI__builtin_neon_vtbx2_v: { 5451 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 5452 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 5453 "vtbx1"); 5454 } 5455 case NEON::BI__builtin_neon_vtbx3_v: { 5456 Value *TblRes = 5457 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 5458 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 5459 5460 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 5461 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 5462 TwentyFourV); 5463 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5464 5465 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 5466 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5467 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5468 } 5469 case NEON::BI__builtin_neon_vtbx4_v: { 5470 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 5471 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 5472 "vtbx2"); 5473 } 5474 case NEON::BI__builtin_neon_vqtbl1_v: 5475 case NEON::BI__builtin_neon_vqtbl1q_v: 5476 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 5477 case NEON::BI__builtin_neon_vqtbl2_v: 5478 case NEON::BI__builtin_neon_vqtbl2q_v: { 5479 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 5480 case NEON::BI__builtin_neon_vqtbl3_v: 5481 case NEON::BI__builtin_neon_vqtbl3q_v: 5482 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 5483 case NEON::BI__builtin_neon_vqtbl4_v: 5484 case NEON::BI__builtin_neon_vqtbl4q_v: 5485 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 5486 case NEON::BI__builtin_neon_vqtbx1_v: 5487 case NEON::BI__builtin_neon_vqtbx1q_v: 5488 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 5489 case NEON::BI__builtin_neon_vqtbx2_v: 5490 case NEON::BI__builtin_neon_vqtbx2q_v: 5491 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 5492 case NEON::BI__builtin_neon_vqtbx3_v: 5493 case NEON::BI__builtin_neon_vqtbx3q_v: 5494 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 5495 case NEON::BI__builtin_neon_vqtbx4_v: 5496 case NEON::BI__builtin_neon_vqtbx4q_v: 5497 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 5498 } 5499 } 5500 5501 if (!Int) 5502 return nullptr; 5503 5504 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 5505 return CGF.EmitNeonCall(F, Ops, s); 5506 } 5507 5508 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 5509 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 5510 Op = Builder.CreateBitCast(Op, Int16Ty); 5511 Value *V = UndefValue::get(VTy); 5512 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 5513 Op = 
Builder.CreateInsertElement(V, Op, CI); 5514 return Op; 5515 } 5516 5517 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 5518 const CallExpr *E) { 5519 unsigned HintID = static_cast<unsigned>(-1); 5520 switch (BuiltinID) { 5521 default: break; 5522 case AArch64::BI__builtin_arm_nop: 5523 HintID = 0; 5524 break; 5525 case AArch64::BI__builtin_arm_yield: 5526 HintID = 1; 5527 break; 5528 case AArch64::BI__builtin_arm_wfe: 5529 HintID = 2; 5530 break; 5531 case AArch64::BI__builtin_arm_wfi: 5532 HintID = 3; 5533 break; 5534 case AArch64::BI__builtin_arm_sev: 5535 HintID = 4; 5536 break; 5537 case AArch64::BI__builtin_arm_sevl: 5538 HintID = 5; 5539 break; 5540 } 5541 5542 if (HintID != static_cast<unsigned>(-1)) { 5543 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 5544 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 5545 } 5546 5547 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 5548 Value *Address = EmitScalarExpr(E->getArg(0)); 5549 Value *RW = EmitScalarExpr(E->getArg(1)); 5550 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 5551 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 5552 Value *IsData = EmitScalarExpr(E->getArg(4)); 5553 5554 Value *Locality = nullptr; 5555 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 5556 // Temporal fetch, needs to convert cache level to locality. 5557 Locality = llvm::ConstantInt::get(Int32Ty, 5558 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 5559 } else { 5560 // Streaming fetch. 5561 Locality = llvm::ConstantInt::get(Int32Ty, 0); 5562 } 5563 5564 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 5565 // PLDL3STRM or PLDL2STRM. 
5566 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5567 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5568 } 5569 5570 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5571 assert((getContext().getTypeSize(E->getType()) == 32) && 5572 "rbit of unusual size!"); 5573 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5574 return Builder.CreateCall( 5575 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5576 } 5577 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5578 assert((getContext().getTypeSize(E->getType()) == 64) && 5579 "rbit of unusual size!"); 5580 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5581 return Builder.CreateCall( 5582 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5583 } 5584 5585 if (BuiltinID == AArch64::BI__clear_cache) { 5586 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5587 const FunctionDecl *FD = E->getDirectCallee(); 5588 Value *Ops[2]; 5589 for (unsigned i = 0; i < 2; i++) 5590 Ops[i] = EmitScalarExpr(E->getArg(i)); 5591 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5592 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5593 StringRef Name = FD->getName(); 5594 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5595 } 5596 5597 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5598 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5599 getContext().getTypeSize(E->getType()) == 128) { 5600 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5601 ? 
Intrinsic::aarch64_ldaxp 5602 : Intrinsic::aarch64_ldxp); 5603 5604 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5605 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5606 "ldxp"); 5607 5608 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5609 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5610 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5611 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5612 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5613 5614 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5615 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5616 Val = Builder.CreateOr(Val, Val1); 5617 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5618 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5619 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5620 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5621 5622 QualType Ty = E->getType(); 5623 llvm::Type *RealResTy = ConvertType(Ty); 5624 llvm::Type *PtrTy = llvm::IntegerType::get( 5625 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5626 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5627 5628 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5629 ? Intrinsic::aarch64_ldaxr 5630 : Intrinsic::aarch64_ldxr, 5631 PtrTy); 5632 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5633 5634 if (RealResTy->isPointerTy()) 5635 return Builder.CreateIntToPtr(Val, RealResTy); 5636 5637 llvm::Type *IntResTy = llvm::IntegerType::get( 5638 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5639 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5640 return Builder.CreateBitCast(Val, RealResTy); 5641 } 5642 5643 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5644 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5645 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5646 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5647 ? 
Intrinsic::aarch64_stlxp 5648 : Intrinsic::aarch64_stxp); 5649 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 5650 5651 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5652 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5653 5654 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5655 llvm::Value *Val = Builder.CreateLoad(Tmp); 5656 5657 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5658 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5659 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5660 Int8PtrTy); 5661 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5662 } 5663 5664 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5665 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5666 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5667 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5668 5669 QualType Ty = E->getArg(0)->getType(); 5670 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5671 getContext().getTypeSize(Ty)); 5672 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5673 5674 if (StoreVal->getType()->isPointerTy()) 5675 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5676 else { 5677 llvm::Type *IntTy = llvm::IntegerType::get( 5678 getLLVMContext(), 5679 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5680 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5681 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5682 } 5683 5684 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5685 ? 
Intrinsic::aarch64_stlxr 5686 : Intrinsic::aarch64_stxr, 5687 StoreAddr->getType()); 5688 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5689 } 5690 5691 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5692 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5693 return Builder.CreateCall(F); 5694 } 5695 5696 // CRC32 5697 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5698 switch (BuiltinID) { 5699 case AArch64::BI__builtin_arm_crc32b: 5700 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5701 case AArch64::BI__builtin_arm_crc32cb: 5702 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5703 case AArch64::BI__builtin_arm_crc32h: 5704 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5705 case AArch64::BI__builtin_arm_crc32ch: 5706 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5707 case AArch64::BI__builtin_arm_crc32w: 5708 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5709 case AArch64::BI__builtin_arm_crc32cw: 5710 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5711 case AArch64::BI__builtin_arm_crc32d: 5712 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5713 case AArch64::BI__builtin_arm_crc32cd: 5714 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5715 } 5716 5717 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5718 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5719 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5720 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5721 5722 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 5723 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 5724 5725 return Builder.CreateCall(F, {Arg0, Arg1}); 5726 } 5727 5728 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 5729 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5730 BuiltinID == AArch64::BI__builtin_arm_rsrp || 5731 BuiltinID == AArch64::BI__builtin_arm_wsr || 5732 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 5733 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 5734 5735 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 5736 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5737 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5738 5739 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5740 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5741 5742 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5743 BuiltinID != AArch64::BI__builtin_arm_wsr; 5744 5745 llvm::Type *ValueType; 5746 llvm::Type *RegisterType = Int64Ty; 5747 if (IsPointerBuiltin) { 5748 ValueType = VoidPtrTy; 5749 } else if (Is64Bit) { 5750 ValueType = Int64Ty; 5751 } else { 5752 ValueType = Int32Ty; 5753 } 5754 5755 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5756 } 5757 5758 // Find out if any arguments are required to be integer constant 5759 // expressions. 5760 unsigned ICEArguments = 0; 5761 ASTContext::GetBuiltinTypeError Error; 5762 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5763 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5764 5765 llvm::SmallVector<Value*, 4> Ops; 5766 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5767 if ((ICEArguments & (1 << i)) == 0) { 5768 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5769 } else { 5770 // If this is required to be a constant, constant fold it so that we know 5771 // that the generated intrinsic gets a ConstantInt. 
5772 llvm::APSInt Result; 5773 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5774 assert(IsConst && "Constant arg isn't actually constant?"); 5775 (void)IsConst; 5776 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5777 } 5778 } 5779 5780 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5781 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5782 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5783 5784 if (Builtin) { 5785 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5786 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5787 assert(Result && "SISD intrinsic should have been handled"); 5788 return Result; 5789 } 5790 5791 llvm::APSInt Result; 5792 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5793 NeonTypeFlags Type(0); 5794 if (Arg->isIntegerConstantExpr(Result, getContext())) 5795 // Determine the type of this overloaded NEON intrinsic. 5796 Type = NeonTypeFlags(Result.getZExtValue()); 5797 5798 bool usgn = Type.isUnsigned(); 5799 bool quad = Type.isQuad(); 5800 5801 // Handle non-overloaded intrinsics first. 
5802 switch (BuiltinID) { 5803 default: break; 5804 case NEON::BI__builtin_neon_vldrq_p128: { 5805 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 5806 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 5807 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5808 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 5809 CharUnits::fromQuantity(16)); 5810 } 5811 case NEON::BI__builtin_neon_vstrq_p128: { 5812 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5813 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5814 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5815 } 5816 case NEON::BI__builtin_neon_vcvts_u32_f32: 5817 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5818 usgn = true; 5819 // FALL THROUGH 5820 case NEON::BI__builtin_neon_vcvts_s32_f32: 5821 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5822 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5823 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5824 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5825 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5826 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5827 if (usgn) 5828 return Builder.CreateFPToUI(Ops[0], InTy); 5829 return Builder.CreateFPToSI(Ops[0], InTy); 5830 } 5831 case NEON::BI__builtin_neon_vcvts_f32_u32: 5832 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5833 usgn = true; 5834 // FALL THROUGH 5835 case NEON::BI__builtin_neon_vcvts_f32_s32: 5836 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5837 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5838 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5839 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5840 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5841 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5842 if (usgn) 5843 return Builder.CreateUIToFP(Ops[0], FTy); 5844 return Builder.CreateSIToFP(Ops[0], FTy); 5845 } 5846 case NEON::BI__builtin_neon_vpaddd_s64: { 5847 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5848 Value *Vec = EmitScalarExpr(E->getArg(0)); 5849 // The vector is v2f64, so make sure it's bitcast to that. 5850 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5851 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5852 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5853 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5854 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5855 // Pairwise addition of a v2f64 into a scalar f64. 5856 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5857 } 5858 case NEON::BI__builtin_neon_vpaddd_f64: { 5859 llvm::Type *Ty = 5860 llvm::VectorType::get(DoubleTy, 2); 5861 Value *Vec = EmitScalarExpr(E->getArg(0)); 5862 // The vector is v2f64, so make sure it's bitcast to that. 5863 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5864 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5865 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5866 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5867 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5868 // Pairwise addition of a v2f64 into a scalar f64. 5869 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5870 } 5871 case NEON::BI__builtin_neon_vpadds_f32: { 5872 llvm::Type *Ty = 5873 llvm::VectorType::get(FloatTy, 2); 5874 Value *Vec = EmitScalarExpr(E->getArg(0)); 5875 // The vector is v2f32, so make sure it's bitcast to that. 
5876 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5877 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5878 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5879 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5880 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5881 // Pairwise addition of a v2f32 into a scalar f32. 5882 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5883 } 5884 case NEON::BI__builtin_neon_vceqzd_s64: 5885 case NEON::BI__builtin_neon_vceqzd_f64: 5886 case NEON::BI__builtin_neon_vceqzs_f32: 5887 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5888 return EmitAArch64CompareBuiltinExpr( 5889 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5890 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5891 case NEON::BI__builtin_neon_vcgezd_s64: 5892 case NEON::BI__builtin_neon_vcgezd_f64: 5893 case NEON::BI__builtin_neon_vcgezs_f32: 5894 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5895 return EmitAArch64CompareBuiltinExpr( 5896 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5897 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5898 case NEON::BI__builtin_neon_vclezd_s64: 5899 case NEON::BI__builtin_neon_vclezd_f64: 5900 case NEON::BI__builtin_neon_vclezs_f32: 5901 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5902 return EmitAArch64CompareBuiltinExpr( 5903 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5904 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5905 case NEON::BI__builtin_neon_vcgtzd_s64: 5906 case NEON::BI__builtin_neon_vcgtzd_f64: 5907 case NEON::BI__builtin_neon_vcgtzs_f32: 5908 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5909 return EmitAArch64CompareBuiltinExpr( 5910 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5911 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5912 case NEON::BI__builtin_neon_vcltzd_s64: 5913 case NEON::BI__builtin_neon_vcltzd_f64: 5914 case NEON::BI__builtin_neon_vcltzs_f32: 5915 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5916 return 
EmitAArch64CompareBuiltinExpr( 5917 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5918 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5919 5920 case NEON::BI__builtin_neon_vceqzd_u64: { 5921 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5922 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5923 Ops[0] = 5924 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5925 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5926 } 5927 case NEON::BI__builtin_neon_vceqd_f64: 5928 case NEON::BI__builtin_neon_vcled_f64: 5929 case NEON::BI__builtin_neon_vcltd_f64: 5930 case NEON::BI__builtin_neon_vcged_f64: 5931 case NEON::BI__builtin_neon_vcgtd_f64: { 5932 llvm::CmpInst::Predicate P; 5933 switch (BuiltinID) { 5934 default: llvm_unreachable("missing builtin ID in switch!"); 5935 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5936 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5937 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5938 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5939 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5940 } 5941 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5942 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5943 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5944 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5945 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5946 } 5947 case NEON::BI__builtin_neon_vceqs_f32: 5948 case NEON::BI__builtin_neon_vcles_f32: 5949 case NEON::BI__builtin_neon_vclts_f32: 5950 case NEON::BI__builtin_neon_vcges_f32: 5951 case NEON::BI__builtin_neon_vcgts_f32: { 5952 llvm::CmpInst::Predicate P; 5953 switch (BuiltinID) { 5954 default: llvm_unreachable("missing builtin ID in switch!"); 5955 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 5956 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 5957 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5958 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5959 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5960 } 5961 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5962 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5963 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5964 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5965 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5966 } 5967 case NEON::BI__builtin_neon_vceqd_s64: 5968 case NEON::BI__builtin_neon_vceqd_u64: 5969 case NEON::BI__builtin_neon_vcgtd_s64: 5970 case NEON::BI__builtin_neon_vcgtd_u64: 5971 case NEON::BI__builtin_neon_vcltd_s64: 5972 case NEON::BI__builtin_neon_vcltd_u64: 5973 case NEON::BI__builtin_neon_vcged_u64: 5974 case NEON::BI__builtin_neon_vcged_s64: 5975 case NEON::BI__builtin_neon_vcled_u64: 5976 case NEON::BI__builtin_neon_vcled_s64: { 5977 llvm::CmpInst::Predicate P; 5978 switch (BuiltinID) { 5979 default: llvm_unreachable("missing builtin ID in switch!"); 5980 case NEON::BI__builtin_neon_vceqd_s64: 5981 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5982 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5983 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5984 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5985 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5986 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5987 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5988 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5989 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5990 } 5991 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5992 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5993 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 5994 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5995 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5996 } 5997 case NEON::BI__builtin_neon_vtstd_s64: 5998 case NEON::BI__builtin_neon_vtstd_u64: { 5999 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6000 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 6001 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6002 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 6003 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 6004 llvm::Constant::getNullValue(Int64Ty)); 6005 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 6006 } 6007 case NEON::BI__builtin_neon_vset_lane_i8: 6008 case NEON::BI__builtin_neon_vset_lane_i16: 6009 case NEON::BI__builtin_neon_vset_lane_i32: 6010 case NEON::BI__builtin_neon_vset_lane_i64: 6011 case NEON::BI__builtin_neon_vset_lane_f32: 6012 case NEON::BI__builtin_neon_vsetq_lane_i8: 6013 case NEON::BI__builtin_neon_vsetq_lane_i16: 6014 case NEON::BI__builtin_neon_vsetq_lane_i32: 6015 case NEON::BI__builtin_neon_vsetq_lane_i64: 6016 case NEON::BI__builtin_neon_vsetq_lane_f32: 6017 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6018 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6019 case NEON::BI__builtin_neon_vset_lane_f64: 6020 // The vector type needs a cast for the v1f64 variant. 6021 Ops[1] = Builder.CreateBitCast(Ops[1], 6022 llvm::VectorType::get(DoubleTy, 1)); 6023 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6024 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6025 case NEON::BI__builtin_neon_vsetq_lane_f64: 6026 // The vector type needs a cast for the v2f64 variant. 
6027 Ops[1] = Builder.CreateBitCast(Ops[1], 6028 llvm::VectorType::get(DoubleTy, 2)); 6029 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6030 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6031 6032 case NEON::BI__builtin_neon_vget_lane_i8: 6033 case NEON::BI__builtin_neon_vdupb_lane_i8: 6034 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 6035 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6036 "vget_lane"); 6037 case NEON::BI__builtin_neon_vgetq_lane_i8: 6038 case NEON::BI__builtin_neon_vdupb_laneq_i8: 6039 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 6040 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6041 "vgetq_lane"); 6042 case NEON::BI__builtin_neon_vget_lane_i16: 6043 case NEON::BI__builtin_neon_vduph_lane_i16: 6044 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 6045 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6046 "vget_lane"); 6047 case NEON::BI__builtin_neon_vgetq_lane_i16: 6048 case NEON::BI__builtin_neon_vduph_laneq_i16: 6049 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 6050 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6051 "vgetq_lane"); 6052 case NEON::BI__builtin_neon_vget_lane_i32: 6053 case NEON::BI__builtin_neon_vdups_lane_i32: 6054 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 6055 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6056 "vget_lane"); 6057 case NEON::BI__builtin_neon_vdups_lane_f32: 6058 Ops[0] = Builder.CreateBitCast(Ops[0], 6059 llvm::VectorType::get(FloatTy, 2)); 6060 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6061 "vdups_lane"); 6062 case NEON::BI__builtin_neon_vgetq_lane_i32: 6063 case NEON::BI__builtin_neon_vdups_laneq_i32: 6064 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 6065 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6066 "vgetq_lane"); 6067 case NEON::BI__builtin_neon_vget_lane_i64: 6068 case NEON::BI__builtin_neon_vdupd_lane_i64: 6069 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 6070 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6071 "vget_lane"); 6072 case NEON::BI__builtin_neon_vdupd_lane_f64: 6073 Ops[0] = Builder.CreateBitCast(Ops[0], 6074 llvm::VectorType::get(DoubleTy, 1)); 6075 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6076 "vdupd_lane"); 6077 case NEON::BI__builtin_neon_vgetq_lane_i64: 6078 case NEON::BI__builtin_neon_vdupd_laneq_i64: 6079 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 6080 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6081 "vgetq_lane"); 6082 case NEON::BI__builtin_neon_vget_lane_f32: 6083 Ops[0] = Builder.CreateBitCast(Ops[0], 6084 llvm::VectorType::get(FloatTy, 2)); 6085 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6086 "vget_lane"); 6087 case NEON::BI__builtin_neon_vget_lane_f64: 6088 Ops[0] = Builder.CreateBitCast(Ops[0], 6089 llvm::VectorType::get(DoubleTy, 1)); 6090 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6091 "vget_lane"); 6092 case NEON::BI__builtin_neon_vgetq_lane_f32: 6093 case NEON::BI__builtin_neon_vdups_laneq_f32: 6094 Ops[0] = Builder.CreateBitCast(Ops[0], 6095 llvm::VectorType::get(FloatTy, 4)); 6096 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6097 "vgetq_lane"); 6098 case NEON::BI__builtin_neon_vgetq_lane_f64: 6099 case NEON::BI__builtin_neon_vdupd_laneq_f64: 6100 Ops[0] = Builder.CreateBitCast(Ops[0], 6101 llvm::VectorType::get(DoubleTy, 2)); 6102 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6103 "vgetq_lane"); 6104 case NEON::BI__builtin_neon_vaddd_s64: 
6105 case NEON::BI__builtin_neon_vaddd_u64: 6106 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 6107 case NEON::BI__builtin_neon_vsubd_s64: 6108 case NEON::BI__builtin_neon_vsubd_u64: 6109 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 6110 case NEON::BI__builtin_neon_vqdmlalh_s16: 6111 case NEON::BI__builtin_neon_vqdmlslh_s16: { 6112 SmallVector<Value *, 2> ProductOps; 6113 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6114 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 6115 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6116 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6117 ProductOps, "vqdmlXl"); 6118 Constant *CI = ConstantInt::get(SizeTy, 0); 6119 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6120 6121 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 6122 ? Intrinsic::aarch64_neon_sqadd 6123 : Intrinsic::aarch64_neon_sqsub; 6124 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 6125 } 6126 case NEON::BI__builtin_neon_vqshlud_n_s64: { 6127 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6128 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6129 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 6130 Ops, "vqshlu_n"); 6131 } 6132 case NEON::BI__builtin_neon_vqshld_n_u64: 6133 case NEON::BI__builtin_neon_vqshld_n_s64: { 6134 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 6135 ? Intrinsic::aarch64_neon_uqshl 6136 : Intrinsic::aarch64_neon_sqshl; 6137 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6138 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6139 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 6140 } 6141 case NEON::BI__builtin_neon_vrshrd_n_u64: 6142 case NEON::BI__builtin_neon_vrshrd_n_s64: { 6143 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 6144 ? 
Intrinsic::aarch64_neon_urshl 6145 : Intrinsic::aarch64_neon_srshl; 6146 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6147 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 6148 Ops[1] = ConstantInt::get(Int64Ty, -SV); 6149 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 6150 } 6151 case NEON::BI__builtin_neon_vrsrad_n_u64: 6152 case NEON::BI__builtin_neon_vrsrad_n_s64: { 6153 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 6154 ? Intrinsic::aarch64_neon_urshl 6155 : Intrinsic::aarch64_neon_srshl; 6156 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6157 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 6158 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 6159 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 6160 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 6161 } 6162 case NEON::BI__builtin_neon_vshld_n_s64: 6163 case NEON::BI__builtin_neon_vshld_n_u64: { 6164 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6165 return Builder.CreateShl( 6166 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 6167 } 6168 case NEON::BI__builtin_neon_vshrd_n_s64: { 6169 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6170 return Builder.CreateAShr( 6171 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6172 Amt->getZExtValue())), 6173 "shrd_n"); 6174 } 6175 case NEON::BI__builtin_neon_vshrd_n_u64: { 6176 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6177 uint64_t ShiftAmt = Amt->getZExtValue(); 6178 // Right-shifting an unsigned value by its size yields 0. 
6179 if (ShiftAmt == 64) 6180 return ConstantInt::get(Int64Ty, 0); 6181 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 6182 "shrd_n"); 6183 } 6184 case NEON::BI__builtin_neon_vsrad_n_s64: { 6185 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6186 Ops[1] = Builder.CreateAShr( 6187 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6188 Amt->getZExtValue())), 6189 "shrd_n"); 6190 return Builder.CreateAdd(Ops[0], Ops[1]); 6191 } 6192 case NEON::BI__builtin_neon_vsrad_n_u64: { 6193 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6194 uint64_t ShiftAmt = Amt->getZExtValue(); 6195 // Right-shifting an unsigned value by its size yields 0. 6196 // As Op + 0 = Op, return Ops[0] directly. 6197 if (ShiftAmt == 64) 6198 return Ops[0]; 6199 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 6200 "shrd_n"); 6201 return Builder.CreateAdd(Ops[0], Ops[1]); 6202 } 6203 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 6204 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 6205 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 6206 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 6207 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6208 "lane"); 6209 SmallVector<Value *, 2> ProductOps; 6210 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6211 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 6212 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6213 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6214 ProductOps, "vqdmlXl"); 6215 Constant *CI = ConstantInt::get(SizeTy, 0); 6216 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6217 Ops.pop_back(); 6218 6219 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 6220 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 6221 ? 
Intrinsic::aarch64_neon_sqadd 6222 : Intrinsic::aarch64_neon_sqsub; 6223 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 6224 } 6225 case NEON::BI__builtin_neon_vqdmlals_s32: 6226 case NEON::BI__builtin_neon_vqdmlsls_s32: { 6227 SmallVector<Value *, 2> ProductOps; 6228 ProductOps.push_back(Ops[1]); 6229 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 6230 Ops[1] = 6231 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6232 ProductOps, "vqdmlXl"); 6233 6234 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 6235 ? Intrinsic::aarch64_neon_sqadd 6236 : Intrinsic::aarch64_neon_sqsub; 6237 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 6238 } 6239 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 6240 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 6241 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 6242 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 6243 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6244 "lane"); 6245 SmallVector<Value *, 2> ProductOps; 6246 ProductOps.push_back(Ops[1]); 6247 ProductOps.push_back(Ops[2]); 6248 Ops[1] = 6249 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6250 ProductOps, "vqdmlXl"); 6251 Ops.pop_back(); 6252 6253 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 6254 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 6255 ? Intrinsic::aarch64_neon_sqadd 6256 : Intrinsic::aarch64_neon_sqsub; 6257 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 6258 } 6259 } 6260 6261 llvm::VectorType *VTy = GetNeonType(this, Type); 6262 llvm::Type *Ty = VTy; 6263 if (!Ty) 6264 return nullptr; 6265 6266 // Not all intrinsics handled by the common case work for AArch64 yet, so only 6267 // defer to common code if it's been added to our special map. 
6268 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 6269 AArch64SIMDIntrinsicsProvenSorted); 6270 6271 if (Builtin) 6272 return EmitCommonNeonBuiltinExpr( 6273 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 6274 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 6275 /*never use addresses*/ Address::invalid(), Address::invalid()); 6276 6277 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 6278 return V; 6279 6280 unsigned Int; 6281 switch (BuiltinID) { 6282 default: return nullptr; 6283 case NEON::BI__builtin_neon_vbsl_v: 6284 case NEON::BI__builtin_neon_vbslq_v: { 6285 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 6286 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 6287 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 6288 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 6289 6290 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 6291 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 6292 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 6293 return Builder.CreateBitCast(Ops[0], Ty); 6294 } 6295 case NEON::BI__builtin_neon_vfma_lane_v: 6296 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 6297 // The ARM builtins (and instructions) have the addend as the first 6298 // operand, but the 'fma' intrinsics have it last. Swap it around here. 6299 Value *Addend = Ops[0]; 6300 Value *Multiplicand = Ops[1]; 6301 Value *LaneSource = Ops[2]; 6302 Ops[0] = Multiplicand; 6303 Ops[1] = LaneSource; 6304 Ops[2] = Addend; 6305 6306 // Now adjust things to handle the lane access. 6307 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
6308 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 6309 VTy; 6310 llvm::Constant *cst = cast<Constant>(Ops[3]); 6311 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 6312 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 6313 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 6314 6315 Ops.pop_back(); 6316 Int = Intrinsic::fma; 6317 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 6318 } 6319 case NEON::BI__builtin_neon_vfma_laneq_v: { 6320 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 6321 // v1f64 fma should be mapped to Neon scalar f64 fma 6322 if (VTy && VTy->getElementType() == DoubleTy) { 6323 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6324 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6325 llvm::Type *VTy = GetNeonType(this, 6326 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 6327 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 6328 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6329 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 6330 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6331 return Builder.CreateBitCast(Result, Ty); 6332 } 6333 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6334 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6335 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6336 6337 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 6338 VTy->getNumElements() * 2); 6339 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 6340 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 6341 cast<ConstantInt>(Ops[3])); 6342 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 6343 6344 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6345 } 6346 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 6347 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6348 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6349 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6350 6351 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 6352 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 6353 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6354 } 6355 case NEON::BI__builtin_neon_vfmas_lane_f32: 6356 case NEON::BI__builtin_neon_vfmas_laneq_f32: 6357 case NEON::BI__builtin_neon_vfmad_lane_f64: 6358 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 6359 Ops.push_back(EmitScalarExpr(E->getArg(3))); 6360 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 6361 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6362 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6363 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6364 } 6365 case NEON::BI__builtin_neon_vmull_v: 6366 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6367 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 6368 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 6369 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 6370 case NEON::BI__builtin_neon_vmax_v: 6371 case NEON::BI__builtin_neon_vmaxq_v: 6372 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6373 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 6374 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 6375 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 6376 case NEON::BI__builtin_neon_vmin_v: 6377 case NEON::BI__builtin_neon_vminq_v: 6378 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6379 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 6380 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 6381 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 6382 case NEON::BI__builtin_neon_vabd_v: 6383 case NEON::BI__builtin_neon_vabdq_v: 6384 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6385 Int = usgn ? 
Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 6386 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 6387 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 6388 case NEON::BI__builtin_neon_vpadal_v: 6389 case NEON::BI__builtin_neon_vpadalq_v: { 6390 unsigned ArgElts = VTy->getNumElements(); 6391 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 6392 unsigned BitWidth = EltTy->getBitWidth(); 6393 llvm::Type *ArgTy = llvm::VectorType::get( 6394 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 6395 llvm::Type* Tys[2] = { VTy, ArgTy }; 6396 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 6397 SmallVector<llvm::Value*, 1> TmpOps; 6398 TmpOps.push_back(Ops[1]); 6399 Function *F = CGM.getIntrinsic(Int, Tys); 6400 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 6401 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 6402 return Builder.CreateAdd(tmp, addend); 6403 } 6404 case NEON::BI__builtin_neon_vpmin_v: 6405 case NEON::BI__builtin_neon_vpminq_v: 6406 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6407 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 6408 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 6409 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 6410 case NEON::BI__builtin_neon_vpmax_v: 6411 case NEON::BI__builtin_neon_vpmaxq_v: 6412 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6413 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 6414 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 6415 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 6416 case NEON::BI__builtin_neon_vminnm_v: 6417 case NEON::BI__builtin_neon_vminnmq_v: 6418 Int = Intrinsic::aarch64_neon_fminnm; 6419 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 6420 case NEON::BI__builtin_neon_vmaxnm_v: 6421 case NEON::BI__builtin_neon_vmaxnmq_v: 6422 Int = Intrinsic::aarch64_neon_fmaxnm; 6423 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 6424 case NEON::BI__builtin_neon_vrecpss_f32: { 6425 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6426 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 6427 Ops, "vrecps"); 6428 } 6429 case NEON::BI__builtin_neon_vrecpsd_f64: { 6430 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6431 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 6432 Ops, "vrecps"); 6433 } 6434 case NEON::BI__builtin_neon_vqshrun_n_v: 6435 Int = Intrinsic::aarch64_neon_sqshrun; 6436 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 6437 case NEON::BI__builtin_neon_vqrshrun_n_v: 6438 Int = Intrinsic::aarch64_neon_sqrshrun; 6439 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 6440 case NEON::BI__builtin_neon_vqshrn_n_v: 6441 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 6442 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 6443 case NEON::BI__builtin_neon_vrshrn_n_v: 6444 Int = Intrinsic::aarch64_neon_rshrn; 6445 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 6446 case NEON::BI__builtin_neon_vqrshrn_n_v: 6447 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 6448 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 6449 case NEON::BI__builtin_neon_vrnda_v: 6450 case NEON::BI__builtin_neon_vrndaq_v: { 6451 Int = Intrinsic::round; 6452 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 6453 } 6454 case NEON::BI__builtin_neon_vrndi_v: 6455 case NEON::BI__builtin_neon_vrndiq_v: { 6456 Int = Intrinsic::nearbyint; 6457 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 6458 } 6459 case NEON::BI__builtin_neon_vrndm_v: 6460 case NEON::BI__builtin_neon_vrndmq_v: { 6461 Int = Intrinsic::floor; 6462 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 6463 } 6464 case NEON::BI__builtin_neon_vrndn_v: 6465 case NEON::BI__builtin_neon_vrndnq_v: { 6466 Int = Intrinsic::aarch64_neon_frintn; 6467 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 6468 } 6469 case NEON::BI__builtin_neon_vrndp_v: 6470 case NEON::BI__builtin_neon_vrndpq_v: { 6471 Int = Intrinsic::ceil; 6472 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 6473 } 6474 case NEON::BI__builtin_neon_vrndx_v: 6475 case NEON::BI__builtin_neon_vrndxq_v: { 6476 Int = Intrinsic::rint; 6477 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 6478 } 6479 case NEON::BI__builtin_neon_vrnd_v: 6480 case NEON::BI__builtin_neon_vrndq_v: { 6481 Int = Intrinsic::trunc; 6482 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 6483 } 6484 case NEON::BI__builtin_neon_vceqz_v: 6485 case NEON::BI__builtin_neon_vceqzq_v: 6486 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 6487 ICmpInst::ICMP_EQ, "vceqz"); 6488 case NEON::BI__builtin_neon_vcgez_v: 6489 case NEON::BI__builtin_neon_vcgezq_v: 6490 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 6491 ICmpInst::ICMP_SGE, "vcgez"); 6492 case NEON::BI__builtin_neon_vclez_v: 6493 case NEON::BI__builtin_neon_vclezq_v: 6494 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 6495 ICmpInst::ICMP_SLE, "vclez"); 6496 case NEON::BI__builtin_neon_vcgtz_v: 6497 case NEON::BI__builtin_neon_vcgtzq_v: 6498 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 6499 ICmpInst::ICMP_SGT, "vcgtz"); 6500 case NEON::BI__builtin_neon_vcltz_v: 6501 case NEON::BI__builtin_neon_vcltzq_v: 6502 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 6503 ICmpInst::ICMP_SLT, "vcltz"); 6504 case NEON::BI__builtin_neon_vcvt_f64_v: 6505 case NEON::BI__builtin_neon_vcvtq_f64_v: 6506 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6507 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 6508 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 6509 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 6510 case NEON::BI__builtin_neon_vcvt_f64_f32: { 6511 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 6512 "unexpected vcvt_f64_f32 builtin"); 6513 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 6514 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6515 6516 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 6517 } 6518 case NEON::BI__builtin_neon_vcvt_f32_f64: { 6519 assert(Type.getEltType() == NeonTypeFlags::Float32 && 6520 "unexpected vcvt_f32_f64 builtin"); 6521 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 6522 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6523 6524 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 6525 } 6526 case NEON::BI__builtin_neon_vcvt_s32_v: 6527 case NEON::BI__builtin_neon_vcvt_u32_v: 6528 case NEON::BI__builtin_neon_vcvt_s64_v: 6529 case NEON::BI__builtin_neon_vcvt_u64_v: 6530 case NEON::BI__builtin_neon_vcvtq_s32_v: 6531 case NEON::BI__builtin_neon_vcvtq_u32_v: 6532 case NEON::BI__builtin_neon_vcvtq_s64_v: 6533 case NEON::BI__builtin_neon_vcvtq_u64_v: { 6534 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, 
Type)); 6535 if (usgn) 6536 return Builder.CreateFPToUI(Ops[0], Ty); 6537 return Builder.CreateFPToSI(Ops[0], Ty); 6538 } 6539 case NEON::BI__builtin_neon_vcvta_s32_v: 6540 case NEON::BI__builtin_neon_vcvtaq_s32_v: 6541 case NEON::BI__builtin_neon_vcvta_u32_v: 6542 case NEON::BI__builtin_neon_vcvtaq_u32_v: 6543 case NEON::BI__builtin_neon_vcvta_s64_v: 6544 case NEON::BI__builtin_neon_vcvtaq_s64_v: 6545 case NEON::BI__builtin_neon_vcvta_u64_v: 6546 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 6547 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 6548 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6549 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 6550 } 6551 case NEON::BI__builtin_neon_vcvtm_s32_v: 6552 case NEON::BI__builtin_neon_vcvtmq_s32_v: 6553 case NEON::BI__builtin_neon_vcvtm_u32_v: 6554 case NEON::BI__builtin_neon_vcvtmq_u32_v: 6555 case NEON::BI__builtin_neon_vcvtm_s64_v: 6556 case NEON::BI__builtin_neon_vcvtmq_s64_v: 6557 case NEON::BI__builtin_neon_vcvtm_u64_v: 6558 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 6559 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 6560 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6561 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 6562 } 6563 case NEON::BI__builtin_neon_vcvtn_s32_v: 6564 case NEON::BI__builtin_neon_vcvtnq_s32_v: 6565 case NEON::BI__builtin_neon_vcvtn_u32_v: 6566 case NEON::BI__builtin_neon_vcvtnq_u32_v: 6567 case NEON::BI__builtin_neon_vcvtn_s64_v: 6568 case NEON::BI__builtin_neon_vcvtnq_s64_v: 6569 case NEON::BI__builtin_neon_vcvtn_u64_v: 6570 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 6571 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 6572 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6573 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 6574 } 6575 case NEON::BI__builtin_neon_vcvtp_s32_v: 6576 case NEON::BI__builtin_neon_vcvtpq_s32_v: 6577 case NEON::BI__builtin_neon_vcvtp_u32_v: 6578 case NEON::BI__builtin_neon_vcvtpq_u32_v: 6579 case NEON::BI__builtin_neon_vcvtp_s64_v: 6580 case NEON::BI__builtin_neon_vcvtpq_s64_v: 6581 case NEON::BI__builtin_neon_vcvtp_u64_v: 6582 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 6583 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 6584 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6585 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 6586 } 6587 case NEON::BI__builtin_neon_vmulx_v: 6588 case NEON::BI__builtin_neon_vmulxq_v: { 6589 Int = Intrinsic::aarch64_neon_fmulx; 6590 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 6591 } 6592 case NEON::BI__builtin_neon_vmul_lane_v: 6593 case NEON::BI__builtin_neon_vmul_laneq_v: { 6594 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 6595 bool Quad = false; 6596 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 6597 Quad = true; 6598 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6599 llvm::Type *VTy = GetNeonType(this, 6600 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 6601 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6602 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 6603 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 6604 return Builder.CreateBitCast(Result, Ty); 6605 } 6606 case NEON::BI__builtin_neon_vnegd_s64: 6607 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 6608 case NEON::BI__builtin_neon_vpmaxnm_v: 6609 case NEON::BI__builtin_neon_vpmaxnmq_v: { 6610 Int = Intrinsic::aarch64_neon_fmaxnmp; 6611 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 6612 } 6613 case 
NEON::BI__builtin_neon_vpminnm_v: 6614 case NEON::BI__builtin_neon_vpminnmq_v: { 6615 Int = Intrinsic::aarch64_neon_fminnmp; 6616 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 6617 } 6618 case NEON::BI__builtin_neon_vsqrt_v: 6619 case NEON::BI__builtin_neon_vsqrtq_v: { 6620 Int = Intrinsic::sqrt; 6621 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6622 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 6623 } 6624 case NEON::BI__builtin_neon_vrbit_v: 6625 case NEON::BI__builtin_neon_vrbitq_v: { 6626 Int = Intrinsic::aarch64_neon_rbit; 6627 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 6628 } 6629 case NEON::BI__builtin_neon_vaddv_u8: 6630 // FIXME: These are handled by the AArch64 scalar code. 6631 usgn = true; 6632 // FALLTHROUGH 6633 case NEON::BI__builtin_neon_vaddv_s8: { 6634 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6635 Ty = Int32Ty; 6636 VTy = llvm::VectorType::get(Int8Ty, 8); 6637 llvm::Type *Tys[2] = { Ty, VTy }; 6638 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6639 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6640 return Builder.CreateTrunc(Ops[0], Int8Ty); 6641 } 6642 case NEON::BI__builtin_neon_vaddv_u16: 6643 usgn = true; 6644 // FALLTHROUGH 6645 case NEON::BI__builtin_neon_vaddv_s16: { 6646 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6647 Ty = Int32Ty; 6648 VTy = llvm::VectorType::get(Int16Ty, 4); 6649 llvm::Type *Tys[2] = { Ty, VTy }; 6650 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6651 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6652 return Builder.CreateTrunc(Ops[0], Int16Ty); 6653 } 6654 case NEON::BI__builtin_neon_vaddvq_u8: 6655 usgn = true; 6656 // FALLTHROUGH 6657 case NEON::BI__builtin_neon_vaddvq_s8: { 6658 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6659 Ty = Int32Ty; 6660 VTy = llvm::VectorType::get(Int8Ty, 16); 6661 llvm::Type *Tys[2] = { Ty, VTy }; 6662 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6663 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6664 return Builder.CreateTrunc(Ops[0], Int8Ty); 6665 } 6666 case NEON::BI__builtin_neon_vaddvq_u16: 6667 usgn = true; 6668 // FALLTHROUGH 6669 case NEON::BI__builtin_neon_vaddvq_s16: { 6670 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6671 Ty = Int32Ty; 6672 VTy = llvm::VectorType::get(Int16Ty, 8); 6673 llvm::Type *Tys[2] = { Ty, VTy }; 6674 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6675 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6676 return Builder.CreateTrunc(Ops[0], Int16Ty); 6677 } 6678 case NEON::BI__builtin_neon_vmaxv_u8: { 6679 Int = Intrinsic::aarch64_neon_umaxv; 6680 Ty = Int32Ty; 6681 VTy = llvm::VectorType::get(Int8Ty, 8); 6682 llvm::Type *Tys[2] = { Ty, VTy }; 6683 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6684 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6685 return Builder.CreateTrunc(Ops[0], Int8Ty); 6686 } 6687 case NEON::BI__builtin_neon_vmaxv_u16: { 6688 Int = Intrinsic::aarch64_neon_umaxv; 6689 Ty = Int32Ty; 6690 VTy = llvm::VectorType::get(Int16Ty, 4); 6691 llvm::Type *Tys[2] = { Ty, VTy }; 6692 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6693 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6694 return Builder.CreateTrunc(Ops[0], Int16Ty); 6695 } 6696 case NEON::BI__builtin_neon_vmaxvq_u8: { 6697 Int = Intrinsic::aarch64_neon_umaxv; 6698 Ty = Int32Ty; 6699 VTy = llvm::VectorType::get(Int8Ty, 16); 6700 llvm::Type *Tys[2] = { Ty, VTy }; 6701 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6702 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6703 return Builder.CreateTrunc(Ops[0], Int8Ty); 6704 } 6705 case NEON::BI__builtin_neon_vmaxvq_u16: { 6706 Int = 
Intrinsic::aarch64_neon_umaxv; 6707 Ty = Int32Ty; 6708 VTy = llvm::VectorType::get(Int16Ty, 8); 6709 llvm::Type *Tys[2] = { Ty, VTy }; 6710 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6711 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6712 return Builder.CreateTrunc(Ops[0], Int16Ty); 6713 } 6714 case NEON::BI__builtin_neon_vmaxv_s8: { 6715 Int = Intrinsic::aarch64_neon_smaxv; 6716 Ty = Int32Ty; 6717 VTy = llvm::VectorType::get(Int8Ty, 8); 6718 llvm::Type *Tys[2] = { Ty, VTy }; 6719 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6720 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6721 return Builder.CreateTrunc(Ops[0], Int8Ty); 6722 } 6723 case NEON::BI__builtin_neon_vmaxv_s16: { 6724 Int = Intrinsic::aarch64_neon_smaxv; 6725 Ty = Int32Ty; 6726 VTy = llvm::VectorType::get(Int16Ty, 4); 6727 llvm::Type *Tys[2] = { Ty, VTy }; 6728 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6729 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6730 return Builder.CreateTrunc(Ops[0], Int16Ty); 6731 } 6732 case NEON::BI__builtin_neon_vmaxvq_s8: { 6733 Int = Intrinsic::aarch64_neon_smaxv; 6734 Ty = Int32Ty; 6735 VTy = llvm::VectorType::get(Int8Ty, 16); 6736 llvm::Type *Tys[2] = { Ty, VTy }; 6737 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6738 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6739 return Builder.CreateTrunc(Ops[0], Int8Ty); 6740 } 6741 case NEON::BI__builtin_neon_vmaxvq_s16: { 6742 Int = Intrinsic::aarch64_neon_smaxv; 6743 Ty = Int32Ty; 6744 VTy = llvm::VectorType::get(Int16Ty, 8); 6745 llvm::Type *Tys[2] = { Ty, VTy }; 6746 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6747 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6748 return Builder.CreateTrunc(Ops[0], Int16Ty); 6749 } 6750 case NEON::BI__builtin_neon_vminv_u8: { 6751 Int = Intrinsic::aarch64_neon_uminv; 6752 Ty = Int32Ty; 6753 VTy = llvm::VectorType::get(Int8Ty, 8); 6754 llvm::Type *Tys[2] = { Ty, VTy }; 6755 
Ops.push_back(EmitScalarExpr(E->getArg(0))); 6756 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6757 return Builder.CreateTrunc(Ops[0], Int8Ty); 6758 } 6759 case NEON::BI__builtin_neon_vminv_u16: { 6760 Int = Intrinsic::aarch64_neon_uminv; 6761 Ty = Int32Ty; 6762 VTy = llvm::VectorType::get(Int16Ty, 4); 6763 llvm::Type *Tys[2] = { Ty, VTy }; 6764 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6765 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6766 return Builder.CreateTrunc(Ops[0], Int16Ty); 6767 } 6768 case NEON::BI__builtin_neon_vminvq_u8: { 6769 Int = Intrinsic::aarch64_neon_uminv; 6770 Ty = Int32Ty; 6771 VTy = llvm::VectorType::get(Int8Ty, 16); 6772 llvm::Type *Tys[2] = { Ty, VTy }; 6773 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6774 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6775 return Builder.CreateTrunc(Ops[0], Int8Ty); 6776 } 6777 case NEON::BI__builtin_neon_vminvq_u16: { 6778 Int = Intrinsic::aarch64_neon_uminv; 6779 Ty = Int32Ty; 6780 VTy = llvm::VectorType::get(Int16Ty, 8); 6781 llvm::Type *Tys[2] = { Ty, VTy }; 6782 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6783 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6784 return Builder.CreateTrunc(Ops[0], Int16Ty); 6785 } 6786 case NEON::BI__builtin_neon_vminv_s8: { 6787 Int = Intrinsic::aarch64_neon_sminv; 6788 Ty = Int32Ty; 6789 VTy = llvm::VectorType::get(Int8Ty, 8); 6790 llvm::Type *Tys[2] = { Ty, VTy }; 6791 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6792 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6793 return Builder.CreateTrunc(Ops[0], Int8Ty); 6794 } 6795 case NEON::BI__builtin_neon_vminv_s16: { 6796 Int = Intrinsic::aarch64_neon_sminv; 6797 Ty = Int32Ty; 6798 VTy = llvm::VectorType::get(Int16Ty, 4); 6799 llvm::Type *Tys[2] = { Ty, VTy }; 6800 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6801 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6802 return Builder.CreateTrunc(Ops[0], 
Int16Ty); 6803 } 6804 case NEON::BI__builtin_neon_vminvq_s8: { 6805 Int = Intrinsic::aarch64_neon_sminv; 6806 Ty = Int32Ty; 6807 VTy = llvm::VectorType::get(Int8Ty, 16); 6808 llvm::Type *Tys[2] = { Ty, VTy }; 6809 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6810 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6811 return Builder.CreateTrunc(Ops[0], Int8Ty); 6812 } 6813 case NEON::BI__builtin_neon_vminvq_s16: { 6814 Int = Intrinsic::aarch64_neon_sminv; 6815 Ty = Int32Ty; 6816 VTy = llvm::VectorType::get(Int16Ty, 8); 6817 llvm::Type *Tys[2] = { Ty, VTy }; 6818 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6819 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6820 return Builder.CreateTrunc(Ops[0], Int16Ty); 6821 } 6822 case NEON::BI__builtin_neon_vmul_n_f64: { 6823 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6824 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6825 return Builder.CreateFMul(Ops[0], RHS); 6826 } 6827 case NEON::BI__builtin_neon_vaddlv_u8: { 6828 Int = Intrinsic::aarch64_neon_uaddlv; 6829 Ty = Int32Ty; 6830 VTy = llvm::VectorType::get(Int8Ty, 8); 6831 llvm::Type *Tys[2] = { Ty, VTy }; 6832 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6833 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6834 return Builder.CreateTrunc(Ops[0], Int16Ty); 6835 } 6836 case NEON::BI__builtin_neon_vaddlv_u16: { 6837 Int = Intrinsic::aarch64_neon_uaddlv; 6838 Ty = Int32Ty; 6839 VTy = llvm::VectorType::get(Int16Ty, 4); 6840 llvm::Type *Tys[2] = { Ty, VTy }; 6841 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6842 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6843 } 6844 case NEON::BI__builtin_neon_vaddlvq_u8: { 6845 Int = Intrinsic::aarch64_neon_uaddlv; 6846 Ty = Int32Ty; 6847 VTy = llvm::VectorType::get(Int8Ty, 16); 6848 llvm::Type *Tys[2] = { Ty, VTy }; 6849 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6850 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6851 
return Builder.CreateTrunc(Ops[0], Int16Ty); 6852 } 6853 case NEON::BI__builtin_neon_vaddlvq_u16: { 6854 Int = Intrinsic::aarch64_neon_uaddlv; 6855 Ty = Int32Ty; 6856 VTy = llvm::VectorType::get(Int16Ty, 8); 6857 llvm::Type *Tys[2] = { Ty, VTy }; 6858 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6859 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6860 } 6861 case NEON::BI__builtin_neon_vaddlv_s8: { 6862 Int = Intrinsic::aarch64_neon_saddlv; 6863 Ty = Int32Ty; 6864 VTy = llvm::VectorType::get(Int8Ty, 8); 6865 llvm::Type *Tys[2] = { Ty, VTy }; 6866 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6867 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6868 return Builder.CreateTrunc(Ops[0], Int16Ty); 6869 } 6870 case NEON::BI__builtin_neon_vaddlv_s16: { 6871 Int = Intrinsic::aarch64_neon_saddlv; 6872 Ty = Int32Ty; 6873 VTy = llvm::VectorType::get(Int16Ty, 4); 6874 llvm::Type *Tys[2] = { Ty, VTy }; 6875 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6876 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6877 } 6878 case NEON::BI__builtin_neon_vaddlvq_s8: { 6879 Int = Intrinsic::aarch64_neon_saddlv; 6880 Ty = Int32Ty; 6881 VTy = llvm::VectorType::get(Int8Ty, 16); 6882 llvm::Type *Tys[2] = { Ty, VTy }; 6883 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6884 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6885 return Builder.CreateTrunc(Ops[0], Int16Ty); 6886 } 6887 case NEON::BI__builtin_neon_vaddlvq_s16: { 6888 Int = Intrinsic::aarch64_neon_saddlv; 6889 Ty = Int32Ty; 6890 VTy = llvm::VectorType::get(Int16Ty, 8); 6891 llvm::Type *Tys[2] = { Ty, VTy }; 6892 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6893 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6894 } 6895 case NEON::BI__builtin_neon_vsri_n_v: 6896 case NEON::BI__builtin_neon_vsriq_n_v: { 6897 Int = Intrinsic::aarch64_neon_vsri; 6898 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6899 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6900 } 
6901 case NEON::BI__builtin_neon_vsli_n_v: 6902 case NEON::BI__builtin_neon_vsliq_n_v: { 6903 Int = Intrinsic::aarch64_neon_vsli; 6904 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6905 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6906 } 6907 case NEON::BI__builtin_neon_vsra_n_v: 6908 case NEON::BI__builtin_neon_vsraq_n_v: 6909 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6910 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6911 return Builder.CreateAdd(Ops[0], Ops[1]); 6912 case NEON::BI__builtin_neon_vrsra_n_v: 6913 case NEON::BI__builtin_neon_vrsraq_n_v: { 6914 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6915 SmallVector<llvm::Value*,2> TmpOps; 6916 TmpOps.push_back(Ops[1]); 6917 TmpOps.push_back(Ops[2]); 6918 Function* F = CGM.getIntrinsic(Int, Ty); 6919 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6920 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6921 return Builder.CreateAdd(Ops[0], tmp); 6922 } 6923 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6924 // of an Align parameter here. 
6925 case NEON::BI__builtin_neon_vld1_x2_v: 6926 case NEON::BI__builtin_neon_vld1q_x2_v: 6927 case NEON::BI__builtin_neon_vld1_x3_v: 6928 case NEON::BI__builtin_neon_vld1q_x3_v: 6929 case NEON::BI__builtin_neon_vld1_x4_v: 6930 case NEON::BI__builtin_neon_vld1q_x4_v: { 6931 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6932 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6933 llvm::Type *Tys[2] = { VTy, PTy }; 6934 unsigned Int; 6935 switch (BuiltinID) { 6936 case NEON::BI__builtin_neon_vld1_x2_v: 6937 case NEON::BI__builtin_neon_vld1q_x2_v: 6938 Int = Intrinsic::aarch64_neon_ld1x2; 6939 break; 6940 case NEON::BI__builtin_neon_vld1_x3_v: 6941 case NEON::BI__builtin_neon_vld1q_x3_v: 6942 Int = Intrinsic::aarch64_neon_ld1x3; 6943 break; 6944 case NEON::BI__builtin_neon_vld1_x4_v: 6945 case NEON::BI__builtin_neon_vld1q_x4_v: 6946 Int = Intrinsic::aarch64_neon_ld1x4; 6947 break; 6948 } 6949 Function *F = CGM.getIntrinsic(Int, Tys); 6950 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6951 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6952 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6953 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6954 } 6955 case NEON::BI__builtin_neon_vst1_x2_v: 6956 case NEON::BI__builtin_neon_vst1q_x2_v: 6957 case NEON::BI__builtin_neon_vst1_x3_v: 6958 case NEON::BI__builtin_neon_vst1q_x3_v: 6959 case NEON::BI__builtin_neon_vst1_x4_v: 6960 case NEON::BI__builtin_neon_vst1q_x4_v: { 6961 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6962 llvm::Type *Tys[2] = { VTy, PTy }; 6963 unsigned Int; 6964 switch (BuiltinID) { 6965 case NEON::BI__builtin_neon_vst1_x2_v: 6966 case NEON::BI__builtin_neon_vst1q_x2_v: 6967 Int = Intrinsic::aarch64_neon_st1x2; 6968 break; 6969 case NEON::BI__builtin_neon_vst1_x3_v: 6970 case NEON::BI__builtin_neon_vst1q_x3_v: 6971 Int = Intrinsic::aarch64_neon_st1x3; 6972 break; 6973 case NEON::BI__builtin_neon_vst1_x4_v: 6974 case 
NEON::BI__builtin_neon_vst1q_x4_v: 6975 Int = Intrinsic::aarch64_neon_st1x4; 6976 break; 6977 } 6978 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6979 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6980 } 6981 case NEON::BI__builtin_neon_vld1_v: 6982 case NEON::BI__builtin_neon_vld1q_v: { 6983 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6984 auto Alignment = CharUnits::fromQuantity( 6985 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); 6986 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 6987 } 6988 case NEON::BI__builtin_neon_vst1_v: 6989 case NEON::BI__builtin_neon_vst1q_v: 6990 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6991 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6992 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6993 case NEON::BI__builtin_neon_vld1_lane_v: 6994 case NEON::BI__builtin_neon_vld1q_lane_v: { 6995 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6996 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6997 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6998 auto Alignment = CharUnits::fromQuantity( 6999 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 7000 Ops[0] = 7001 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 7002 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 7003 } 7004 case NEON::BI__builtin_neon_vld1_dup_v: 7005 case NEON::BI__builtin_neon_vld1q_dup_v: { 7006 Value *V = UndefValue::get(Ty); 7007 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 7008 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7009 auto Alignment = CharUnits::fromQuantity( 7010 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 7011 Ops[0] = 7012 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 7013 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 7014 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 7015 return EmitNeonSplat(Ops[0], CI); 7016 } 7017 case NEON::BI__builtin_neon_vst1_lane_v: 7018 case NEON::BI__builtin_neon_vst1q_lane_v: 7019 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7020 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 7021 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7022 return Builder.CreateDefaultAlignedStore(Ops[1], 7023 Builder.CreateBitCast(Ops[0], Ty)); 7024 case NEON::BI__builtin_neon_vld2_v: 7025 case NEON::BI__builtin_neon_vld2q_v: { 7026 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7027 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7028 llvm::Type *Tys[2] = { VTy, PTy }; 7029 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 7030 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 7031 Ops[0] = Builder.CreateBitCast(Ops[0], 7032 llvm::PointerType::getUnqual(Ops[1]->getType())); 7033 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7034 } 7035 case NEON::BI__builtin_neon_vld3_v: 7036 case NEON::BI__builtin_neon_vld3q_v: { 7037 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7038 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7039 llvm::Type *Tys[2] = { VTy, PTy }; 7040 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 7041 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 7042 Ops[0] = Builder.CreateBitCast(Ops[0], 7043 llvm::PointerType::getUnqual(Ops[1]->getType())); 7044 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7045 } 7046 case NEON::BI__builtin_neon_vld4_v: 7047 case NEON::BI__builtin_neon_vld4q_v: { 7048 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7049 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7050 llvm::Type *Tys[2] = { VTy, PTy }; 7051 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 7052 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 7053 Ops[0] = Builder.CreateBitCast(Ops[0], 7054 llvm::PointerType::getUnqual(Ops[1]->getType())); 7055 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7056 } 7057 case NEON::BI__builtin_neon_vld2_dup_v: 7058 case NEON::BI__builtin_neon_vld2q_dup_v: { 7059 llvm::Type *PTy = 7060 llvm::PointerType::getUnqual(VTy->getElementType()); 7061 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7062 llvm::Type *Tys[2] = { VTy, PTy }; 7063 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 7064 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 7065 Ops[0] = Builder.CreateBitCast(Ops[0], 7066 llvm::PointerType::getUnqual(Ops[1]->getType())); 7067 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7068 } 7069 case NEON::BI__builtin_neon_vld3_dup_v: 7070 case NEON::BI__builtin_neon_vld3q_dup_v: { 7071 llvm::Type *PTy = 7072 llvm::PointerType::getUnqual(VTy->getElementType()); 7073 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7074 llvm::Type *Tys[2] = { VTy, PTy }; 7075 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 7076 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 7077 Ops[0] = Builder.CreateBitCast(Ops[0], 7078 llvm::PointerType::getUnqual(Ops[1]->getType())); 7079 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7080 } 7081 case NEON::BI__builtin_neon_vld4_dup_v: 7082 case NEON::BI__builtin_neon_vld4q_dup_v: { 7083 llvm::Type *PTy = 7084 llvm::PointerType::getUnqual(VTy->getElementType()); 7085 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7086 llvm::Type *Tys[2] = { VTy, PTy }; 7087 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 7088 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 7089 Ops[0] = Builder.CreateBitCast(Ops[0], 7090 llvm::PointerType::getUnqual(Ops[1]->getType())); 7091 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7092 } 7093 case NEON::BI__builtin_neon_vld2_lane_v: 7094 case NEON::BI__builtin_neon_vld2q_lane_v: { 7095 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 7096 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 7097 Ops.push_back(Ops[1]); 7098 Ops.erase(Ops.begin()+1); 7099 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7100 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7101 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 7102 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 7103 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7104 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7105 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7106 } 7107 case NEON::BI__builtin_neon_vld3_lane_v: 7108 case NEON::BI__builtin_neon_vld3q_lane_v: { 7109 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 7110 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 7111 Ops.push_back(Ops[1]); 7112 Ops.erase(Ops.begin()+1); 7113 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7114 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7115 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7116 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7117 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 7118 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7119 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7120 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7121 } 7122 case NEON::BI__builtin_neon_vld4_lane_v: 7123 case NEON::BI__builtin_neon_vld4q_lane_v: { 7124 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 7125 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 7126 Ops.push_back(Ops[1]); 7127 Ops.erase(Ops.begin()+1); 7128 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7129 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7130 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7131 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 7132 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 7133 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 7134 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7135 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 7136 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7137 } 7138 case NEON::BI__builtin_neon_vst2_v: 7139 case NEON::BI__builtin_neon_vst2q_v: { 7140 Ops.push_back(Ops[0]); 7141 Ops.erase(Ops.begin()); 7142 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 7143 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 7144 Ops, ""); 7145 } 7146 case NEON::BI__builtin_neon_vst2_lane_v: 7147 case NEON::BI__builtin_neon_vst2q_lane_v: { 7148 Ops.push_back(Ops[0]); 7149 Ops.erase(Ops.begin()); 7150 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 7151 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7152 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 7153 Ops, ""); 7154 } 7155 case NEON::BI__builtin_neon_vst3_v: 7156 case NEON::BI__builtin_neon_vst3q_v: { 7157 Ops.push_back(Ops[0]); 7158 Ops.erase(Ops.begin()); 7159 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7160 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 7161 Ops, ""); 7162 } 7163 case NEON::BI__builtin_neon_vst3_lane_v: 7164 case NEON::BI__builtin_neon_vst3q_lane_v: { 7165 Ops.push_back(Ops[0]); 7166 Ops.erase(Ops.begin()); 7167 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 7168 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7169 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 7170 Ops, ""); 7171 } 7172 case NEON::BI__builtin_neon_vst4_v: 7173 case NEON::BI__builtin_neon_vst4q_v: { 7174 Ops.push_back(Ops[0]); 7175 Ops.erase(Ops.begin()); 7176 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7177 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 7178 Ops, ""); 7179 } 7180 case NEON::BI__builtin_neon_vst4_lane_v: 7181 case NEON::BI__builtin_neon_vst4q_lane_v: { 7182 Ops.push_back(Ops[0]); 7183 Ops.erase(Ops.begin()); 7184 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7185 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 7186 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 7187 Ops, ""); 7188 } 7189 case NEON::BI__builtin_neon_vtrn_v: 7190 case NEON::BI__builtin_neon_vtrnq_v: { 7191 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7192 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7193 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7194 Value *SV = nullptr; 7195 7196 for (unsigned vi = 0; vi != 2; ++vi) { 7197 SmallVector<uint32_t, 16> Indices; 7198 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7199 Indices.push_back(i+vi); 7200 Indices.push_back(i+e+vi); 7201 } 7202 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7203 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 7204 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7205 } 7206 return SV; 7207 } 7208 case NEON::BI__builtin_neon_vuzp_v: 7209 case NEON::BI__builtin_neon_vuzpq_v: { 7210 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7211 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7212 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7213 Value *SV = nullptr; 7214 7215 for (unsigned vi = 0; vi != 2; ++vi) { 7216 SmallVector<uint32_t, 16> Indices; 7217 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 7218 Indices.push_back(2*i+vi); 7219 7220 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7221 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 7222 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7223 } 7224 return SV; 7225 } 7226 case NEON::BI__builtin_neon_vzip_v: 7227 case NEON::BI__builtin_neon_vzipq_v: { 7228 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7229 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7230 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7231 Value *SV = nullptr; 7232 7233 for (unsigned vi = 0; vi != 2; ++vi) { 7234 SmallVector<uint32_t, 16> Indices; 7235 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7236 
Indices.push_back((i + vi*e) >> 1); 7237 Indices.push_back(((i + vi*e) >> 1)+e); 7238 } 7239 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7240 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 7241 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7242 } 7243 return SV; 7244 } 7245 case NEON::BI__builtin_neon_vqtbl1q_v: { 7246 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 7247 Ops, "vtbl1"); 7248 } 7249 case NEON::BI__builtin_neon_vqtbl2q_v: { 7250 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 7251 Ops, "vtbl2"); 7252 } 7253 case NEON::BI__builtin_neon_vqtbl3q_v: { 7254 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 7255 Ops, "vtbl3"); 7256 } 7257 case NEON::BI__builtin_neon_vqtbl4q_v: { 7258 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 7259 Ops, "vtbl4"); 7260 } 7261 case NEON::BI__builtin_neon_vqtbx1q_v: { 7262 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 7263 Ops, "vtbx1"); 7264 } 7265 case NEON::BI__builtin_neon_vqtbx2q_v: { 7266 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 7267 Ops, "vtbx2"); 7268 } 7269 case NEON::BI__builtin_neon_vqtbx3q_v: { 7270 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 7271 Ops, "vtbx3"); 7272 } 7273 case NEON::BI__builtin_neon_vqtbx4q_v: { 7274 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 7275 Ops, "vtbx4"); 7276 } 7277 case NEON::BI__builtin_neon_vsqadd_v: 7278 case NEON::BI__builtin_neon_vsqaddq_v: { 7279 Int = Intrinsic::aarch64_neon_usqadd; 7280 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 7281 } 7282 case NEON::BI__builtin_neon_vuqadd_v: 7283 case NEON::BI__builtin_neon_vuqaddq_v: { 7284 Int = Intrinsic::aarch64_neon_suqadd; 7285 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 7286 } 7287 } 7288 } 7289 7290 llvm::Value *CodeGenFunction:: 7291 
BuildVector(ArrayRef<llvm::Value*> Ops) { 7292 assert((Ops.size() & (Ops.size() - 1)) == 0 && 7293 "Not a power-of-two sized vector!"); 7294 bool AllConstants = true; 7295 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 7296 AllConstants &= isa<Constant>(Ops[i]); 7297 7298 // If this is a constant vector, create a ConstantVector. 7299 if (AllConstants) { 7300 SmallVector<llvm::Constant*, 16> CstOps; 7301 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7302 CstOps.push_back(cast<Constant>(Ops[i])); 7303 return llvm::ConstantVector::get(CstOps); 7304 } 7305 7306 // Otherwise, insertelement the values to build the vector. 7307 Value *Result = 7308 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 7309 7310 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7311 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 7312 7313 return Result; 7314 } 7315 7316 // Convert the mask from an integer type to a vector of i1. 7317 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 7318 unsigned NumElts) { 7319 7320 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 7321 cast<IntegerType>(Mask->getType())->getBitWidth()); 7322 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 7323 7324 // If we have less than 8 elements, then the starting mask was an i8 and 7325 // we need to extract down to the right number of elements. 7326 if (NumElts < 8) { 7327 uint32_t Indices[4]; 7328 for (unsigned i = 0; i != NumElts; ++i) 7329 Indices[i] = i; 7330 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 7331 makeArrayRef(Indices, NumElts), 7332 "extract"); 7333 } 7334 return MaskVec; 7335 } 7336 7337 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 7338 SmallVectorImpl<Value *> &Ops, 7339 unsigned Align) { 7340 // Cast the pointer to right type. 
7341 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7342 llvm::PointerType::getUnqual(Ops[1]->getType())); 7343 7344 // If the mask is all ones just emit a regular store. 7345 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7346 if (C->isAllOnesValue()) 7347 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 7348 7349 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7350 Ops[1]->getType()->getVectorNumElements()); 7351 7352 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 7353 } 7354 7355 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 7356 SmallVectorImpl<Value *> &Ops, unsigned Align) { 7357 // Cast the pointer to right type. 7358 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7359 llvm::PointerType::getUnqual(Ops[1]->getType())); 7360 7361 // If the mask is all ones just emit a regular store. 7362 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7363 if (C->isAllOnesValue()) 7364 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7365 7366 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7367 Ops[1]->getType()->getVectorNumElements()); 7368 7369 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 7370 } 7371 7372 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 7373 SmallVectorImpl<Value *> &Ops, 7374 llvm::Type *DstTy, 7375 unsigned SrcSizeInBits, 7376 unsigned Align) { 7377 // Load the subvector. 7378 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7379 7380 // Create broadcast mask. 
7381 unsigned NumDstElts = DstTy->getVectorNumElements(); 7382 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 7383 7384 SmallVector<uint32_t, 8> Mask; 7385 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 7386 for (unsigned j = 0; j != NumSrcElts; ++j) 7387 Mask.push_back(j); 7388 7389 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 7390 } 7391 7392 static Value *EmitX86Select(CodeGenFunction &CGF, 7393 Value *Mask, Value *Op0, Value *Op1) { 7394 7395 // If the mask is all ones just return first argument. 7396 if (const auto *C = dyn_cast<Constant>(Mask)) 7397 if (C->isAllOnesValue()) 7398 return Op0; 7399 7400 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 7401 7402 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 7403 } 7404 7405 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 7406 bool Signed, SmallVectorImpl<Value *> &Ops) { 7407 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7408 Value *Cmp; 7409 7410 if (CC == 3) { 7411 Cmp = Constant::getNullValue( 7412 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7413 } else if (CC == 7) { 7414 Cmp = Constant::getAllOnesValue( 7415 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7416 } else { 7417 ICmpInst::Predicate Pred; 7418 switch (CC) { 7419 default: llvm_unreachable("Unknown condition code"); 7420 case 0: Pred = ICmpInst::ICMP_EQ; break; 7421 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 7422 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 7423 case 4: Pred = ICmpInst::ICMP_NE; break; 7424 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 7425 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 7426 } 7427 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7428 } 7429 7430 const auto *C = dyn_cast<Constant>(Ops.back()); 7431 if (!C || !C->isAllOnesValue()) 7432 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 7433 7434 if (NumElts < 8) { 7435 uint32_t Indices[8]; 7436 for (unsigned i = 0; i != NumElts; ++i) 7437 Indices[i] = i; 7438 for (unsigned i = NumElts; i != 8; ++i) 7439 Indices[i] = i % NumElts + NumElts; 7440 Cmp = CGF.Builder.CreateShuffleVector( 7441 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 7442 } 7443 return CGF.Builder.CreateBitCast(Cmp, 7444 IntegerType::get(CGF.getLLVMContext(), 7445 std::max(NumElts, 8U))); 7446 } 7447 7448 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { 7449 7450 llvm::Type *Ty = Ops[0]->getType(); 7451 Value *Zero = llvm::Constant::getNullValue(Ty); 7452 Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); 7453 Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); 7454 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); 7455 if (Ops.size() == 1) 7456 return Res; 7457 return EmitX86Select(CGF, Ops[2], Res, Ops[1]); 7458 } 7459 7460 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 7461 ArrayRef<Value *> Ops) { 7462 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7463 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7464 7465 if (Ops.size() == 2) 7466 return Res; 7467 7468 assert(Ops.size() == 4); 7469 return EmitX86Select(CGF, Ops[3], Res, Ops[2]); 7470 } 7471 7472 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 7473 llvm::Type *DstTy) { 7474 unsigned NumberOfElements = DstTy->getVectorNumElements(); 7475 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 7476 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 7477 } 7478 7479 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { 7480 const Expr *CPUExpr = 
E->getArg(0)->IgnoreParenCasts(); 7481 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); 7482 return EmitX86CpuIs(CPUStr); 7483 } 7484 7485 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { 7486 7487 // This enum contains the vendor, type, and subtype enums from the 7488 // runtime library concatenated together. The _START labels mark 7489 // the start and are used to adjust the value into the correct 7490 // encoding space. 7491 enum X86CPUs { 7492 INTEL = 1, 7493 AMD, 7494 CPU_TYPE_START, 7495 INTEL_BONNELL, 7496 INTEL_CORE2, 7497 INTEL_COREI7, 7498 AMDFAM10H, 7499 AMDFAM15H, 7500 INTEL_SILVERMONT, 7501 INTEL_KNL, 7502 AMD_BTVER1, 7503 AMD_BTVER2, 7504 CPU_SUBTYPE_START, 7505 INTEL_COREI7_NEHALEM, 7506 INTEL_COREI7_WESTMERE, 7507 INTEL_COREI7_SANDYBRIDGE, 7508 AMDFAM10H_BARCELONA, 7509 AMDFAM10H_SHANGHAI, 7510 AMDFAM10H_ISTANBUL, 7511 AMDFAM15H_BDVER1, 7512 AMDFAM15H_BDVER2, 7513 AMDFAM15H_BDVER3, 7514 AMDFAM15H_BDVER4, 7515 AMDFAM17H_ZNVER1, 7516 INTEL_COREI7_IVYBRIDGE, 7517 INTEL_COREI7_HASWELL, 7518 INTEL_COREI7_BROADWELL, 7519 INTEL_COREI7_SKYLAKE, 7520 INTEL_COREI7_SKYLAKE_AVX512, 7521 }; 7522 7523 X86CPUs CPU = 7524 StringSwitch<X86CPUs>(CPUStr) 7525 .Case("amd", AMD) 7526 .Case("amdfam10h", AMDFAM10H) 7527 .Case("amdfam10", AMDFAM10H) 7528 .Case("amdfam15h", AMDFAM15H) 7529 .Case("amdfam15", AMDFAM15H) 7530 .Case("atom", INTEL_BONNELL) 7531 .Case("barcelona", AMDFAM10H_BARCELONA) 7532 .Case("bdver1", AMDFAM15H_BDVER1) 7533 .Case("bdver2", AMDFAM15H_BDVER2) 7534 .Case("bdver3", AMDFAM15H_BDVER3) 7535 .Case("bdver4", AMDFAM15H_BDVER4) 7536 .Case("bonnell", INTEL_BONNELL) 7537 .Case("broadwell", INTEL_COREI7_BROADWELL) 7538 .Case("btver1", AMD_BTVER1) 7539 .Case("btver2", AMD_BTVER2) 7540 .Case("core2", INTEL_CORE2) 7541 .Case("corei7", INTEL_COREI7) 7542 .Case("haswell", INTEL_COREI7_HASWELL) 7543 .Case("intel", INTEL) 7544 .Case("istanbul", AMDFAM10H_ISTANBUL) 7545 .Case("ivybridge", INTEL_COREI7_IVYBRIDGE) 7546 .Case("knl", 
INTEL_KNL) 7547 .Case("nehalem", INTEL_COREI7_NEHALEM) 7548 .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE) 7549 .Case("shanghai", AMDFAM10H_SHANGHAI) 7550 .Case("silvermont", INTEL_SILVERMONT) 7551 .Case("skylake", INTEL_COREI7_SKYLAKE) 7552 .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512) 7553 .Case("slm", INTEL_SILVERMONT) 7554 .Case("westmere", INTEL_COREI7_WESTMERE) 7555 .Case("znver1", AMDFAM17H_ZNVER1); 7556 7557 llvm::Type *Int32Ty = Builder.getInt32Ty(); 7558 7559 // Matching the struct layout from the compiler-rt/libgcc structure that is 7560 // filled in: 7561 // unsigned int __cpu_vendor; 7562 // unsigned int __cpu_type; 7563 // unsigned int __cpu_subtype; 7564 // unsigned int __cpu_features[1]; 7565 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7566 llvm::ArrayType::get(Int32Ty, 1)); 7567 7568 // Grab the global __cpu_model. 7569 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7570 7571 // Calculate the index needed to access the correct field based on the 7572 // range. Also adjust the expected value. 7573 unsigned Index; 7574 unsigned Value; 7575 if (CPU > CPU_SUBTYPE_START) { 7576 Index = 2; 7577 Value = CPU - CPU_SUBTYPE_START; 7578 } else if (CPU > CPU_TYPE_START) { 7579 Index = 1; 7580 Value = CPU - CPU_TYPE_START; 7581 } else { 7582 Index = 0; 7583 Value = CPU; 7584 } 7585 7586 // Grab the appropriate field from __cpu_model. 7587 llvm::Value *Idxs[] = { 7588 ConstantInt::get(Int32Ty, 0), 7589 ConstantInt::get(Int32Ty, Index) 7590 }; 7591 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); 7592 CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); 7593 7594 // Check the value of the field against the requested value. 
7595 return Builder.CreateICmpEQ(CpuValue, 7596 llvm::ConstantInt::get(Int32Ty, Value)); 7597 } 7598 7599 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { 7600 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 7601 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 7602 return EmitX86CpuSupports(FeatureStr); 7603 } 7604 7605 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { 7606 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 7607 // based mapping. 7608 // Processor features and mapping to processor feature value. 7609 enum X86Features { 7610 CMOV = 0, 7611 MMX, 7612 POPCNT, 7613 SSE, 7614 SSE2, 7615 SSE3, 7616 SSSE3, 7617 SSE4_1, 7618 SSE4_2, 7619 AVX, 7620 AVX2, 7621 SSE4_A, 7622 FMA4, 7623 XOP, 7624 FMA, 7625 AVX512F, 7626 BMI, 7627 BMI2, 7628 AES, 7629 PCLMUL, 7630 AVX512VL, 7631 AVX512BW, 7632 AVX512DQ, 7633 AVX512CD, 7634 AVX512ER, 7635 AVX512PF, 7636 AVX512VBMI, 7637 AVX512IFMA, 7638 AVX5124VNNIW, 7639 AVX5124FMAPS, 7640 AVX512VPOPCNTDQ, 7641 MAX 7642 }; 7643 7644 uint32_t FeaturesMask = 0; 7645 7646 for (const StringRef &FeatureStr : FeatureStrs) { 7647 X86Features Feature = 7648 StringSwitch<X86Features>(FeatureStr) 7649 .Case("cmov", X86Features::CMOV) 7650 .Case("mmx", X86Features::MMX) 7651 .Case("popcnt", X86Features::POPCNT) 7652 .Case("sse", X86Features::SSE) 7653 .Case("sse2", X86Features::SSE2) 7654 .Case("sse3", X86Features::SSE3) 7655 .Case("ssse3", X86Features::SSSE3) 7656 .Case("sse4.1", X86Features::SSE4_1) 7657 .Case("sse4.2", X86Features::SSE4_2) 7658 .Case("avx", X86Features::AVX) 7659 .Case("avx2", X86Features::AVX2) 7660 .Case("sse4a", X86Features::SSE4_A) 7661 .Case("fma4", X86Features::FMA4) 7662 .Case("xop", X86Features::XOP) 7663 .Case("fma", X86Features::FMA) 7664 .Case("avx512f", X86Features::AVX512F) 7665 .Case("bmi", X86Features::BMI) 7666 .Case("bmi2", X86Features::BMI2) 7667 .Case("aes", X86Features::AES) 7668 .Case("pclmul", 
X86Features::PCLMUL) 7669 .Case("avx512vl", X86Features::AVX512VL) 7670 .Case("avx512bw", X86Features::AVX512BW) 7671 .Case("avx512dq", X86Features::AVX512DQ) 7672 .Case("avx512cd", X86Features::AVX512CD) 7673 .Case("avx512er", X86Features::AVX512ER) 7674 .Case("avx512pf", X86Features::AVX512PF) 7675 .Case("avx512vbmi", X86Features::AVX512VBMI) 7676 .Case("avx512ifma", X86Features::AVX512IFMA) 7677 .Case("avx5124vnniw", X86Features::AVX5124VNNIW) 7678 .Case("avx5124fmaps", X86Features::AVX5124FMAPS) 7679 .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) 7680 .Default(X86Features::MAX); 7681 assert(Feature != X86Features::MAX && "Invalid feature!"); 7682 FeaturesMask |= (1U << Feature); 7683 } 7684 7685 // Matching the struct layout from the compiler-rt/libgcc structure that is 7686 // filled in: 7687 // unsigned int __cpu_vendor; 7688 // unsigned int __cpu_type; 7689 // unsigned int __cpu_subtype; 7690 // unsigned int __cpu_features[1]; 7691 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7692 llvm::ArrayType::get(Int32Ty, 1)); 7693 7694 // Grab the global __cpu_model. 7695 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7696 7697 // Grab the first (0th) element from the field __cpu_features off of the 7698 // global in the struct STy. 7699 Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), 7700 ConstantInt::get(Int32Ty, 0)}; 7701 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 7702 Value *Features = 7703 Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); 7704 7705 // Check the value of the bit corresponding to the feature requested. 
Value *Bitset = Builder.CreateAnd(
      Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
  return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
}

/// Emit a call to the runtime function "__cpu_indicator_init", which is
/// declared here as taking no arguments and returning void.
///
/// \returns the emitted call instruction.
Value *CodeGenFunction::EmitX86CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
                                                    /*Variadic*/ false);
  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
  return Builder.CreateCall(Func);
}

/// Emit LLVM IR for a call to an x86 target builtin.
///
/// \param BuiltinID the X86::BI* identifier of the builtin being called.
/// \param E the call expression; its arguments are emitted into a local
/// operand list before the per-builtin dispatch.
/// \returns the value produced for the call, or nullptr when \p BuiltinID is
/// not one of the builtins handled by the switch below.
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  // The __builtin_cpu_* builtins are dispatched before any argument is
  // emitted; each has its own dedicated emitter.
  if (BuiltinID == X86::BI__builtin_cpu_is)
    return EmitX86CpuIs(E);
  if (BuiltinID == X86::BI__builtin_cpu_supports)
    return EmitX86CpuSupports(E);
  if (BuiltinID == X86::BI__builtin_cpu_init)
    return EmitX86CpuInit();

  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  // ICEArguments is used as a bitmask: bit i set means argument i must be an
  // integer constant expression.
  // NOTE(review): `1 << i` is a signed int shift, so this presumes no builtin
  // has 32 or more arguments - confirm.
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  // These exist so that the builtin that takes an immediate can be bounds
  // checked by clang to avoid passing bad immediates to the backend. Since
  // AVX has a larger immediate than SSE we would need separate builtins to
  // do the different bounds checking. Rather than create a clang specific
  // SSE only builtin, this implements eight separate builtins to match gcc
  // implementation.
  //
  // The helper appends Imm to Ops as an i8 constant and calls intrinsic ID
  // with the resulting operand list. Note that it mutates Ops.
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops);
  };

  // For the vector forms of FP comparisons, translate the builtins directly to
  // IR.
  // TODO: The builtins could be removed if the SSE header files used vector
  // extension comparisons directly (vector ordered/unordered may need
  // additional support via __builtin_isnan()).
  //
  // The helper compares Ops[0] with Ops[1] using Pred, sign-extends the
  // result to an integer vector derived from the operand type, and bitcasts
  // that back to the floating-point operand type.
  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
    Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
    return Builder.CreateBitCast(Sext, FPVecTy);
  };

  switch (BuiltinID) {
  default: return nullptr;
  case X86::BI_mm_prefetch: {
    // Lowered to the generic prefetch intrinsic. The second operand is fixed
    // at 0 and the last at 1; the locality hint is the builtin's second
    // argument.
    Value *Address = Ops[0];
    Value *RW = ConstantInt::get(Int32Ty, 0);
    Value *Locality = Ops[1];
    Value *Data = ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }
  case X86::BI_mm_clflush: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
                              Ops[0]);
  }
  case X86::BI_mm_lfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
  }
  case X86::BI_mm_mfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
  }
  case X86::BI_mm_sfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
  }
  case X86::BI_mm_pause: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
  }
  case X86::BI__rdtsc: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
  }
  case X86::BI__builtin_ia32_undef128:
  case X86::BI__builtin_ia32_undef256:
  case X86::BI__builtin_ia32_undef512:
    // The x86 definition of "undef" is not the same as the LLVM definition
    // (PR32176). We leave optimizing away an unnecessary zero constant to the
    // IR optimizer and backend.
    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
    // value, we should use that here instead of a zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    // Build a vector from the operands and reinterpret it as x86_mmx.
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    // NOTE(review): Ops[1] is only consulted for its type; element 0 is
    // always the one extracted here - confirm this is intended.
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI_mm_setcsr:
  case X86::BI__builtin_ia32_ldmxcsr: {
    // The intrinsic is called with an i8* operand, so the value is first
    // stored to a temporary whose address is passed.
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                          Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  }
  case X86::BI_mm_getcsr:
  case X86::BI__builtin_ia32_stmxcsr: {
    // Call the intrinsic on the address of a temporary and return the value
    // loaded from that temporary afterwards.
    Address Tmp = CreateMemTemp(E->getType());
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_xsave:
  case X86::BI__builtin_ia32_xsave64:
  case X86::BI__builtin_ia32_xrstor:
  case X86::BI__builtin_ia32_xrstor64:
  case X86::BI__builtin_ia32_xsaveopt:
  case X86::BI__builtin_ia32_xsaveopt64:
  case X86::BI__builtin_ia32_xrstors:
  case X86::BI__builtin_ia32_xrstors64:
  case X86::BI__builtin_ia32_xsavec:
  case X86::BI__builtin_ia32_xsavec64:
  case X86::BI__builtin_ia32_xsaves:
  case X86::BI__builtin_ia32_xsaves64: {
    // Each builtin maps onto the identically named Intrinsic::x86_* ID; the
    // macro below expands to one case label per name.
    Intrinsic::ID ID;
#define INTRINSIC_X86_XSAVE_ID(NAME) \
    case X86::BI__builtin_ia32_##NAME: \
      ID = Intrinsic::x86_##NAME; \
      break
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    INTRINSIC_X86_XSAVE_ID(xsave);
    INTRINSIC_X86_XSAVE_ID(xsave64);
    INTRINSIC_X86_XSAVE_ID(xrstor);
    INTRINSIC_X86_XSAVE_ID(xrstor64);
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
    INTRINSIC_X86_XSAVE_ID(xrstors);
    INTRINSIC_X86_XSAVE_ID(xrstors64);
    INTRINSIC_X86_XSAVE_ID(xsavec);
    INTRINSIC_X86_XSAVE_ID(xsavec64);
    INTRINSIC_X86_XSAVE_ID(xsaves);
    INTRINSIC_X86_XSAVE_ID(xsaves64);
    }
#undef INTRINSIC_X86_XSAVE_ID
    // Split the mask operand into 32-bit halves: Ops[1] becomes the upper 32
    // bits and the lower 32 bits are appended, so the call operands are
    // {Ops[0], hi, lo}.
    Value *Mhi = Builder.CreateTrunc(
      Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
    Ops[1] = Mhi;
    Ops.push_back(Mlo);
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
  case X86::BI__builtin_ia32_storedqudi128_mask:
  case X86::BI__builtin_ia32_storedqusi128_mask:
  case X86::BI__builtin_ia32_storedquhi128_mask:
  case X86::BI__builtin_ia32_storedquqi128_mask:
  case X86::BI__builtin_ia32_storeupd128_mask:
  case X86::BI__builtin_ia32_storeups128_mask:
  case X86::BI__builtin_ia32_storedqudi256_mask:
  case X86::BI__builtin_ia32_storedqusi256_mask:
  case X86::BI__builtin_ia32_storedquhi256_mask:
  case X86::BI__builtin_ia32_storedquqi256_mask:
  case X86::BI__builtin_ia32_storeupd256_mask:
  case X86::BI__builtin_ia32_storeups256_mask:
  case X86::BI__builtin_ia32_storedqudi512_mask:
  case X86::BI__builtin_ia32_storedqusi512_mask:
  case X86::BI__builtin_ia32_storedquhi512_mask:
  case X86::BI__builtin_ia32_storedquqi512_mask:
  case X86::BI__builtin_ia32_storeupd512_mask:
  case X86::BI__builtin_ia32_storeups512_mask:
    // These variants pass 1 as the last (alignment) argument.
    return EmitX86MaskedStore(*this, Ops, 1);

  case X86::BI__builtin_ia32_storess128_mask:
  case X86::BI__builtin_ia32_storesd128_mask: {
    // The scalar masked stores pass a fixed value of 16.
    return EmitX86MaskedStore(*this, Ops, 16);
  }
  case X86::BI__builtin_ia32_vpopcntd_512:
  case X86::BI__builtin_ia32_vpopcntq_512: {
    // Lowered to the generic ctpop intrinsic overloaded on the result type.
    llvm::Type *ResultType = ConvertType(E->getType());
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, Ops);
  }
  case X86::BI__builtin_ia32_cvtmask2b128:
  case X86::BI__builtin_ia32_cvtmask2b256:
  case X86::BI__builtin_ia32_cvtmask2b512:
  case X86::BI__builtin_ia32_cvtmask2w128:
  case X86::BI__builtin_ia32_cvtmask2w256:
  case X86::BI__builtin_ia32_cvtmask2w512:
  case X86::BI__builtin_ia32_cvtmask2d128:
  case X86::BI__builtin_ia32_cvtmask2d256:
  case X86::BI__builtin_ia32_cvtmask2d512:
  case X86::BI__builtin_ia32_cvtmask2q128:
  case X86::BI__builtin_ia32_cvtmask2q256:
  case X86::BI__builtin_ia32_cvtmask2q512:
    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));

  case X86::BI__builtin_ia32_movdqa32store128_mask:
  case X86::BI__builtin_ia32_movdqa64store128_mask:
  case X86::BI__builtin_ia32_storeaps128_mask:
  case X86::BI__builtin_ia32_storeapd128_mask:
  case X86::BI__builtin_ia32_movdqa32store256_mask:
  case X86::BI__builtin_ia32_movdqa64store256_mask:
  case X86::BI__builtin_ia32_storeaps256_mask:
  case X86::BI__builtin_ia32_storeapd256_mask:
  case X86::BI__builtin_ia32_movdqa32store512_mask:
  case X86::BI__builtin_ia32_movdqa64store512_mask:
  case X86::BI__builtin_ia32_storeaps512_mask:
  case X86::BI__builtin_ia32_storeapd512_mask: {
    // These variants use the alignment (in chars) of the type of the
    // builtin's second argument.
    unsigned Align =
      getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
    return EmitX86MaskedStore(*this, Ops, Align);
  }
  case X86::BI__builtin_ia32_loadups128_mask:
  case X86::BI__builtin_ia32_loadups256_mask:
  case X86::BI__builtin_ia32_loadups512_mask:
  case X86::BI__builtin_ia32_loadupd128_mask:
  case X86::BI__builtin_ia32_loadupd256_mask:
  case X86::BI__builtin_ia32_loadupd512_mask:
  case X86::BI__builtin_ia32_loaddquqi128_mask:
  case X86::BI__builtin_ia32_loaddquqi256_mask:
  case X86::BI__builtin_ia32_loaddquqi512_mask:
  case X86::BI__builtin_ia32_loaddquhi128_mask:
  case X86::BI__builtin_ia32_loaddquhi256_mask:
  case X86::BI__builtin_ia32_loaddquhi512_mask:
  case X86::BI__builtin_ia32_loaddqusi128_mask:
  case X86::BI__builtin_ia32_loaddqusi256_mask:
  case X86::BI__builtin_ia32_loaddqusi512_mask:
  case X86::BI__builtin_ia32_loaddqudi128_mask:
  case X86::BI__builtin_ia32_loaddqudi256_mask:
  case X86::BI__builtin_ia32_loaddqudi512_mask:
    // These variants pass 1 as the last (alignment) argument.
    return EmitX86MaskedLoad(*this, Ops, 1);

  case X86::BI__builtin_ia32_loadss128_mask:
  case X86::BI__builtin_ia32_loadsd128_mask:
    // The scalar masked loads pass a fixed value of 16.
    return EmitX86MaskedLoad(*this, Ops, 16);

  case X86::BI__builtin_ia32_loadaps128_mask:
  case X86::BI__builtin_ia32_loadaps256_mask:
  case X86::BI__builtin_ia32_loadaps512_mask:
  case X86::BI__builtin_ia32_loadapd128_mask:
  case X86::BI__builtin_ia32_loadapd256_mask:
  case X86::BI__builtin_ia32_loadapd512_mask:
  case X86::BI__builtin_ia32_movdqa32load128_mask:
  case X86::BI__builtin_ia32_movdqa32load256_mask:
  case X86::BI__builtin_ia32_movdqa32load512_mask:
  case X86::BI__builtin_ia32_movdqa64load128_mask:
  case X86::BI__builtin_ia32_movdqa64load256_mask:
  case X86::BI__builtin_ia32_movdqa64load512_mask: {
    // These variants use the alignment (in chars) of the type of the
    // builtin's second argument.
    unsigned Align =
      getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
    return EmitX86MaskedLoad(*this, Ops, Align);
  }

  case X86::BI__builtin_ia32_vbroadcastf128_pd256:
  case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
    llvm::Type *DstTy = ConvertType(E->getType());
    return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
  }

  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // cast val v2i64
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // extract (0, 1): storelps takes element 0, storehps takes element 1.
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // cast pointer to i64 & store
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512_mask: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
    assert(NumElts % 16 == 0);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= 32)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > 16) {
      ShiftVal -= 16;
      Ops[1] = Ops[0];
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }

    // Shuffle mask buffer; only the first NumElts entries are filled and
    // used. NOTE(review): assumes NumElts <= 64 - confirm.
    uint32_t Indices[64];
    // 256-bit palignr operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }

    Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
                                               makeArrayRef(Indices, NumElts),
                                               "palignr");

    // If this isn't a masked builtin, just return the align operation.
    if (Ops.size() == 3)
      return Align;

    return EmitX86Select(*this, Ops[4], Align, Ops[3]);
  }

  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();

    // This takes a very simple approach since there are two lanes and a
    // shuffle can have 2 inputs. So we reserve the first input for the first
    // lane and the second input for the second lane. This may result in
    // duplicate sources, but this can be dealt with in the backend.

    Value *OutOps[2];
    // Shuffle mask buffer. NOTE(review): assumes NumElts <= 8 - confirm.
    uint32_t Indices[8];
    for (unsigned l = 0; l != 2; ++l) {
      // Determine the source for this lane. Each lane is controlled by its
      // own 4-bit half of the immediate: bit 3 selects zero, bit 1 selects
      // Ops[1] over Ops[0].
      if (Imm & (1 << ((l * 4) + 3)))
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
      else if (Imm & (1 << ((l * 4) + 1)))
        OutOps[l] = Ops[1];
      else
        OutOps[l] = Ops[0];

      for (unsigned i = 0; i != NumElts/2; ++i) {
        // Start with ith element of the source for this lane.
        unsigned Idx = (l * NumElts) + i;
        // If bit 0 of the immediate half is set, switch to the high half of
        // the source.
        if (Imm & (1 << (l * 4)))
          Idx += NumElts/2;
        Indices[(l * (NumElts/2)) + i] = Idx;
      }
    }

    return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
                                       makeArrayRef(Indices, NumElts),
                                       "vperm");
  }

  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64:
  case X86::BI__builtin_ia32_movntsd:
  case X86::BI__builtin_ia32_movntss: {
    // Metadata node holding the i32 constant 1, attached below under the
    // "nontemporal" metadata kind.
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    Value *Ptr = Ops[0];
    Value *Src = Ops[1];

    // Extract the 0'th element of the source vector.
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
        BuiltinID == X86::BI__builtin_ia32_movntss)
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(
        Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");

    // Unaligned nontemporal store of the scalar value.
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(1);
    return SI;
  }

  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
  // Masked integer compares. EmitX86MaskedCompare is defined elsewhere;
  // presumably its second argument is the condition code and its third the
  // signedness of the comparison - TODO confirm against the helper.
  case X86::BI__builtin_ia32_pcmpeqb128_mask:
  case X86::BI__builtin_ia32_pcmpeqb256_mask:
  case X86::BI__builtin_ia32_pcmpeqb512_mask:
  case X86::BI__builtin_ia32_pcmpeqw128_mask:
  case X86::BI__builtin_ia32_pcmpeqw256_mask:
  case X86::BI__builtin_ia32_pcmpeqw512_mask:
  case X86::BI__builtin_ia32_pcmpeqd128_mask:
  case X86::BI__builtin_ia32_pcmpeqd256_mask:
  case X86::BI__builtin_ia32_pcmpeqd512_mask:
  case X86::BI__builtin_ia32_pcmpeqq128_mask:
  case X86::BI__builtin_ia32_pcmpeqq256_mask:
  case X86::BI__builtin_ia32_pcmpeqq512_mask:
    return EmitX86MaskedCompare(*this, 0, false, Ops);
  case X86::BI__builtin_ia32_pcmpgtb128_mask:
  case X86::BI__builtin_ia32_pcmpgtb256_mask:
  case X86::BI__builtin_ia32_pcmpgtb512_mask:
  case X86::BI__builtin_ia32_pcmpgtw128_mask:
  case X86::BI__builtin_ia32_pcmpgtw256_mask:
  case X86::BI__builtin_ia32_pcmpgtw512_mask:
  case X86::BI__builtin_ia32_pcmpgtd128_mask:
  case X86::BI__builtin_ia32_pcmpgtd256_mask:
  case X86::BI__builtin_ia32_pcmpgtd512_mask:
  case X86::BI__builtin_ia32_pcmpgtq128_mask:
  case X86::BI__builtin_ia32_pcmpgtq256_mask:
  case X86::BI__builtin_ia32_pcmpgtq512_mask:
    return EmitX86MaskedCompare(*this, 6, true, Ops);
  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpq512_mask: {
    // Only the low three bits of the immediate are used as the condition
    // code.
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, true, Ops);
  }
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask: {
    // Only the low three bits of the immediate are used as the condition
    // code.
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, false, Ops);
  }

  case X86::BI__builtin_ia32_vplzcntd_128_mask:
  case X86::BI__builtin_ia32_vplzcntd_256_mask:
  case X86::BI__builtin_ia32_vplzcntd_512_mask:
  case X86::BI__builtin_ia32_vplzcntq_128_mask:
  case X86::BI__builtin_ia32_vplzcntq_256_mask:
  case X86::BI__builtin_ia32_vplzcntq_512_mask: {
    // Call the generic ctlz intrinsic on Ops[0] with its i1 flag operand
    // false, then hand the result to EmitX86Select together with Ops[2] and
    // Ops[1].
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return EmitX86Select(*this, Ops[2],
                         Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
                         Ops[1]);
  }

  case X86::BI__builtin_ia32_pabsb128:
  case X86::BI__builtin_ia32_pabsw128:
  case X86::BI__builtin_ia32_pabsd128:
  case X86::BI__builtin_ia32_pabsb256:
  case X86::BI__builtin_ia32_pabsw256:
  case X86::BI__builtin_ia32_pabsd256:
  case X86::BI__builtin_ia32_pabsq128_mask:
  case X86::BI__builtin_ia32_pabsq256_mask:
  case X86::BI__builtin_ia32_pabsb512_mask:
  case X86::BI__builtin_ia32_pabsw512_mask:
  case X86::BI__builtin_ia32_pabsd512_mask:
  case X86::BI__builtin_ia32_pabsq512_mask:
    return EmitX86Abs(*this, Ops);

  // Integer min/max: the predicate passed to EmitX86MinMax distinguishes
  // signed/unsigned and max (GT) / min (LT).
  case X86::BI__builtin_ia32_pmaxsb128:
  case X86::BI__builtin_ia32_pmaxsw128:
  case X86::BI__builtin_ia32_pmaxsd128:
  case X86::BI__builtin_ia32_pmaxsq128_mask:
  case X86::BI__builtin_ia32_pmaxsb256:
  case X86::BI__builtin_ia32_pmaxsw256:
  case X86::BI__builtin_ia32_pmaxsd256:
  case X86::BI__builtin_ia32_pmaxsq256_mask:
  case X86::BI__builtin_ia32_pmaxsb512_mask:
  case X86::BI__builtin_ia32_pmaxsw512_mask:
  case X86::BI__builtin_ia32_pmaxsd512_mask:
  case X86::BI__builtin_ia32_pmaxsq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
  case X86::BI__builtin_ia32_pmaxub128:
  case X86::BI__builtin_ia32_pmaxuw128:
  case X86::BI__builtin_ia32_pmaxud128:
  case X86::BI__builtin_ia32_pmaxuq128_mask:
  case X86::BI__builtin_ia32_pmaxub256:
  case X86::BI__builtin_ia32_pmaxuw256:
  case X86::BI__builtin_ia32_pmaxud256:
  case X86::BI__builtin_ia32_pmaxuq256_mask:
  case X86::BI__builtin_ia32_pmaxub512_mask:
  case X86::BI__builtin_ia32_pmaxuw512_mask:
  case X86::BI__builtin_ia32_pmaxud512_mask:
  case X86::BI__builtin_ia32_pmaxuq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
  case X86::BI__builtin_ia32_pminsb128:
  case X86::BI__builtin_ia32_pminsw128:
  case X86::BI__builtin_ia32_pminsd128:
  case X86::BI__builtin_ia32_pminsq128_mask:
  case X86::BI__builtin_ia32_pminsb256:
  case X86::BI__builtin_ia32_pminsw256:
  case X86::BI__builtin_ia32_pminsd256:
  case X86::BI__builtin_ia32_pminsq256_mask:
  case X86::BI__builtin_ia32_pminsb512_mask:
  case X86::BI__builtin_ia32_pminsw512_mask:
  case X86::BI__builtin_ia32_pminsd512_mask:
  case X86::BI__builtin_ia32_pminsq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
  case X86::BI__builtin_ia32_pminub128:
  case X86::BI__builtin_ia32_pminuw128:
  case X86::BI__builtin_ia32_pminud128:
  case X86::BI__builtin_ia32_pminuq128_mask:
  case X86::BI__builtin_ia32_pminub256:
  case X86::BI__builtin_ia32_pminuw256:
  case X86::BI__builtin_ia32_pminud256:
  case X86::BI__builtin_ia32_pminuq256_mask:
  case X86::BI__builtin_ia32_pminub512_mask:
  case X86::BI__builtin_ia32_pminuw512_mask:
  case X86::BI__builtin_ia32_pminud512_mask:
  case X86::BI__builtin_ia32_pminuq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);

  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
    return Builder.CreateCall(F, Ops, "pswapd");
  }
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }

    // The intrinsic call yields an aggregate: element 0 is stored through
    // the pointer in Ops[0] and element 1 is the builtin's return value.
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
                                      Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }

  // SSE packed comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqps:
  case X86::BI__builtin_ia32_cmpeqpd:
    return getVectorFCmpIR(CmpInst::FCMP_OEQ);
  case X86::BI__builtin_ia32_cmpltps:
  case X86::BI__builtin_ia32_cmpltpd:
    return getVectorFCmpIR(CmpInst::FCMP_OLT);
  case X86::BI__builtin_ia32_cmpleps:
  case X86::BI__builtin_ia32_cmplepd:
    return getVectorFCmpIR(CmpInst::FCMP_OLE);
  case X86::BI__builtin_ia32_cmpunordps:
  case X86::BI__builtin_ia32_cmpunordpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNO);
  case X86::BI__builtin_ia32_cmpneqps:
  case X86::BI__builtin_ia32_cmpneqpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNE);
  case X86::BI__builtin_ia32_cmpnltps:
  case X86::BI__builtin_ia32_cmpnltpd:
    return getVectorFCmpIR(CmpInst::FCMP_UGE);
  case X86::BI__builtin_ia32_cmpnleps:
  case X86::BI__builtin_ia32_cmpnlepd:
    return getVectorFCmpIR(CmpInst::FCMP_UGT);
  case X86::BI__builtin_ia32_cmpordps:
  case X86::BI__builtin_ia32_cmpordpd:
    return getVectorFCmpIR(CmpInst::FCMP_ORD);
  case X86::BI__builtin_ia32_cmpps:
  case X86::BI__builtin_ia32_cmpps256:
  case X86::BI__builtin_ia32_cmppd:
  case X86::BI__builtin_ia32_cmppd256: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    // If this is one of the SSE immediates, we can use native IR. Immediates
    // 0-7 map to the same predicates as the dedicated builtins handled just
    // above.
    if (CC < 8) {
      FCmpInst::Predicate Pred;
      switch (CC) {
      case 0: Pred = FCmpInst::FCMP_OEQ; break;
      case 1: Pred = FCmpInst::FCMP_OLT; break;
      case 2: Pred = FCmpInst::FCMP_OLE; break;
      case 3: Pred = FCmpInst::FCMP_UNO; break;
      case 4: Pred = FCmpInst::FCMP_UNE; break;
      case 5: Pred = FCmpInst::FCMP_UGE; break;
      case 6: Pred = FCmpInst::FCMP_UGT; break;
      case 7: Pred = FCmpInst::FCMP_ORD; break;
      }
      return getVectorFCmpIR(Pred);
    }

    // We can't handle 8-31 immediates with native IR, use the intrinsic.
    // Except for predicates that create constants.
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cmpps:
      ID = Intrinsic::x86_sse_cmp_ps;
      break;
    case X86::BI__builtin_ia32_cmpps256:
      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
      // The constant is splatted as i32 elements and bitcast to the operand
      // type.
      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
         Value *Constant = (CC == 0xf || CC == 0x1f) ?
                llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
                llvm::Constant::getNullValue(Builder.getInt32Ty());
         Value *Vec = Builder.CreateVectorSplat(
                Ops[0]->getType()->getVectorNumElements(), Constant);
         return Builder.CreateBitCast(Vec, Ops[0]->getType());
      }
      ID = Intrinsic::x86_avx_cmp_ps_256;
      break;
    case X86::BI__builtin_ia32_cmppd:
      ID = Intrinsic::x86_sse2_cmp_pd;
      break;
    case X86::BI__builtin_ia32_cmppd256:
      // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
      // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
      // The constant is splatted as i64 elements and bitcast to the operand
      // type.
      if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
         Value *Constant = (CC == 0xf || CC == 0x1f) ?
                llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
                llvm::Constant::getNullValue(Builder.getInt64Ty());
         Value *Vec = Builder.CreateVectorSplat(
                Ops[0]->getType()->getVectorNumElements(), Constant);
         return Builder.CreateBitCast(Vec, Ops[0]->getType());
      }
      ID = Intrinsic::x86_avx_cmp_pd_256;
      break;
    }

    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  // SSE scalar comparison intrinsics
  // Each builtin forwards to the cmp_ss / cmp_sd intrinsic with its
  // condition code (0-7) appended as an immediate by getCmpIntrinsicCall.
  case X86::BI__builtin_ia32_cmpeqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  case X86::BI__builtin_ia32_cmpltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  case X86::BI__builtin_ia32_cmpless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  case X86::BI__builtin_ia32_cmpunordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  case X86::BI__builtin_ia32_cmpneqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  case X86::BI__builtin_ia32_cmpnltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  case X86::BI__builtin_ia32_cmpnless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  case X86::BI__builtin_ia32_cmpordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  case X86::BI__builtin_ia32_cmpeqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  case X86::BI__builtin_ia32_cmpltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  case X86::BI__builtin_ia32_cmplesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  case X86::BI__builtin_ia32_cmpunordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  case X86::BI__builtin_ia32_cmpneqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  case X86::BI__builtin_ia32_cmpnltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  case X86::BI__builtin_ia32_cmpnlesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  case X86::BI__builtin_ia32_cmpordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);

  case X86::BI__emul:
  case X86::BI__emulu: {
    // Cast both operands to i64 (signed cast for __emul, unsigned for
    // __emulu) and multiply. The last two CreateMul arguments are !isSigned
    // and isSigned; presumably these are the no-unsigned-wrap and
    // no-signed-wrap flags - confirm against IRBuilder::CreateMul.
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
    bool isSigned = (BuiltinID == X86::BI__emul);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
  }
  case X86::BI__mulh:
  case X86::BI__umulh:
  case X86::BI_mul128:
  case X86::BI_umul128: {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);

    // Multiply in 128 bits, then shift right by 64 (arithmetic for signed,
    // logical for unsigned) to isolate the high half of the product.
    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    // __mulh / __umulh return only the high half.
    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
      return HigherBits;

    // _mul128 / _umul128 store the high half through the third argument and
    // return the product cast to the result type.
    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
    Builder.CreateStore(HigherBits, HighBitsAddress);
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
  }

  case X86::BI__faststorefence: {
    // Sequentially consistent fence with system scope.
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::System);
  }
  case X86::BI_ReadWriteBarrier:
  case X86::BI_ReadBarrier:
  case X86::BI_WriteBarrier: {
    // All three barriers emit the same sequentially consistent fence with
    // single-thread scope.
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  }
  // The 32- and 64-bit bit-scan builtins share one MSVC emitter each.
  case X86::BI_BitScanForward:
  case X86::BI_BitScanForward64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
  case X86::BI_BitScanReverse:
  case X86::BI_BitScanReverse64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);

  // The 64-bit Interlocked* builtins are forwarded to the MSVC emitter under
  // the corresponding width-less MSVCIntrin ID.
  case X86::BI_InterlockedAnd64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
  case X86::BI_InterlockedExchange64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
  case X86::BI_InterlockedExchangeAdd64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
  case X86::BI_InterlockedExchangeSub64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
  case X86::BI_InterlockedOr64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
  case X86::BI_InterlockedXor64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
  case X86::BI_InterlockedDecrement64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
  case X86::BI_InterlockedIncrement64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);

  case X86::BI_AddressOfReturnAddress: {
    Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
    return Builder.CreateCall(F);
  }
  case X86::BI__stosb: {
    // We treat __stosb as a volatile memset - it may not generate "rep stosb"
    // instruction, but it will create a memset that won't be optimized away.
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
  }
  case X86::BI__ud2:
    // llvm.trap makes a ud2a instruction on x86.
    return EmitTrapCall(Intrinsic::trap);
  case X86::BI__int2c: {
    // This syscall signals a driver assertion failure in x86 NT kernels.
    // Emitted as side-effecting inline asm, with the call marked noreturn.
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    CallSite CS = Builder.CreateCall(IA);
    CS.setAttributes(NoReturnAttr);
    return CS.getInstruction();
  }
  case X86::BI__readfsbyte:
  case X86::BI__readfsword:
  case X86::BI__readfsdword:
  case X86::BI__readfsqword: {
    // Turn the integer argument into a pointer in address space 257 and emit
    // a volatile load of the result type from it.
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
                                        llvm::PointerType::get(IntTy, 257));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__readgsbyte:
  case X86::BI__readgsword:
  case X86::BI__readgsdword:
  case X86::BI__readgsqword: {
    // Same as the __readfs* builtins above, but using address space 256.
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
                                        llvm::PointerType::get(IntTy, 256));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  }
}


Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return nullptr;

  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
  // call __builtin_readcyclecounter.
  case PPC::BI__builtin_ppc_get_timebase:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));

  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  case PPC::BI__builtin_vsx_lxvd2x:
  case PPC::BI__builtin_vsx_lxvw4x:
  case PPC::BI__builtin_vsx_lxvd2x_be:
  case PPC::BI__builtin_vsx_lxvw4x_be:
  case PPC::BI__builtin_vsx_lxvl:
  case PPC::BI__builtin_vsx_lxvll:
  {
    if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
       BuiltinID == PPC::BI__builtin_vsx_lxvll){
      Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
    }else {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
      Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
      Ops.pop_back();
    }

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
8602 case PPC::BI__builtin_altivec_lvehx: 8603 ID = Intrinsic::ppc_altivec_lvehx; 8604 break; 8605 case PPC::BI__builtin_altivec_lvewx: 8606 ID = Intrinsic::ppc_altivec_lvewx; 8607 break; 8608 case PPC::BI__builtin_altivec_lvsl: 8609 ID = Intrinsic::ppc_altivec_lvsl; 8610 break; 8611 case PPC::BI__builtin_altivec_lvsr: 8612 ID = Intrinsic::ppc_altivec_lvsr; 8613 break; 8614 case PPC::BI__builtin_vsx_lxvd2x: 8615 ID = Intrinsic::ppc_vsx_lxvd2x; 8616 break; 8617 case PPC::BI__builtin_vsx_lxvw4x: 8618 ID = Intrinsic::ppc_vsx_lxvw4x; 8619 break; 8620 case PPC::BI__builtin_vsx_lxvd2x_be: 8621 ID = Intrinsic::ppc_vsx_lxvd2x_be; 8622 break; 8623 case PPC::BI__builtin_vsx_lxvw4x_be: 8624 ID = Intrinsic::ppc_vsx_lxvw4x_be; 8625 break; 8626 case PPC::BI__builtin_vsx_lxvl: 8627 ID = Intrinsic::ppc_vsx_lxvl; 8628 break; 8629 case PPC::BI__builtin_vsx_lxvll: 8630 ID = Intrinsic::ppc_vsx_lxvll; 8631 break; 8632 } 8633 llvm::Function *F = CGM.getIntrinsic(ID); 8634 return Builder.CreateCall(F, Ops, ""); 8635 } 8636 8637 // vec_st, vec_xst_be 8638 case PPC::BI__builtin_altivec_stvx: 8639 case PPC::BI__builtin_altivec_stvxl: 8640 case PPC::BI__builtin_altivec_stvebx: 8641 case PPC::BI__builtin_altivec_stvehx: 8642 case PPC::BI__builtin_altivec_stvewx: 8643 case PPC::BI__builtin_vsx_stxvd2x: 8644 case PPC::BI__builtin_vsx_stxvw4x: 8645 case PPC::BI__builtin_vsx_stxvd2x_be: 8646 case PPC::BI__builtin_vsx_stxvw4x_be: 8647 case PPC::BI__builtin_vsx_stxvl: 8648 case PPC::BI__builtin_vsx_stxvll: 8649 { 8650 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 8651 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 8652 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8653 }else { 8654 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 8655 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 8656 Ops.pop_back(); 8657 } 8658 8659 switch (BuiltinID) { 8660 default: llvm_unreachable("Unsupported st intrinsic!"); 8661 case PPC::BI__builtin_altivec_stvx: 8662 ID = Intrinsic::ppc_altivec_stvx; 8663 break; 8664 
case PPC::BI__builtin_altivec_stvxl: 8665 ID = Intrinsic::ppc_altivec_stvxl; 8666 break; 8667 case PPC::BI__builtin_altivec_stvebx: 8668 ID = Intrinsic::ppc_altivec_stvebx; 8669 break; 8670 case PPC::BI__builtin_altivec_stvehx: 8671 ID = Intrinsic::ppc_altivec_stvehx; 8672 break; 8673 case PPC::BI__builtin_altivec_stvewx: 8674 ID = Intrinsic::ppc_altivec_stvewx; 8675 break; 8676 case PPC::BI__builtin_vsx_stxvd2x: 8677 ID = Intrinsic::ppc_vsx_stxvd2x; 8678 break; 8679 case PPC::BI__builtin_vsx_stxvw4x: 8680 ID = Intrinsic::ppc_vsx_stxvw4x; 8681 break; 8682 case PPC::BI__builtin_vsx_stxvd2x_be: 8683 ID = Intrinsic::ppc_vsx_stxvd2x_be; 8684 break; 8685 case PPC::BI__builtin_vsx_stxvw4x_be: 8686 ID = Intrinsic::ppc_vsx_stxvw4x_be; 8687 break; 8688 case PPC::BI__builtin_vsx_stxvl: 8689 ID = Intrinsic::ppc_vsx_stxvl; 8690 break; 8691 case PPC::BI__builtin_vsx_stxvll: 8692 ID = Intrinsic::ppc_vsx_stxvll; 8693 break; 8694 } 8695 llvm::Function *F = CGM.getIntrinsic(ID); 8696 return Builder.CreateCall(F, Ops, ""); 8697 } 8698 // Square root 8699 case PPC::BI__builtin_vsx_xvsqrtsp: 8700 case PPC::BI__builtin_vsx_xvsqrtdp: { 8701 llvm::Type *ResultType = ConvertType(E->getType()); 8702 Value *X = EmitScalarExpr(E->getArg(0)); 8703 ID = Intrinsic::sqrt; 8704 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8705 return Builder.CreateCall(F, X); 8706 } 8707 // Count leading zeros 8708 case PPC::BI__builtin_altivec_vclzb: 8709 case PPC::BI__builtin_altivec_vclzh: 8710 case PPC::BI__builtin_altivec_vclzw: 8711 case PPC::BI__builtin_altivec_vclzd: { 8712 llvm::Type *ResultType = ConvertType(E->getType()); 8713 Value *X = EmitScalarExpr(E->getArg(0)); 8714 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8715 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8716 return Builder.CreateCall(F, {X, Undef}); 8717 } 8718 case PPC::BI__builtin_altivec_vctzb: 8719 case PPC::BI__builtin_altivec_vctzh: 8720 case PPC::BI__builtin_altivec_vctzw: 8721 case 
PPC::BI__builtin_altivec_vctzd: { 8722 llvm::Type *ResultType = ConvertType(E->getType()); 8723 Value *X = EmitScalarExpr(E->getArg(0)); 8724 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8725 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8726 return Builder.CreateCall(F, {X, Undef}); 8727 } 8728 case PPC::BI__builtin_altivec_vpopcntb: 8729 case PPC::BI__builtin_altivec_vpopcnth: 8730 case PPC::BI__builtin_altivec_vpopcntw: 8731 case PPC::BI__builtin_altivec_vpopcntd: { 8732 llvm::Type *ResultType = ConvertType(E->getType()); 8733 Value *X = EmitScalarExpr(E->getArg(0)); 8734 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8735 return Builder.CreateCall(F, X); 8736 } 8737 // Copy sign 8738 case PPC::BI__builtin_vsx_xvcpsgnsp: 8739 case PPC::BI__builtin_vsx_xvcpsgndp: { 8740 llvm::Type *ResultType = ConvertType(E->getType()); 8741 Value *X = EmitScalarExpr(E->getArg(0)); 8742 Value *Y = EmitScalarExpr(E->getArg(1)); 8743 ID = Intrinsic::copysign; 8744 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8745 return Builder.CreateCall(F, {X, Y}); 8746 } 8747 // Rounding/truncation 8748 case PPC::BI__builtin_vsx_xvrspip: 8749 case PPC::BI__builtin_vsx_xvrdpip: 8750 case PPC::BI__builtin_vsx_xvrdpim: 8751 case PPC::BI__builtin_vsx_xvrspim: 8752 case PPC::BI__builtin_vsx_xvrdpi: 8753 case PPC::BI__builtin_vsx_xvrspi: 8754 case PPC::BI__builtin_vsx_xvrdpic: 8755 case PPC::BI__builtin_vsx_xvrspic: 8756 case PPC::BI__builtin_vsx_xvrdpiz: 8757 case PPC::BI__builtin_vsx_xvrspiz: { 8758 llvm::Type *ResultType = ConvertType(E->getType()); 8759 Value *X = EmitScalarExpr(E->getArg(0)); 8760 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 8761 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 8762 ID = Intrinsic::floor; 8763 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 8764 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 8765 ID = Intrinsic::round; 8766 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 8767 BuiltinID == 
PPC::BI__builtin_vsx_xvrspic) 8768 ID = Intrinsic::nearbyint; 8769 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 8770 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 8771 ID = Intrinsic::ceil; 8772 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 8773 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 8774 ID = Intrinsic::trunc; 8775 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8776 return Builder.CreateCall(F, X); 8777 } 8778 8779 // Absolute value 8780 case PPC::BI__builtin_vsx_xvabsdp: 8781 case PPC::BI__builtin_vsx_xvabssp: { 8782 llvm::Type *ResultType = ConvertType(E->getType()); 8783 Value *X = EmitScalarExpr(E->getArg(0)); 8784 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8785 return Builder.CreateCall(F, X); 8786 } 8787 8788 // FMA variations 8789 case PPC::BI__builtin_vsx_xvmaddadp: 8790 case PPC::BI__builtin_vsx_xvmaddasp: 8791 case PPC::BI__builtin_vsx_xvnmaddadp: 8792 case PPC::BI__builtin_vsx_xvnmaddasp: 8793 case PPC::BI__builtin_vsx_xvmsubadp: 8794 case PPC::BI__builtin_vsx_xvmsubasp: 8795 case PPC::BI__builtin_vsx_xvnmsubadp: 8796 case PPC::BI__builtin_vsx_xvnmsubasp: { 8797 llvm::Type *ResultType = ConvertType(E->getType()); 8798 Value *X = EmitScalarExpr(E->getArg(0)); 8799 Value *Y = EmitScalarExpr(E->getArg(1)); 8800 Value *Z = EmitScalarExpr(E->getArg(2)); 8801 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8802 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8803 switch (BuiltinID) { 8804 case PPC::BI__builtin_vsx_xvmaddadp: 8805 case PPC::BI__builtin_vsx_xvmaddasp: 8806 return Builder.CreateCall(F, {X, Y, Z}); 8807 case PPC::BI__builtin_vsx_xvnmaddadp: 8808 case PPC::BI__builtin_vsx_xvnmaddasp: 8809 return Builder.CreateFSub(Zero, 8810 Builder.CreateCall(F, {X, Y, Z}), "sub"); 8811 case PPC::BI__builtin_vsx_xvmsubadp: 8812 case PPC::BI__builtin_vsx_xvmsubasp: 8813 return Builder.CreateCall(F, 8814 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8815 case 
PPC::BI__builtin_vsx_xvnmsubadp: 8816 case PPC::BI__builtin_vsx_xvnmsubasp: 8817 Value *FsubRes = 8818 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8819 return Builder.CreateFSub(Zero, FsubRes, "sub"); 8820 } 8821 llvm_unreachable("Unknown FMA operation"); 8822 return nullptr; // Suppress no-return warning 8823 } 8824 8825 case PPC::BI__builtin_vsx_insertword: { 8826 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 8827 8828 // Third argument is a compile time constant int. It must be clamped to 8829 // to the range [0, 12]. 8830 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8831 assert(ArgCI && 8832 "Third arg to xxinsertw intrinsic must be constant integer"); 8833 const int64_t MaxIndex = 12; 8834 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8835 8836 // The builtin semantics don't exactly match the xxinsertw instructions 8837 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 8838 // word from the first argument, and inserts it in the second argument. The 8839 // instruction extracts the word from its second input register and inserts 8840 // it into its first input register, so swap the first and second arguments. 8841 std::swap(Ops[0], Ops[1]); 8842 8843 // Need to cast the second argument from a vector of unsigned int to a 8844 // vector of long long. 8845 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8846 8847 if (getTarget().isLittleEndian()) { 8848 // Create a shuffle mask of (1, 0) 8849 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8850 ConstantInt::get(Int32Ty, 0) 8851 }; 8852 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8853 8854 // Reverse the double words in the vector we will extract from. 8855 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8856 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 8857 8858 // Reverse the index. 
8859 Index = MaxIndex - Index; 8860 } 8861 8862 // Intrinsic expects the first arg to be a vector of int. 8863 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8864 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 8865 return Builder.CreateCall(F, Ops); 8866 } 8867 8868 case PPC::BI__builtin_vsx_extractuword: { 8869 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 8870 8871 // Intrinsic expects the first argument to be a vector of doublewords. 8872 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8873 8874 // The second argument is a compile time constant int that needs to 8875 // be clamped to the range [0, 12]. 8876 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 8877 assert(ArgCI && 8878 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 8879 const int64_t MaxIndex = 12; 8880 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8881 8882 if (getTarget().isLittleEndian()) { 8883 // Reverse the index. 8884 Index = MaxIndex - Index; 8885 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8886 8887 // Emit the call, then reverse the double words of the results vector. 
8888 Value *Call = Builder.CreateCall(F, Ops); 8889 8890 // Create a shuffle mask of (1, 0) 8891 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8892 ConstantInt::get(Int32Ty, 0) 8893 }; 8894 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8895 8896 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 8897 return ShuffleCall; 8898 } else { 8899 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8900 return Builder.CreateCall(F, Ops); 8901 } 8902 } 8903 8904 case PPC::BI__builtin_vsx_xxpermdi: { 8905 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8906 assert(ArgCI && "Third arg must be constant integer!"); 8907 8908 unsigned Index = ArgCI->getZExtValue(); 8909 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8910 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8911 8912 // Element zero comes from the first input vector and element one comes from 8913 // the second. The element indices within each vector are numbered in big 8914 // endian order so the shuffle mask must be adjusted for this on little 8915 // endian platforms (i.e. index is complemented and source vector reversed). 
8916 unsigned ElemIdx0; 8917 unsigned ElemIdx1; 8918 if (getTarget().isLittleEndian()) { 8919 ElemIdx0 = (~Index & 1) + 2; 8920 ElemIdx1 = (~Index & 2) >> 1; 8921 } else { // BigEndian 8922 ElemIdx0 = (Index & 2) >> 1; 8923 ElemIdx1 = 2 + (Index & 1); 8924 } 8925 8926 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 8927 ConstantInt::get(Int32Ty, ElemIdx1)}; 8928 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8929 8930 Value *ShuffleCall = 8931 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8932 QualType BIRetType = E->getType(); 8933 auto RetTy = ConvertType(BIRetType); 8934 return Builder.CreateBitCast(ShuffleCall, RetTy); 8935 } 8936 8937 case PPC::BI__builtin_vsx_xxsldwi: { 8938 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8939 assert(ArgCI && "Third argument must be a compile time constant"); 8940 unsigned Index = ArgCI->getZExtValue() & 0x3; 8941 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8942 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 8943 8944 // Create a shuffle mask 8945 unsigned ElemIdx0; 8946 unsigned ElemIdx1; 8947 unsigned ElemIdx2; 8948 unsigned ElemIdx3; 8949 if (getTarget().isLittleEndian()) { 8950 // Little endian element N comes from element 8+N-Index of the 8951 // concatenated wide vector (of course, using modulo arithmetic on 8952 // the total number of elements). 
8953 ElemIdx0 = (8 - Index) % 8; 8954 ElemIdx1 = (9 - Index) % 8; 8955 ElemIdx2 = (10 - Index) % 8; 8956 ElemIdx3 = (11 - Index) % 8; 8957 } else { 8958 // Big endian ElemIdx<N> = Index + N 8959 ElemIdx0 = Index; 8960 ElemIdx1 = Index + 1; 8961 ElemIdx2 = Index + 2; 8962 ElemIdx3 = Index + 3; 8963 } 8964 8965 Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), 8966 ConstantInt::get(Int32Ty, ElemIdx1), 8967 ConstantInt::get(Int32Ty, ElemIdx2), 8968 ConstantInt::get(Int32Ty, ElemIdx3)}; 8969 8970 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8971 Value *ShuffleCall = 8972 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8973 QualType BIRetType = E->getType(); 8974 auto RetTy = ConvertType(BIRetType); 8975 return Builder.CreateBitCast(ShuffleCall, RetTy); 8976 } 8977 } 8978 } 8979 8980 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 8981 const CallExpr *E) { 8982 switch (BuiltinID) { 8983 case AMDGPU::BI__builtin_amdgcn_div_scale: 8984 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 8985 // Translate from the intrinsics's struct return to the builtin's out 8986 // argument. 
8987 8988 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 8989 8990 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 8991 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 8992 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 8993 8994 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 8995 X->getType()); 8996 8997 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 8998 8999 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 9000 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 9001 9002 llvm::Type *RealFlagType 9003 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 9004 9005 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 9006 Builder.CreateStore(FlagExt, FlagOutPtr); 9007 return Result; 9008 } 9009 case AMDGPU::BI__builtin_amdgcn_div_fmas: 9010 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 9011 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 9012 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 9013 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 9014 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 9015 9016 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 9017 Src0->getType()); 9018 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 9019 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 9020 } 9021 9022 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 9023 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 9024 case AMDGPU::BI__builtin_amdgcn_mov_dpp: { 9025 llvm::SmallVector<llvm::Value *, 5> Args; 9026 for (unsigned I = 0; I != 5; ++I) 9027 Args.push_back(EmitScalarExpr(E->getArg(I))); 9028 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, 9029 Args[0]->getType()); 9030 return Builder.CreateCall(F, Args); 9031 } 9032 case AMDGPU::BI__builtin_amdgcn_div_fixup: 9033 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 9034 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 9035 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 9036 case 
AMDGPU::BI__builtin_amdgcn_trig_preop: 9037 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 9038 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 9039 case AMDGPU::BI__builtin_amdgcn_rcp: 9040 case AMDGPU::BI__builtin_amdgcn_rcpf: 9041 case AMDGPU::BI__builtin_amdgcn_rcph: 9042 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 9043 case AMDGPU::BI__builtin_amdgcn_rsq: 9044 case AMDGPU::BI__builtin_amdgcn_rsqf: 9045 case AMDGPU::BI__builtin_amdgcn_rsqh: 9046 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 9047 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 9048 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 9049 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 9050 case AMDGPU::BI__builtin_amdgcn_sinf: 9051 case AMDGPU::BI__builtin_amdgcn_sinh: 9052 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 9053 case AMDGPU::BI__builtin_amdgcn_cosf: 9054 case AMDGPU::BI__builtin_amdgcn_cosh: 9055 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 9056 case AMDGPU::BI__builtin_amdgcn_log_clampf: 9057 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 9058 case AMDGPU::BI__builtin_amdgcn_ldexp: 9059 case AMDGPU::BI__builtin_amdgcn_ldexpf: 9060 case AMDGPU::BI__builtin_amdgcn_ldexph: 9061 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 9062 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 9063 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 9064 case AMDGPU::BI__builtin_amdgcn_frexp_manth: 9065 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 9066 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 9067 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 9068 Value *Src0 = EmitScalarExpr(E->getArg(0)); 9069 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 9070 { Builder.getInt32Ty(), Src0->getType() }); 9071 return Builder.CreateCall(F, Src0); 9072 } 9073 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 9074 Value *Src0 = EmitScalarExpr(E->getArg(0)); 9075 Value *F = 
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 9076 { Builder.getInt16Ty(), Src0->getType() }); 9077 return Builder.CreateCall(F, Src0); 9078 } 9079 case AMDGPU::BI__builtin_amdgcn_fract: 9080 case AMDGPU::BI__builtin_amdgcn_fractf: 9081 case AMDGPU::BI__builtin_amdgcn_fracth: 9082 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 9083 case AMDGPU::BI__builtin_amdgcn_lerp: 9084 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 9085 case AMDGPU::BI__builtin_amdgcn_uicmp: 9086 case AMDGPU::BI__builtin_amdgcn_uicmpl: 9087 case AMDGPU::BI__builtin_amdgcn_sicmp: 9088 case AMDGPU::BI__builtin_amdgcn_sicmpl: 9089 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 9090 case AMDGPU::BI__builtin_amdgcn_fcmp: 9091 case AMDGPU::BI__builtin_amdgcn_fcmpf: 9092 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 9093 case AMDGPU::BI__builtin_amdgcn_class: 9094 case AMDGPU::BI__builtin_amdgcn_classf: 9095 case AMDGPU::BI__builtin_amdgcn_classh: 9096 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 9097 case AMDGPU::BI__builtin_amdgcn_fmed3f: 9098 case AMDGPU::BI__builtin_amdgcn_fmed3h: 9099 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); 9100 case AMDGPU::BI__builtin_amdgcn_read_exec: { 9101 CallInst *CI = cast<CallInst>( 9102 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 9103 CI->setConvergent(); 9104 return CI; 9105 } 9106 9107 // amdgcn workitem 9108 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 9109 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 9110 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 9111 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 9112 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 9113 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 9114 9115 // r600 intrinsics 9116 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 9117 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 9118 return 
emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 9119 case AMDGPU::BI__builtin_r600_read_tidig_x: 9120 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 9121 case AMDGPU::BI__builtin_r600_read_tidig_y: 9122 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 9123 case AMDGPU::BI__builtin_r600_read_tidig_z: 9124 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 9125 default: 9126 return nullptr; 9127 } 9128 } 9129 9130 /// Handle a SystemZ function in which the final argument is a pointer 9131 /// to an int that receives the post-instruction CC value. At the LLVM level 9132 /// this is represented as a function that returns a {result, cc} pair. 9133 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 9134 unsigned IntrinsicID, 9135 const CallExpr *E) { 9136 unsigned NumArgs = E->getNumArgs() - 1; 9137 SmallVector<Value *, 8> Args(NumArgs); 9138 for (unsigned I = 0; I < NumArgs; ++I) 9139 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 9140 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 9141 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 9142 Value *Call = CGF.Builder.CreateCall(F, Args); 9143 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 9144 CGF.Builder.CreateStore(CC, CCPtr); 9145 return CGF.Builder.CreateExtractValue(Call, 0); 9146 } 9147 9148 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 9149 const CallExpr *E) { 9150 switch (BuiltinID) { 9151 case SystemZ::BI__builtin_tbegin: { 9152 Value *TDB = EmitScalarExpr(E->getArg(0)); 9153 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 9154 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 9155 return Builder.CreateCall(F, {TDB, Control}); 9156 } 9157 case SystemZ::BI__builtin_tbegin_nofloat: { 9158 Value *TDB = EmitScalarExpr(E->getArg(0)); 9159 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 9160 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 9161 return 
Builder.CreateCall(F, {TDB, Control}); 9162 } 9163 case SystemZ::BI__builtin_tbeginc: { 9164 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 9165 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 9166 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 9167 return Builder.CreateCall(F, {TDB, Control}); 9168 } 9169 case SystemZ::BI__builtin_tabort: { 9170 Value *Data = EmitScalarExpr(E->getArg(0)); 9171 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 9172 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 9173 } 9174 case SystemZ::BI__builtin_non_tx_store: { 9175 Value *Address = EmitScalarExpr(E->getArg(0)); 9176 Value *Data = EmitScalarExpr(E->getArg(1)); 9177 Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); 9178 return Builder.CreateCall(F, {Data, Address}); 9179 } 9180 9181 // Vector builtins. Note that most vector builtins are mapped automatically 9182 // to target-specific LLVM intrinsics. The ones handled specially here can 9183 // be represented via standard LLVM IR, which is preferable to enable common 9184 // LLVM optimizations. 
9185 9186 case SystemZ::BI__builtin_s390_vpopctb: 9187 case SystemZ::BI__builtin_s390_vpopcth: 9188 case SystemZ::BI__builtin_s390_vpopctf: 9189 case SystemZ::BI__builtin_s390_vpopctg: { 9190 llvm::Type *ResultType = ConvertType(E->getType()); 9191 Value *X = EmitScalarExpr(E->getArg(0)); 9192 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 9193 return Builder.CreateCall(F, X); 9194 } 9195 9196 case SystemZ::BI__builtin_s390_vclzb: 9197 case SystemZ::BI__builtin_s390_vclzh: 9198 case SystemZ::BI__builtin_s390_vclzf: 9199 case SystemZ::BI__builtin_s390_vclzg: { 9200 llvm::Type *ResultType = ConvertType(E->getType()); 9201 Value *X = EmitScalarExpr(E->getArg(0)); 9202 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9203 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 9204 return Builder.CreateCall(F, {X, Undef}); 9205 } 9206 9207 case SystemZ::BI__builtin_s390_vctzb: 9208 case SystemZ::BI__builtin_s390_vctzh: 9209 case SystemZ::BI__builtin_s390_vctzf: 9210 case SystemZ::BI__builtin_s390_vctzg: { 9211 llvm::Type *ResultType = ConvertType(E->getType()); 9212 Value *X = EmitScalarExpr(E->getArg(0)); 9213 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9214 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 9215 return Builder.CreateCall(F, {X, Undef}); 9216 } 9217 9218 case SystemZ::BI__builtin_s390_vfsqsb: 9219 case SystemZ::BI__builtin_s390_vfsqdb: { 9220 llvm::Type *ResultType = ConvertType(E->getType()); 9221 Value *X = EmitScalarExpr(E->getArg(0)); 9222 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 9223 return Builder.CreateCall(F, X); 9224 } 9225 case SystemZ::BI__builtin_s390_vfmasb: 9226 case SystemZ::BI__builtin_s390_vfmadb: { 9227 llvm::Type *ResultType = ConvertType(E->getType()); 9228 Value *X = EmitScalarExpr(E->getArg(0)); 9229 Value *Y = EmitScalarExpr(E->getArg(1)); 9230 Value *Z = EmitScalarExpr(E->getArg(2)); 9231 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 
9232 return Builder.CreateCall(F, {X, Y, Z}); 9233 } 9234 case SystemZ::BI__builtin_s390_vfmssb: 9235 case SystemZ::BI__builtin_s390_vfmsdb: { 9236 llvm::Type *ResultType = ConvertType(E->getType()); 9237 Value *X = EmitScalarExpr(E->getArg(0)); 9238 Value *Y = EmitScalarExpr(E->getArg(1)); 9239 Value *Z = EmitScalarExpr(E->getArg(2)); 9240 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9241 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9242 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 9243 } 9244 case SystemZ::BI__builtin_s390_vfnmasb: 9245 case SystemZ::BI__builtin_s390_vfnmadb: { 9246 llvm::Type *ResultType = ConvertType(E->getType()); 9247 Value *X = EmitScalarExpr(E->getArg(0)); 9248 Value *Y = EmitScalarExpr(E->getArg(1)); 9249 Value *Z = EmitScalarExpr(E->getArg(2)); 9250 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9251 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9252 return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); 9253 } 9254 case SystemZ::BI__builtin_s390_vfnmssb: 9255 case SystemZ::BI__builtin_s390_vfnmsdb: { 9256 llvm::Type *ResultType = ConvertType(E->getType()); 9257 Value *X = EmitScalarExpr(E->getArg(0)); 9258 Value *Y = EmitScalarExpr(E->getArg(1)); 9259 Value *Z = EmitScalarExpr(E->getArg(2)); 9260 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9261 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9262 Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); 9263 return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); 9264 } 9265 case SystemZ::BI__builtin_s390_vflpsb: 9266 case SystemZ::BI__builtin_s390_vflpdb: { 9267 llvm::Type *ResultType = ConvertType(E->getType()); 9268 Value *X = EmitScalarExpr(E->getArg(0)); 9269 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9270 return Builder.CreateCall(F, X); 9271 } 9272 case SystemZ::BI__builtin_s390_vflnsb: 9273 case 
SystemZ::BI__builtin_s390_vflndb: { 9274 llvm::Type *ResultType = ConvertType(E->getType()); 9275 Value *X = EmitScalarExpr(E->getArg(0)); 9276 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9277 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9278 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 9279 } 9280 case SystemZ::BI__builtin_s390_vfisb: 9281 case SystemZ::BI__builtin_s390_vfidb: { 9282 llvm::Type *ResultType = ConvertType(E->getType()); 9283 Value *X = EmitScalarExpr(E->getArg(0)); 9284 // Constant-fold the M4 and M5 mask arguments. 9285 llvm::APSInt M4, M5; 9286 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 9287 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 9288 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 9289 (void)IsConstM4; (void)IsConstM5; 9290 // Check whether this instance can be represented via a LLVM standard 9291 // intrinsic. We only support some combinations of M4 and M5. 
9292 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9293 switch (M4.getZExtValue()) { 9294 default: break; 9295 case 0: // IEEE-inexact exception allowed 9296 switch (M5.getZExtValue()) { 9297 default: break; 9298 case 0: ID = Intrinsic::rint; break; 9299 } 9300 break; 9301 case 4: // IEEE-inexact exception suppressed 9302 switch (M5.getZExtValue()) { 9303 default: break; 9304 case 0: ID = Intrinsic::nearbyint; break; 9305 case 1: ID = Intrinsic::round; break; 9306 case 5: ID = Intrinsic::trunc; break; 9307 case 6: ID = Intrinsic::ceil; break; 9308 case 7: ID = Intrinsic::floor; break; 9309 } 9310 break; 9311 } 9312 if (ID != Intrinsic::not_intrinsic) { 9313 Function *F = CGM.getIntrinsic(ID, ResultType); 9314 return Builder.CreateCall(F, X); 9315 } 9316 switch (BuiltinID) { 9317 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; 9318 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; 9319 default: llvm_unreachable("Unknown BuiltinID"); 9320 } 9321 Function *F = CGM.getIntrinsic(ID); 9322 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9323 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 9324 return Builder.CreateCall(F, {X, M4Value, M5Value}); 9325 } 9326 case SystemZ::BI__builtin_s390_vfmaxsb: 9327 case SystemZ::BI__builtin_s390_vfmaxdb: { 9328 llvm::Type *ResultType = ConvertType(E->getType()); 9329 Value *X = EmitScalarExpr(E->getArg(0)); 9330 Value *Y = EmitScalarExpr(E->getArg(1)); 9331 // Constant-fold the M4 mask argument. 9332 llvm::APSInt M4; 9333 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 9334 assert(IsConstM4 && "Constant arg isn't actually constant?"); 9335 (void)IsConstM4; 9336 // Check whether this instance can be represented via a LLVM standard 9337 // intrinsic. We only support some values of M4. 
9338 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9339 switch (M4.getZExtValue()) { 9340 default: break; 9341 case 4: ID = Intrinsic::maxnum; break; 9342 } 9343 if (ID != Intrinsic::not_intrinsic) { 9344 Function *F = CGM.getIntrinsic(ID, ResultType); 9345 return Builder.CreateCall(F, {X, Y}); 9346 } 9347 switch (BuiltinID) { 9348 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; 9349 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; 9350 default: llvm_unreachable("Unknown BuiltinID"); 9351 } 9352 Function *F = CGM.getIntrinsic(ID); 9353 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9354 return Builder.CreateCall(F, {X, Y, M4Value}); 9355 } 9356 case SystemZ::BI__builtin_s390_vfminsb: 9357 case SystemZ::BI__builtin_s390_vfmindb: { 9358 llvm::Type *ResultType = ConvertType(E->getType()); 9359 Value *X = EmitScalarExpr(E->getArg(0)); 9360 Value *Y = EmitScalarExpr(E->getArg(1)); 9361 // Constant-fold the M4 mask argument. 9362 llvm::APSInt M4; 9363 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 9364 assert(IsConstM4 && "Constant arg isn't actually constant?"); 9365 (void)IsConstM4; 9366 // Check whether this instance can be represented via a LLVM standard 9367 // intrinsic. We only support some values of M4. 
9368 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9369 switch (M4.getZExtValue()) { 9370 default: break; 9371 case 4: ID = Intrinsic::minnum; break; 9372 } 9373 if (ID != Intrinsic::not_intrinsic) { 9374 Function *F = CGM.getIntrinsic(ID, ResultType); 9375 return Builder.CreateCall(F, {X, Y}); 9376 } 9377 switch (BuiltinID) { 9378 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; 9379 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; 9380 default: llvm_unreachable("Unknown BuiltinID"); 9381 } 9382 Function *F = CGM.getIntrinsic(ID); 9383 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9384 return Builder.CreateCall(F, {X, Y, M4Value}); 9385 } 9386 9387 // Vector intrisincs that output the post-instruction CC value. 9388 9389 #define INTRINSIC_WITH_CC(NAME) \ 9390 case SystemZ::BI__builtin_##NAME: \ 9391 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 9392 9393 INTRINSIC_WITH_CC(s390_vpkshs); 9394 INTRINSIC_WITH_CC(s390_vpksfs); 9395 INTRINSIC_WITH_CC(s390_vpksgs); 9396 9397 INTRINSIC_WITH_CC(s390_vpklshs); 9398 INTRINSIC_WITH_CC(s390_vpklsfs); 9399 INTRINSIC_WITH_CC(s390_vpklsgs); 9400 9401 INTRINSIC_WITH_CC(s390_vceqbs); 9402 INTRINSIC_WITH_CC(s390_vceqhs); 9403 INTRINSIC_WITH_CC(s390_vceqfs); 9404 INTRINSIC_WITH_CC(s390_vceqgs); 9405 9406 INTRINSIC_WITH_CC(s390_vchbs); 9407 INTRINSIC_WITH_CC(s390_vchhs); 9408 INTRINSIC_WITH_CC(s390_vchfs); 9409 INTRINSIC_WITH_CC(s390_vchgs); 9410 9411 INTRINSIC_WITH_CC(s390_vchlbs); 9412 INTRINSIC_WITH_CC(s390_vchlhs); 9413 INTRINSIC_WITH_CC(s390_vchlfs); 9414 INTRINSIC_WITH_CC(s390_vchlgs); 9415 9416 INTRINSIC_WITH_CC(s390_vfaebs); 9417 INTRINSIC_WITH_CC(s390_vfaehs); 9418 INTRINSIC_WITH_CC(s390_vfaefs); 9419 9420 INTRINSIC_WITH_CC(s390_vfaezbs); 9421 INTRINSIC_WITH_CC(s390_vfaezhs); 9422 INTRINSIC_WITH_CC(s390_vfaezfs); 9423 9424 INTRINSIC_WITH_CC(s390_vfeebs); 9425 INTRINSIC_WITH_CC(s390_vfeehs); 9426 INTRINSIC_WITH_CC(s390_vfeefs); 9427 9428 
INTRINSIC_WITH_CC(s390_vfeezbs); 9429 INTRINSIC_WITH_CC(s390_vfeezhs); 9430 INTRINSIC_WITH_CC(s390_vfeezfs); 9431 9432 INTRINSIC_WITH_CC(s390_vfenebs); 9433 INTRINSIC_WITH_CC(s390_vfenehs); 9434 INTRINSIC_WITH_CC(s390_vfenefs); 9435 9436 INTRINSIC_WITH_CC(s390_vfenezbs); 9437 INTRINSIC_WITH_CC(s390_vfenezhs); 9438 INTRINSIC_WITH_CC(s390_vfenezfs); 9439 9440 INTRINSIC_WITH_CC(s390_vistrbs); 9441 INTRINSIC_WITH_CC(s390_vistrhs); 9442 INTRINSIC_WITH_CC(s390_vistrfs); 9443 9444 INTRINSIC_WITH_CC(s390_vstrcbs); 9445 INTRINSIC_WITH_CC(s390_vstrchs); 9446 INTRINSIC_WITH_CC(s390_vstrcfs); 9447 9448 INTRINSIC_WITH_CC(s390_vstrczbs); 9449 INTRINSIC_WITH_CC(s390_vstrczhs); 9450 INTRINSIC_WITH_CC(s390_vstrczfs); 9451 9452 INTRINSIC_WITH_CC(s390_vfcesbs); 9453 INTRINSIC_WITH_CC(s390_vfcedbs); 9454 INTRINSIC_WITH_CC(s390_vfchsbs); 9455 INTRINSIC_WITH_CC(s390_vfchdbs); 9456 INTRINSIC_WITH_CC(s390_vfchesbs); 9457 INTRINSIC_WITH_CC(s390_vfchedbs); 9458 9459 INTRINSIC_WITH_CC(s390_vftcisb); 9460 INTRINSIC_WITH_CC(s390_vftcidb); 9461 9462 #undef INTRINSIC_WITH_CC 9463 9464 default: 9465 return nullptr; 9466 } 9467 } 9468 9469 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 9470 const CallExpr *E) { 9471 auto MakeLdg = [&](unsigned IntrinsicID) { 9472 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9473 clang::CharUnits Align = 9474 getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); 9475 return Builder.CreateCall( 9476 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9477 Ptr->getType()}), 9478 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 9479 }; 9480 auto MakeScopedAtomic = [&](unsigned IntrinsicID) { 9481 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9482 return Builder.CreateCall( 9483 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9484 Ptr->getType()}), 9485 {Ptr, EmitScalarExpr(E->getArg(1))}); 9486 }; 9487 switch (BuiltinID) { 9488 case NVPTX::BI__nvvm_atom_add_gen_i: 9489 case 
NVPTX::BI__nvvm_atom_add_gen_l: 9490 case NVPTX::BI__nvvm_atom_add_gen_ll: 9491 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 9492 9493 case NVPTX::BI__nvvm_atom_sub_gen_i: 9494 case NVPTX::BI__nvvm_atom_sub_gen_l: 9495 case NVPTX::BI__nvvm_atom_sub_gen_ll: 9496 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 9497 9498 case NVPTX::BI__nvvm_atom_and_gen_i: 9499 case NVPTX::BI__nvvm_atom_and_gen_l: 9500 case NVPTX::BI__nvvm_atom_and_gen_ll: 9501 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 9502 9503 case NVPTX::BI__nvvm_atom_or_gen_i: 9504 case NVPTX::BI__nvvm_atom_or_gen_l: 9505 case NVPTX::BI__nvvm_atom_or_gen_ll: 9506 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 9507 9508 case NVPTX::BI__nvvm_atom_xor_gen_i: 9509 case NVPTX::BI__nvvm_atom_xor_gen_l: 9510 case NVPTX::BI__nvvm_atom_xor_gen_ll: 9511 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 9512 9513 case NVPTX::BI__nvvm_atom_xchg_gen_i: 9514 case NVPTX::BI__nvvm_atom_xchg_gen_l: 9515 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 9516 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 9517 9518 case NVPTX::BI__nvvm_atom_max_gen_i: 9519 case NVPTX::BI__nvvm_atom_max_gen_l: 9520 case NVPTX::BI__nvvm_atom_max_gen_ll: 9521 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 9522 9523 case NVPTX::BI__nvvm_atom_max_gen_ui: 9524 case NVPTX::BI__nvvm_atom_max_gen_ul: 9525 case NVPTX::BI__nvvm_atom_max_gen_ull: 9526 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 9527 9528 case NVPTX::BI__nvvm_atom_min_gen_i: 9529 case NVPTX::BI__nvvm_atom_min_gen_l: 9530 case NVPTX::BI__nvvm_atom_min_gen_ll: 9531 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 9532 9533 case NVPTX::BI__nvvm_atom_min_gen_ui: 9534 case NVPTX::BI__nvvm_atom_min_gen_ul: 9535 case NVPTX::BI__nvvm_atom_min_gen_ull: 9536 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 9537 9538 case 
NVPTX::BI__nvvm_atom_cas_gen_i: 9539 case NVPTX::BI__nvvm_atom_cas_gen_l: 9540 case NVPTX::BI__nvvm_atom_cas_gen_ll: 9541 // __nvvm_atom_cas_gen_* should return the old value rather than the 9542 // success flag. 9543 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 9544 9545 case NVPTX::BI__nvvm_atom_add_gen_f: { 9546 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9547 Value *Val = EmitScalarExpr(E->getArg(1)); 9548 // atomicrmw only deals with integer arguments so we need to use 9549 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 9550 Value *FnALAF32 = 9551 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 9552 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 9553 } 9554 9555 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 9556 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9557 Value *Val = EmitScalarExpr(E->getArg(1)); 9558 Value *FnALI32 = 9559 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 9560 return Builder.CreateCall(FnALI32, {Ptr, Val}); 9561 } 9562 9563 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 9564 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9565 Value *Val = EmitScalarExpr(E->getArg(1)); 9566 Value *FnALD32 = 9567 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 9568 return Builder.CreateCall(FnALD32, {Ptr, Val}); 9569 } 9570 9571 case NVPTX::BI__nvvm_ldg_c: 9572 case NVPTX::BI__nvvm_ldg_c2: 9573 case NVPTX::BI__nvvm_ldg_c4: 9574 case NVPTX::BI__nvvm_ldg_s: 9575 case NVPTX::BI__nvvm_ldg_s2: 9576 case NVPTX::BI__nvvm_ldg_s4: 9577 case NVPTX::BI__nvvm_ldg_i: 9578 case NVPTX::BI__nvvm_ldg_i2: 9579 case NVPTX::BI__nvvm_ldg_i4: 9580 case NVPTX::BI__nvvm_ldg_l: 9581 case NVPTX::BI__nvvm_ldg_ll: 9582 case NVPTX::BI__nvvm_ldg_ll2: 9583 case NVPTX::BI__nvvm_ldg_uc: 9584 case NVPTX::BI__nvvm_ldg_uc2: 9585 case NVPTX::BI__nvvm_ldg_uc4: 9586 case NVPTX::BI__nvvm_ldg_us: 9587 case NVPTX::BI__nvvm_ldg_us2: 9588 case NVPTX::BI__nvvm_ldg_us4: 9589 case NVPTX::BI__nvvm_ldg_ui: 9590 case 
NVPTX::BI__nvvm_ldg_ui2: 9591 case NVPTX::BI__nvvm_ldg_ui4: 9592 case NVPTX::BI__nvvm_ldg_ul: 9593 case NVPTX::BI__nvvm_ldg_ull: 9594 case NVPTX::BI__nvvm_ldg_ull2: 9595 // PTX Interoperability section 2.2: "For a vector with an even number of 9596 // elements, its alignment is set to number of elements times the alignment 9597 // of its member: n*alignof(t)." 9598 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 9599 case NVPTX::BI__nvvm_ldg_f: 9600 case NVPTX::BI__nvvm_ldg_f2: 9601 case NVPTX::BI__nvvm_ldg_f4: 9602 case NVPTX::BI__nvvm_ldg_d: 9603 case NVPTX::BI__nvvm_ldg_d2: 9604 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 9605 9606 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 9607 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 9608 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 9609 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 9610 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 9611 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 9612 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 9613 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 9614 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 9615 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 9616 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 9617 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 9618 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 9619 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 9620 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 9621 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 9622 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 9623 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 9624 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 9625 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 9626 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 9627 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 9628 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 9629 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 9630 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 9631 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 9632 case 
NVPTX::BI__nvvm_atom_cta_max_gen_ll: 9633 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 9634 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 9635 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 9636 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 9637 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 9638 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 9639 case NVPTX::BI__nvvm_atom_sys_max_gen_ll: 9640 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 9641 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 9642 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 9643 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 9644 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 9645 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 9646 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 9647 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 9648 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 9649 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 9650 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 9651 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 9652 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 9653 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 9654 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 9655 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 9656 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 9657 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 9658 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 9659 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 9660 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 9661 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 9662 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 9663 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 9664 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 9665 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 9666 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 9667 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 9668 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 9669 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 9670 case 
NVPTX::BI__nvvm_atom_sys_and_gen_ll: 9671 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 9672 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 9673 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 9674 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 9675 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 9676 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 9677 case NVPTX::BI__nvvm_atom_sys_or_gen_l: 9678 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 9679 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 9680 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 9681 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 9682 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 9683 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 9684 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 9685 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 9686 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 9687 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 9688 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 9689 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 9690 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 9691 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9692 return Builder.CreateCall( 9693 CGM.getIntrinsic( 9694 Intrinsic::nvvm_atomic_cas_gen_i_cta, 9695 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9696 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9697 } 9698 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 9699 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 9700 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 9701 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9702 return Builder.CreateCall( 9703 CGM.getIntrinsic( 9704 Intrinsic::nvvm_atomic_cas_gen_i_sys, 9705 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9706 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9707 } 9708 case NVPTX::BI__nvvm_match_all_sync_i32p: 9709 case NVPTX::BI__nvvm_match_all_sync_i64p: { 9710 Value *Mask = EmitScalarExpr(E->getArg(0)); 9711 Value *Val = EmitScalarExpr(E->getArg(1)); 9712 Address 
PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); 9713 Value *ResultPair = Builder.CreateCall( 9714 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p 9715 ? Intrinsic::nvvm_match_all_sync_i32p 9716 : Intrinsic::nvvm_match_all_sync_i64p), 9717 {Mask, Val}); 9718 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), 9719 PredOutPtr.getElementType()); 9720 Builder.CreateStore(Pred, PredOutPtr); 9721 return Builder.CreateExtractValue(ResultPair, 0); 9722 } 9723 default: 9724 return nullptr; 9725 } 9726 } 9727 9728 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 9729 const CallExpr *E) { 9730 switch (BuiltinID) { 9731 case WebAssembly::BI__builtin_wasm_current_memory: { 9732 llvm::Type *ResultType = ConvertType(E->getType()); 9733 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 9734 return Builder.CreateCall(Callee); 9735 } 9736 case WebAssembly::BI__builtin_wasm_grow_memory: { 9737 Value *X = EmitScalarExpr(E->getArg(0)); 9738 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 9739 return Builder.CreateCall(Callee, X); 9740 } 9741 case WebAssembly::BI__builtin_wasm_throw: { 9742 Value *Tag = EmitScalarExpr(E->getArg(0)); 9743 Value *Obj = EmitScalarExpr(E->getArg(1)); 9744 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); 9745 return Builder.CreateCall(Callee, {Tag, Obj}); 9746 } 9747 case WebAssembly::BI__builtin_wasm_rethrow: { 9748 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); 9749 return Builder.CreateCall(Callee); 9750 } 9751 9752 default: 9753 return nullptr; 9754 } 9755 } 9756