1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenFunction.h" 15 #include "CGCXXABI.h" 16 #include "CGObjCRuntime.h" 17 #include "CGOpenCLRuntime.h" 18 #include "CodeGenModule.h" 19 #include "TargetInfo.h" 20 #include "clang/AST/ASTContext.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/Basic/TargetBuiltins.h" 23 #include "clang/Basic/TargetInfo.h" 24 #include "clang/CodeGen/CGFunctionInfo.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/IR/CallSite.h" 27 #include "llvm/IR/DataLayout.h" 28 #include "llvm/IR/InlineAsm.h" 29 #include "llvm/IR/Intrinsics.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include <sstream> 32 33 using namespace clang; 34 using namespace CodeGen; 35 using namespace llvm; 36 37 /// getBuiltinLibFunction - Given a builtin id for a function like 38 /// "__builtin_fabsf", return a Function* for "fabsf". 39 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 40 unsigned BuiltinID) { 41 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 42 43 // Get the name, skip over the __builtin_ prefix (if necessary). 44 StringRef Name; 45 GlobalDecl D(FD); 46 47 // If the builtin has been declared explicitly with an assembler label, 48 // use the mangled name. This differs from the plain label on platforms 49 // that prefix labels. 
50 if (FD->hasAttr<AsmLabelAttr>()) 51 Name = getMangledName(D); 52 else 53 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 54 55 llvm::FunctionType *Ty = 56 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 57 58 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 59 } 60 61 /// Emit the conversions required to turn the given value into an 62 /// integer of the given size. 63 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 64 QualType T, llvm::IntegerType *IntType) { 65 V = CGF.EmitToMemory(V, T); 66 67 if (V->getType()->isPointerTy()) 68 return CGF.Builder.CreatePtrToInt(V, IntType); 69 70 assert(V->getType() == IntType); 71 return V; 72 } 73 74 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 75 QualType T, llvm::Type *ResultType) { 76 V = CGF.EmitFromMemory(V, T); 77 78 if (ResultType->isPointerTy()) 79 return CGF.Builder.CreateIntToPtr(V, ResultType); 80 81 assert(V->getType() == ResultType); 82 return V; 83 } 84 85 /// Utility to insert an atomic instruction based on Instrinsic::ID 86 /// and the expression node. 
87 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 88 llvm::AtomicRMWInst::BinOp Kind, 89 const CallExpr *E) { 90 QualType T = E->getType(); 91 assert(E->getArg(0)->getType()->isPointerType()); 92 assert(CGF.getContext().hasSameUnqualifiedType(T, 93 E->getArg(0)->getType()->getPointeeType())); 94 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 95 96 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 97 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 98 99 llvm::IntegerType *IntType = 100 llvm::IntegerType::get(CGF.getLLVMContext(), 101 CGF.getContext().getTypeSize(T)); 102 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 103 104 llvm::Value *Args[2]; 105 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 106 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 107 llvm::Type *ValueType = Args[1]->getType(); 108 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 109 110 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 111 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 112 return EmitFromInt(CGF, Result, T, ValueType); 113 } 114 115 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 116 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 117 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 118 119 // Convert the type of the pointer to a pointer to the stored type. 
120 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 121 Value *BC = CGF.Builder.CreateBitCast( 122 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 123 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 124 LV.setNontemporal(true); 125 CGF.EmitStoreOfScalar(Val, LV, false); 126 return nullptr; 127 } 128 129 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 130 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 131 132 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 133 LV.setNontemporal(true); 134 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 135 } 136 137 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 138 llvm::AtomicRMWInst::BinOp Kind, 139 const CallExpr *E) { 140 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 141 } 142 143 /// Utility to insert an atomic instruction based Instrinsic::ID and 144 /// the expression node, where the return value is the result of the 145 /// operation. 
146 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 147 llvm::AtomicRMWInst::BinOp Kind, 148 const CallExpr *E, 149 Instruction::BinaryOps Op, 150 bool Invert = false) { 151 QualType T = E->getType(); 152 assert(E->getArg(0)->getType()->isPointerType()); 153 assert(CGF.getContext().hasSameUnqualifiedType(T, 154 E->getArg(0)->getType()->getPointeeType())); 155 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 156 157 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 158 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 159 160 llvm::IntegerType *IntType = 161 llvm::IntegerType::get(CGF.getLLVMContext(), 162 CGF.getContext().getTypeSize(T)); 163 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 164 165 llvm::Value *Args[2]; 166 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 167 llvm::Type *ValueType = Args[1]->getType(); 168 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 169 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 170 171 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 172 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 173 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 174 if (Invert) 175 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 176 llvm::ConstantInt::get(IntType, -1)); 177 Result = EmitFromInt(CGF, Result, T, ValueType); 178 return RValue::get(Result); 179 } 180 181 /// @brief Utility to insert an atomic cmpxchg instruction. 182 /// 183 /// @param CGF The current codegen function. 184 /// @param E Builtin call expression to convert to cmpxchg. 185 /// arg0 - address to operate on 186 /// arg1 - value to compare with 187 /// arg2 - new value 188 /// @param ReturnBool Specifies whether to return success flag of 189 /// cmpxchg result or the old value. 
190 /// 191 /// @returns result of cmpxchg, according to ReturnBool 192 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 193 bool ReturnBool) { 194 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 195 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 196 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 197 198 llvm::IntegerType *IntType = llvm::IntegerType::get( 199 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 200 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 201 202 Value *Args[3]; 203 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 204 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 205 llvm::Type *ValueType = Args[1]->getType(); 206 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 207 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 208 209 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 210 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 211 llvm::AtomicOrdering::SequentiallyConsistent); 212 if (ReturnBool) 213 // Extract boolean success flag and zext it to int. 214 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 215 CGF.ConvertType(E->getType())); 216 else 217 // Extract old value and emit it using the same type as compare value. 218 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 219 ValueType); 220 } 221 222 // Emit a simple mangled intrinsic that has 1 argument and a return type 223 // matching the argument type. 224 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 225 const CallExpr *E, 226 unsigned IntrinsicID) { 227 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 228 229 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 230 return CGF.Builder.CreateCall(F, Src0); 231 } 232 233 // Emit an intrinsic that has 2 operands of the same type as its result. 
234 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 235 const CallExpr *E, 236 unsigned IntrinsicID) { 237 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 238 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 239 240 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 241 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 242 } 243 244 // Emit an intrinsic that has 3 operands of the same type as its result. 245 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 246 const CallExpr *E, 247 unsigned IntrinsicID) { 248 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 249 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 250 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 251 252 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 253 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 254 } 255 256 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 257 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 258 const CallExpr *E, 259 unsigned IntrinsicID) { 260 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 261 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 262 263 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 264 return CGF.Builder.CreateCall(F, {Src0, Src1}); 265 } 266 267 /// EmitFAbs - Emit a call to @llvm.fabs(). 268 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 269 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 270 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 271 Call->setDoesNotAccessMemory(); 272 return Call; 273 } 274 275 /// Emit the computation of the sign bit for a floating point value. Returns 276 /// the i1 sign bit value. 
277 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 278 LLVMContext &C = CGF.CGM.getLLVMContext(); 279 280 llvm::Type *Ty = V->getType(); 281 int Width = Ty->getPrimitiveSizeInBits(); 282 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 283 V = CGF.Builder.CreateBitCast(V, IntTy); 284 if (Ty->isPPC_FP128Ty()) { 285 // We want the sign bit of the higher-order double. The bitcast we just 286 // did works as if the double-double was stored to memory and then 287 // read as an i128. The "store" will put the higher-order double in the 288 // lower address in both little- and big-Endian modes, but the "load" 289 // will treat those bits as a different part of the i128: the low bits in 290 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 291 // we need to shift the high bits down to the low before truncating. 292 Width >>= 1; 293 if (CGF.getTarget().isBigEndian()) { 294 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 295 V = CGF.Builder.CreateLShr(V, ShiftCst); 296 } 297 // We are truncating value in order to extract the higher-order 298 // double, which we will be using to extract the sign from. 299 IntTy = llvm::IntegerType::get(C, Width); 300 V = CGF.Builder.CreateTrunc(V, IntTy); 301 } 302 Value *Zero = llvm::Constant::getNullValue(IntTy); 303 return CGF.Builder.CreateICmpSLT(V, Zero); 304 } 305 306 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 307 const CallExpr *E, llvm::Value *calleeValue) { 308 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E, 309 ReturnValueSlot(), Fn); 310 } 311 312 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 313 /// depending on IntrinsicID. 314 /// 315 /// \arg CGF The current codegen function. 316 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 317 /// \arg X The first argument to the llvm.*.with.overflow.*. 318 /// \arg Y The second argument to the llvm.*.with.overflow.*. 
319 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 320 /// \returns The result (i.e. sum/product) returned by the intrinsic. 321 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 322 const llvm::Intrinsic::ID IntrinsicID, 323 llvm::Value *X, llvm::Value *Y, 324 llvm::Value *&Carry) { 325 // Make sure we have integers of the same width. 326 assert(X->getType() == Y->getType() && 327 "Arguments must be the same type. (Did you forget to make sure both " 328 "arguments have the same integer width?)"); 329 330 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 331 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 332 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 333 return CGF.Builder.CreateExtractValue(Tmp, 0); 334 } 335 336 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 337 unsigned IntrinsicID, 338 int low, int high) { 339 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 340 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 341 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 342 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 343 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 344 return Call; 345 } 346 347 namespace { 348 struct WidthAndSignedness { 349 unsigned Width; 350 bool Signed; 351 }; 352 } 353 354 static WidthAndSignedness 355 getIntegerWidthAndSignedness(const clang::ASTContext &context, 356 const clang::QualType Type) { 357 assert(Type->isIntegerType() && "Given type is not an integer."); 358 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 359 bool Signed = Type->isSignedIntegerType(); 360 return {Width, Signed}; 361 } 362 363 // Given one or more integer types, this function produces an integer type that 364 // encompasses them: any value in one of the given types could be expressed in 365 // the encompassing type. 
366 static struct WidthAndSignedness 367 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 368 assert(Types.size() > 0 && "Empty list of types."); 369 370 // If any of the given types is signed, we must return a signed type. 371 bool Signed = false; 372 for (const auto &Type : Types) { 373 Signed |= Type.Signed; 374 } 375 376 // The encompassing type must have a width greater than or equal to the width 377 // of the specified types. Aditionally, if the encompassing type is signed, 378 // its width must be strictly greater than the width of any unsigned types 379 // given. 380 unsigned Width = 0; 381 for (const auto &Type : Types) { 382 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 383 if (Width < MinWidth) { 384 Width = MinWidth; 385 } 386 } 387 388 return {Width, Signed}; 389 } 390 391 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 392 llvm::Type *DestType = Int8PtrTy; 393 if (ArgValue->getType() != DestType) 394 ArgValue = 395 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 396 397 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 398 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 399 } 400 401 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 402 /// __builtin_object_size(p, @p To) is correct 403 static bool areBOSTypesCompatible(int From, int To) { 404 // Note: Our __builtin_object_size implementation currently treats Type=0 and 405 // Type=2 identically. Encoding this implementation detail here may make 406 // improving __builtin_object_size difficult in the future, so it's omitted. 407 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 408 } 409 410 static llvm::Value * 411 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 412 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 413 } 414 415 llvm::Value * 416 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 417 llvm::IntegerType *ResType) { 418 uint64_t ObjectSize; 419 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 420 return emitBuiltinObjectSize(E, Type, ResType); 421 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 422 } 423 424 /// Returns a Value corresponding to the size of the given expression. 425 /// This Value may be either of the following: 426 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 427 /// it) 428 /// - A call to the @llvm.objectsize intrinsic 429 llvm::Value * 430 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 431 llvm::IntegerType *ResType) { 432 // We need to reference an argument if the pointer is a parameter with the 433 // pass_object_size attribute. 434 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 435 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 436 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 437 if (Param != nullptr && PS != nullptr && 438 areBOSTypesCompatible(PS->getType(), Type)) { 439 auto Iter = SizeArguments.find(Param); 440 assert(Iter != SizeArguments.end()); 441 442 const ImplicitParamDecl *D = Iter->second; 443 auto DIter = LocalDeclMap.find(D); 444 assert(DIter != LocalDeclMap.end()); 445 446 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 447 getContext().getSizeType(), E->getLocStart()); 448 } 449 } 450 451 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 452 // evaluate E for side-effects. In either case, we shouldn't lower to 453 // @llvm.objectsize. 454 if (Type == 3 || E->HasSideEffects(getContext())) 455 return getDefaultBuiltinObjectSizeResult(Type, ResType); 456 457 // LLVM only supports 0 and 2, make sure that we pass along that 458 // as a boolean. 
459 auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1); 460 // FIXME: Get right address space. 461 llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)}; 462 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); 463 return Builder.CreateCall(F, {EmitScalarExpr(E), CI}); 464 } 465 466 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we 467 // handle them here. 468 enum class CodeGenFunction::MSVCIntrin { 469 _BitScanForward, 470 _BitScanReverse, 471 _InterlockedAnd, 472 _InterlockedDecrement, 473 _InterlockedExchange, 474 _InterlockedExchangeAdd, 475 _InterlockedExchangeSub, 476 _InterlockedIncrement, 477 _InterlockedOr, 478 _InterlockedXor, 479 }; 480 481 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 482 const CallExpr *E) { 483 switch (BuiltinID) { 484 case MSVCIntrin::_BitScanForward: 485 case MSVCIntrin::_BitScanReverse: { 486 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 487 488 llvm::Type *ArgType = ArgValue->getType(); 489 llvm::Type *IndexType = 490 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 491 llvm::Type *ResultType = ConvertType(E->getType()); 492 493 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 494 Value *ResZero = llvm::Constant::getNullValue(ResultType); 495 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 496 497 BasicBlock *Begin = Builder.GetInsertBlock(); 498 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 499 Builder.SetInsertPoint(End); 500 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 501 502 Builder.SetInsertPoint(Begin); 503 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 504 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 505 Builder.CreateCondBr(IsZero, End, NotZero); 506 Result->addIncoming(ResZero, Begin); 507 508 Builder.SetInsertPoint(NotZero); 509 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 510 511 if (BuiltinID == 
MSVCIntrin::_BitScanForward) { 512 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 513 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 514 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 515 Builder.CreateStore(ZeroCount, IndexAddress, false); 516 } else { 517 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 518 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 519 520 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 521 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 522 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 523 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 524 Builder.CreateStore(Index, IndexAddress, false); 525 } 526 Builder.CreateBr(End); 527 Result->addIncoming(ResOne, NotZero); 528 529 Builder.SetInsertPoint(End); 530 return Result; 531 } 532 case MSVCIntrin::_InterlockedAnd: 533 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 534 case MSVCIntrin::_InterlockedExchange: 535 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 536 case MSVCIntrin::_InterlockedExchangeAdd: 537 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 538 case MSVCIntrin::_InterlockedExchangeSub: 539 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 540 case MSVCIntrin::_InterlockedOr: 541 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 542 case MSVCIntrin::_InterlockedXor: 543 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 544 545 case MSVCIntrin::_InterlockedDecrement: { 546 llvm::Type *IntTy = ConvertType(E->getType()); 547 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 548 AtomicRMWInst::Sub, 549 EmitScalarExpr(E->getArg(0)), 550 ConstantInt::get(IntTy, 1), 551 llvm::AtomicOrdering::SequentiallyConsistent); 552 return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); 553 } 554 case MSVCIntrin::_InterlockedIncrement: { 555 llvm::Type *IntTy = 
ConvertType(E->getType()); 556 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 557 AtomicRMWInst::Add, 558 EmitScalarExpr(E->getArg(0)), 559 ConstantInt::get(IntTy, 1), 560 llvm::AtomicOrdering::SequentiallyConsistent); 561 return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); 562 } 563 } 564 llvm_unreachable("Incorrect MSVC intrinsic!"); 565 } 566 567 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 568 unsigned BuiltinID, const CallExpr *E, 569 ReturnValueSlot ReturnValue) { 570 // See if we can constant fold this builtin. If so, don't emit it at all. 571 Expr::EvalResult Result; 572 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 573 !Result.hasSideEffects()) { 574 if (Result.Val.isInt()) 575 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 576 Result.Val.getInt())); 577 if (Result.Val.isFloat()) 578 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 579 Result.Val.getFloat())); 580 } 581 582 switch (BuiltinID) { 583 default: break; // Handle intrinsics and libm functions below. 584 case Builtin::BI__builtin___CFStringMakeConstantString: 585 case Builtin::BI__builtin___NSStringMakeConstantString: 586 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 587 case Builtin::BI__builtin_stdarg_start: 588 case Builtin::BI__builtin_va_start: 589 case Builtin::BI__va_start: 590 case Builtin::BI__builtin_va_end: 591 return RValue::get( 592 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 593 ? 
EmitScalarExpr(E->getArg(0)) 594 : EmitVAListRef(E->getArg(0)).getPointer(), 595 BuiltinID != Builtin::BI__builtin_va_end)); 596 case Builtin::BI__builtin_va_copy: { 597 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 598 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 599 600 llvm::Type *Type = Int8PtrTy; 601 602 DstPtr = Builder.CreateBitCast(DstPtr, Type); 603 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 604 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 605 {DstPtr, SrcPtr})); 606 } 607 case Builtin::BI__builtin_abs: 608 case Builtin::BI__builtin_labs: 609 case Builtin::BI__builtin_llabs: { 610 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 611 612 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 613 Value *CmpResult = 614 Builder.CreateICmpSGE(ArgValue, 615 llvm::Constant::getNullValue(ArgValue->getType()), 616 "abscond"); 617 Value *Result = 618 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 619 620 return RValue::get(Result); 621 } 622 case Builtin::BI__builtin_fabs: 623 case Builtin::BI__builtin_fabsf: 624 case Builtin::BI__builtin_fabsl: { 625 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 626 } 627 case Builtin::BI__builtin_fmod: 628 case Builtin::BI__builtin_fmodf: 629 case Builtin::BI__builtin_fmodl: { 630 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 631 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 632 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 633 return RValue::get(Result); 634 } 635 case Builtin::BI__builtin_copysign: 636 case Builtin::BI__builtin_copysignf: 637 case Builtin::BI__builtin_copysignl: { 638 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 639 } 640 case Builtin::BI__builtin_ceil: 641 case Builtin::BI__builtin_ceilf: 642 case Builtin::BI__builtin_ceill: { 643 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 644 } 645 case Builtin::BI__builtin_floor: 646 case Builtin::BI__builtin_floorf: 647 case 
Builtin::BI__builtin_floorl: { 648 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 649 } 650 case Builtin::BI__builtin_trunc: 651 case Builtin::BI__builtin_truncf: 652 case Builtin::BI__builtin_truncl: { 653 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 654 } 655 case Builtin::BI__builtin_rint: 656 case Builtin::BI__builtin_rintf: 657 case Builtin::BI__builtin_rintl: { 658 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 659 } 660 case Builtin::BI__builtin_nearbyint: 661 case Builtin::BI__builtin_nearbyintf: 662 case Builtin::BI__builtin_nearbyintl: { 663 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 664 } 665 case Builtin::BI__builtin_round: 666 case Builtin::BI__builtin_roundf: 667 case Builtin::BI__builtin_roundl: { 668 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 669 } 670 case Builtin::BI__builtin_fmin: 671 case Builtin::BI__builtin_fminf: 672 case Builtin::BI__builtin_fminl: { 673 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 674 } 675 case Builtin::BI__builtin_fmax: 676 case Builtin::BI__builtin_fmaxf: 677 case Builtin::BI__builtin_fmaxl: { 678 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 679 } 680 case Builtin::BI__builtin_conj: 681 case Builtin::BI__builtin_conjf: 682 case Builtin::BI__builtin_conjl: { 683 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 684 Value *Real = ComplexVal.first; 685 Value *Imag = ComplexVal.second; 686 Value *Zero = 687 Imag->getType()->isFPOrFPVectorTy() 688 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 689 : llvm::Constant::getNullValue(Imag->getType()); 690 691 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 692 return RValue::getComplex(std::make_pair(Real, Imag)); 693 } 694 case Builtin::BI__builtin_creal: 695 case Builtin::BI__builtin_crealf: 696 case Builtin::BI__builtin_creall: 697 case Builtin::BIcreal: 698 case Builtin::BIcrealf: 699 case Builtin::BIcreall: { 700 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 701 return RValue::get(ComplexVal.first); 702 } 703 704 case Builtin::BI__builtin_cimag: 705 case Builtin::BI__builtin_cimagf: 706 case Builtin::BI__builtin_cimagl: 707 case Builtin::BIcimag: 708 case Builtin::BIcimagf: 709 case Builtin::BIcimagl: { 710 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 711 return RValue::get(ComplexVal.second); 712 } 713 714 case Builtin::BI__builtin_ctzs: 715 case Builtin::BI__builtin_ctz: 716 case Builtin::BI__builtin_ctzl: 717 case Builtin::BI__builtin_ctzll: { 718 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 719 720 llvm::Type *ArgType = ArgValue->getType(); 721 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 722 723 llvm::Type *ResultType = ConvertType(E->getType()); 724 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 725 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 726 if (Result->getType() != ResultType) 727 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 728 "cast"); 729 return RValue::get(Result); 730 } 731 case Builtin::BI__builtin_clzs: 732 case Builtin::BI__builtin_clz: 733 case Builtin::BI__builtin_clzl: 734 case Builtin::BI__builtin_clzll: { 735 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 736 737 llvm::Type *ArgType = ArgValue->getType(); 738 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 739 740 llvm::Type *ResultType = ConvertType(E->getType()); 741 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 742 Value *Result = 
Builder.CreateCall(F, {ArgValue, ZeroUndef}); 743 if (Result->getType() != ResultType) 744 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 745 "cast"); 746 return RValue::get(Result); 747 } 748 case Builtin::BI__builtin_ffs: 749 case Builtin::BI__builtin_ffsl: 750 case Builtin::BI__builtin_ffsll: { 751 // ffs(x) -> x ? cttz(x) + 1 : 0 752 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 753 754 llvm::Type *ArgType = ArgValue->getType(); 755 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 756 757 llvm::Type *ResultType = ConvertType(E->getType()); 758 Value *Tmp = 759 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 760 llvm::ConstantInt::get(ArgType, 1)); 761 Value *Zero = llvm::Constant::getNullValue(ArgType); 762 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 763 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 764 if (Result->getType() != ResultType) 765 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 766 "cast"); 767 return RValue::get(Result); 768 } 769 case Builtin::BI__builtin_parity: 770 case Builtin::BI__builtin_parityl: 771 case Builtin::BI__builtin_parityll: { 772 // parity(x) -> ctpop(x) & 1 773 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 774 775 llvm::Type *ArgType = ArgValue->getType(); 776 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 777 778 llvm::Type *ResultType = ConvertType(E->getType()); 779 Value *Tmp = Builder.CreateCall(F, ArgValue); 780 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 781 if (Result->getType() != ResultType) 782 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 783 "cast"); 784 return RValue::get(Result); 785 } 786 case Builtin::BI__popcnt16: 787 case Builtin::BI__popcnt: 788 case Builtin::BI__popcnt64: 789 case Builtin::BI__builtin_popcount: 790 case Builtin::BI__builtin_popcountl: 791 case Builtin::BI__builtin_popcountll: { 792 Value *ArgValue = 
EmitScalarExpr(E->getArg(0)); 793 794 llvm::Type *ArgType = ArgValue->getType(); 795 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 796 797 llvm::Type *ResultType = ConvertType(E->getType()); 798 Value *Result = Builder.CreateCall(F, ArgValue); 799 if (Result->getType() != ResultType) 800 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 801 "cast"); 802 return RValue::get(Result); 803 } 804 case Builtin::BI_rotr8: 805 case Builtin::BI_rotr16: 806 case Builtin::BI_rotr: 807 case Builtin::BI_lrotr: 808 case Builtin::BI_rotr64: { 809 Value *Val = EmitScalarExpr(E->getArg(0)); 810 Value *Shift = EmitScalarExpr(E->getArg(1)); 811 812 llvm::Type *ArgType = Val->getType(); 813 Shift = Builder.CreateIntCast(Shift, ArgType, false); 814 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 815 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 816 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 817 818 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 819 Shift = Builder.CreateAnd(Shift, Mask); 820 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 821 822 Value *RightShifted = Builder.CreateLShr(Val, Shift); 823 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 824 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 825 826 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 827 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 828 return RValue::get(Result); 829 } 830 case Builtin::BI_rotl8: 831 case Builtin::BI_rotl16: 832 case Builtin::BI_rotl: 833 case Builtin::BI_lrotl: 834 case Builtin::BI_rotl64: { 835 Value *Val = EmitScalarExpr(E->getArg(0)); 836 Value *Shift = EmitScalarExpr(E->getArg(1)); 837 838 llvm::Type *ArgType = Val->getType(); 839 Shift = Builder.CreateIntCast(Shift, ArgType, false); 840 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 841 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 842 Value 
*ArgZero = llvm::Constant::getNullValue(ArgType); 843 844 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 845 Shift = Builder.CreateAnd(Shift, Mask); 846 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 847 848 Value *LeftShifted = Builder.CreateShl(Val, Shift); 849 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 850 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 851 852 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 853 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 854 return RValue::get(Result); 855 } 856 case Builtin::BI__builtin_unpredictable: { 857 // Always return the argument of __builtin_unpredictable. LLVM does not 858 // handle this builtin. Metadata for this builtin should be added directly 859 // to instructions such as branches or switches that use it. 860 return RValue::get(EmitScalarExpr(E->getArg(0))); 861 } 862 case Builtin::BI__builtin_expect: { 863 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 864 llvm::Type *ArgType = ArgValue->getType(); 865 866 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 867 // Don't generate llvm.expect on -O0 as the backend won't use it for 868 // anything. 869 // Note, we still IRGen ExpectedValue because it could have side-effects. 870 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 871 return RValue::get(ArgValue); 872 873 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 874 Value *Result = 875 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 876 return RValue::get(Result); 877 } 878 case Builtin::BI__builtin_assume_aligned: { 879 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 880 Value *OffsetValue = 881 (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; 882 883 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 884 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 885 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 886 887 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 888 return RValue::get(PtrValue); 889 } 890 case Builtin::BI__assume: 891 case Builtin::BI__builtin_assume: { 892 if (E->getArg(0)->HasSideEffects(getContext())) 893 return RValue::get(nullptr); 894 895 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 896 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 897 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 898 } 899 case Builtin::BI__builtin_bswap16: 900 case Builtin::BI__builtin_bswap32: 901 case Builtin::BI__builtin_bswap64: { 902 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 903 } 904 case Builtin::BI__builtin_bitreverse8: 905 case Builtin::BI__builtin_bitreverse16: 906 case Builtin::BI__builtin_bitreverse32: 907 case Builtin::BI__builtin_bitreverse64: { 908 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 909 } 910 case Builtin::BI__builtin_object_size: { 911 unsigned Type = 912 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 913 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 914 915 // We pass this builtin onto the optimizer so that it can figure out the 916 // object size in more complex cases. 917 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType)); 918 } 919 case Builtin::BI__builtin_prefetch: { 920 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 921 // FIXME: Technically these constants should be of type 'int', yes? 922 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 923 llvm::ConstantInt::get(Int32Ty, 0); 924 Locality = (E->getNumArgs() > 2) ?
EmitScalarExpr(E->getArg(2)) : 925 llvm::ConstantInt::get(Int32Ty, 3); 926 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 927 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 928 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 929 } 930 case Builtin::BI__builtin_readcyclecounter: { 931 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 932 return RValue::get(Builder.CreateCall(F)); 933 } 934 case Builtin::BI__builtin___clear_cache: { 935 Value *Begin = EmitScalarExpr(E->getArg(0)); 936 Value *End = EmitScalarExpr(E->getArg(1)); 937 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 938 return RValue::get(Builder.CreateCall(F, {Begin, End})); 939 } 940 case Builtin::BI__builtin_trap: 941 return RValue::get(EmitTrapCall(Intrinsic::trap)); 942 case Builtin::BI__debugbreak: 943 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 944 case Builtin::BI__builtin_unreachable: { 945 if (SanOpts.has(SanitizerKind::Unreachable)) { 946 SanitizerScope SanScope(this); 947 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 948 SanitizerKind::Unreachable), 949 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), 950 None); 951 } else 952 Builder.CreateUnreachable(); 953 954 // We do need to preserve an insertion point. 
955 EmitBlock(createBasicBlock("unreachable.cont")); 956 957 return RValue::get(nullptr); 958 } 959 960 case Builtin::BI__builtin_powi: 961 case Builtin::BI__builtin_powif: 962 case Builtin::BI__builtin_powil: { 963 Value *Base = EmitScalarExpr(E->getArg(0)); 964 Value *Exponent = EmitScalarExpr(E->getArg(1)); 965 llvm::Type *ArgType = Base->getType(); 966 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 967 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 968 } 969 970 case Builtin::BI__builtin_isgreater: 971 case Builtin::BI__builtin_isgreaterequal: 972 case Builtin::BI__builtin_isless: 973 case Builtin::BI__builtin_islessequal: 974 case Builtin::BI__builtin_islessgreater: 975 case Builtin::BI__builtin_isunordered: { 976 // Ordered comparisons: we know the arguments to these are matching scalar 977 // floating point values. 978 Value *LHS = EmitScalarExpr(E->getArg(0)); 979 Value *RHS = EmitScalarExpr(E->getArg(1)); 980 981 switch (BuiltinID) { 982 default: llvm_unreachable("Unknown ordered comparison"); 983 case Builtin::BI__builtin_isgreater: 984 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 985 break; 986 case Builtin::BI__builtin_isgreaterequal: 987 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 988 break; 989 case Builtin::BI__builtin_isless: 990 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 991 break; 992 case Builtin::BI__builtin_islessequal: 993 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 994 break; 995 case Builtin::BI__builtin_islessgreater: 996 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 997 break; 998 case Builtin::BI__builtin_isunordered: 999 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1000 break; 1001 } 1002 // ZExt bool to int type. 
1003 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1004 } 1005 case Builtin::BI__builtin_isnan: { 1006 Value *V = EmitScalarExpr(E->getArg(0)); 1007 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1008 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1009 } 1010 1011 case Builtin::BIfinite: 1012 case Builtin::BI__finite: 1013 case Builtin::BIfinitef: 1014 case Builtin::BI__finitef: 1015 case Builtin::BIfinitel: 1016 case Builtin::BI__finitel: 1017 case Builtin::BI__builtin_isinf: 1018 case Builtin::BI__builtin_isfinite: { 1019 // isinf(x) --> fabs(x) == infinity 1020 // isfinite(x) --> fabs(x) != infinity 1021 // x != NaN via the ordered compare in either case. 1022 Value *V = EmitScalarExpr(E->getArg(0)); 1023 Value *Fabs = EmitFAbs(*this, V); 1024 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1025 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1026 ? CmpInst::FCMP_OEQ 1027 : CmpInst::FCMP_ONE; 1028 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1029 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1030 } 1031 1032 case Builtin::BI__builtin_isinf_sign: { 1033 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 1034 Value *Arg = EmitScalarExpr(E->getArg(0)); 1035 Value *AbsArg = EmitFAbs(*this, Arg); 1036 Value *IsInf = Builder.CreateFCmpOEQ( 1037 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1038 Value *IsNeg = EmitSignBit(*this, Arg); 1039 1040 llvm::Type *IntTy = ConvertType(E->getType()); 1041 Value *Zero = Constant::getNullValue(IntTy); 1042 Value *One = ConstantInt::get(IntTy, 1); 1043 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1044 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1045 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1046 return RValue::get(Result); 1047 } 1048 1049 case Builtin::BI__builtin_isnormal: { 1050 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1051 Value *V = EmitScalarExpr(E->getArg(0)); 1052 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1053 1054 Value *Abs = EmitFAbs(*this, V); 1055 Value *IsLessThanInf = 1056 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1057 APFloat Smallest = APFloat::getSmallestNormalized( 1058 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1059 Value *IsNormal = 1060 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1061 "isnormal"); 1062 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1063 V = Builder.CreateAnd(V, IsNormal, "and"); 1064 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1065 } 1066 1067 case Builtin::BI__builtin_fpclassify: { 1068 Value *V = EmitScalarExpr(E->getArg(5)); 1069 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1070 1071 // Create Result 1072 BasicBlock *Begin = Builder.GetInsertBlock(); 1073 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1074 Builder.SetInsertPoint(End); 1075 PHINode *Result = 1076 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1077 "fpclassify_result"); 1078 1079 // if (V==0) return FP_ZERO 1080 Builder.SetInsertPoint(Begin); 1081 Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1082 "iszero"); 1083 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1084 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1085 Builder.CreateCondBr(IsZero, End, NotZero); 1086 Result->addIncoming(ZeroLiteral, Begin); 1087 1088 // if (V != V) return FP_NAN 1089 Builder.SetInsertPoint(NotZero); 1090 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1091 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1092 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1093 Builder.CreateCondBr(IsNan, End, NotNan); 1094 Result->addIncoming(NanLiteral, NotZero); 1095 1096 // if (fabs(V) == infinity) return FP_INFINITY 1097 Builder.SetInsertPoint(NotNan); 1098 Value *VAbs = EmitFAbs(*this, V); 1099 Value *IsInf = 1100 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1101 "isinf"); 1102 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1103 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1104 Builder.CreateCondBr(IsInf, End, NotInf); 1105 Result->addIncoming(InfLiteral, NotNan); 1106 1107 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1108 Builder.SetInsertPoint(NotInf); 1109 APFloat Smallest = APFloat::getSmallestNormalized( 1110 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1111 Value *IsNormal = 1112 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1113 "isnormal"); 1114 Value *NormalResult = 1115 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1116 EmitScalarExpr(E->getArg(3))); 1117 Builder.CreateBr(End); 1118 Result->addIncoming(NormalResult, NotInf); 1119 1120 // return Result 1121 Builder.SetInsertPoint(End); 1122 return RValue::get(Result); 1123 } 1124 1125 case Builtin::BIalloca: 1126 case Builtin::BI_alloca: 1127 case Builtin::BI__builtin_alloca: { 1128 Value *Size = EmitScalarExpr(E->getArg(0)); 1129 return 
RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 1130 } 1131 case Builtin::BIbzero: 1132 case Builtin::BI__builtin_bzero: { 1133 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1134 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1135 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1136 E->getArg(0)->getExprLoc(), FD, 0); 1137 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1138 return RValue::get(Dest.getPointer()); 1139 } 1140 case Builtin::BImemcpy: 1141 case Builtin::BI__builtin_memcpy: { 1142 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1143 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1144 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1145 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1146 E->getArg(0)->getExprLoc(), FD, 0); 1147 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1148 E->getArg(1)->getExprLoc(), FD, 1); 1149 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1150 return RValue::get(Dest.getPointer()); 1151 } 1152 1153 case Builtin::BI__builtin___memcpy_chk: { 1154 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
1155 llvm::APSInt Size, DstSize; 1156 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1157 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1158 break; 1159 if (Size.ugt(DstSize)) 1160 break; 1161 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1162 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1163 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1164 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1165 return RValue::get(Dest.getPointer()); 1166 } 1167 1168 case Builtin::BI__builtin_objc_memmove_collectable: { 1169 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1170 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1171 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1172 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1173 DestAddr, SrcAddr, SizeVal); 1174 return RValue::get(DestAddr.getPointer()); 1175 } 1176 1177 case Builtin::BI__builtin___memmove_chk: { 1178 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1179 llvm::APSInt Size, DstSize; 1180 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1181 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1182 break; 1183 if (Size.ugt(DstSize)) 1184 break; 1185 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1186 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1187 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1188 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1189 return RValue::get(Dest.getPointer()); 1190 } 1191 1192 case Builtin::BImemmove: 1193 case Builtin::BI__builtin_memmove: { 1194 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1195 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1196 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1197 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1198 E->getArg(0)->getExprLoc(), FD, 0); 1199 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1200 E->getArg(1)->getExprLoc(), FD, 1); 1201 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1202 return RValue::get(Dest.getPointer()); 1203 } 1204 case Builtin::BImemset: 1205 case Builtin::BI__builtin_memset: { 1206 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1207 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1208 Builder.getInt8Ty()); 1209 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1210 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1211 E->getArg(0)->getExprLoc(), FD, 0); 1212 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1213 return RValue::get(Dest.getPointer()); 1214 } 1215 case Builtin::BI__builtin___memset_chk: { 1216 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1217 llvm::APSInt Size, DstSize; 1218 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1219 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1220 break; 1221 if (Size.ugt(DstSize)) 1222 break; 1223 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1224 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1225 Builder.getInt8Ty()); 1226 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1227 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1228 return RValue::get(Dest.getPointer()); 1229 } 1230 case Builtin::BI__builtin_dwarf_cfa: { 1231 // The offset in bytes from the first argument to the CFA. 1232 // 1233 // Why on earth is this in the frontend? Is there any reason at 1234 // all that the backend can't reasonably determine this while 1235 // lowering llvm.eh.dwarf.cfa()? 1236 // 1237 // TODO: If there's a satisfactory reason, add a target hook for 1238 // this instead of hard-coding 0, which is correct for most targets. 1239 int32_t Offset = 0; 1240 1241 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1242 return RValue::get(Builder.CreateCall(F, 1243 llvm::ConstantInt::get(Int32Ty, Offset))); 1244 } 1245 case Builtin::BI__builtin_return_address: { 1246 Value *Depth = 1247 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1248 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1249 return RValue::get(Builder.CreateCall(F, Depth)); 1250 } 1251 case Builtin::BI_ReturnAddress: { 1252 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1253 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1254 } 1255 case Builtin::BI__builtin_frame_address: { 1256 Value *Depth = 1257 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1258 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1259 return RValue::get(Builder.CreateCall(F, Depth)); 1260 } 1261 case Builtin::BI__builtin_extract_return_addr: { 1262 Value *Address = EmitScalarExpr(E->getArg(0)); 
1263 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1264 return RValue::get(Result); 1265 } 1266 case Builtin::BI__builtin_frob_return_addr: { 1267 Value *Address = EmitScalarExpr(E->getArg(0)); 1268 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1269 return RValue::get(Result); 1270 } 1271 case Builtin::BI__builtin_dwarf_sp_column: { 1272 llvm::IntegerType *Ty 1273 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1274 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1275 if (Column == -1) { 1276 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1277 return RValue::get(llvm::UndefValue::get(Ty)); 1278 } 1279 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1280 } 1281 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1282 Value *Address = EmitScalarExpr(E->getArg(0)); 1283 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1284 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1285 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1286 } 1287 case Builtin::BI__builtin_eh_return: { 1288 Value *Int = EmitScalarExpr(E->getArg(0)); 1289 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1290 1291 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1292 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1293 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1294 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1295 ? Intrinsic::eh_return_i32 1296 : Intrinsic::eh_return_i64); 1297 Builder.CreateCall(F, {Int, Ptr}); 1298 Builder.CreateUnreachable(); 1299 1300 // We do need to preserve an insertion point. 
1301 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1302 1303 return RValue::get(nullptr); 1304 } 1305 case Builtin::BI__builtin_unwind_init: { 1306 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1307 return RValue::get(Builder.CreateCall(F)); 1308 } 1309 case Builtin::BI__builtin_extend_pointer: { 1310 // Extends a pointer to the size of an _Unwind_Word, which is 1311 // uint64_t on all platforms. Generally this gets poked into a 1312 // register and eventually used as an address, so if the 1313 // addressing registers are wider than pointers and the platform 1314 // doesn't implicitly ignore high-order bits when doing 1315 // addressing, we need to make sure we zext / sext based on 1316 // the platform's expectations. 1317 // 1318 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1319 1320 // Cast the pointer to intptr_t. 1321 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1322 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1323 1324 // If that's 64 bits, we're done. 1325 if (IntPtrTy->getBitWidth() == 64) 1326 return RValue::get(Result); 1327 1328 // Otherwise, ask the codegen data what to do. 1329 if (getTargetHooks().extendPointerWithSExt()) 1330 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1331 else 1332 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1333 } 1334 case Builtin::BI__builtin_setjmp: { 1335 // Buffer is a void**. 1336 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1337 1338 // Store the frame pointer to the setjmp buffer. 1339 Value *FrameAddr = 1340 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1341 ConstantInt::get(Int32Ty, 0)); 1342 Builder.CreateStore(FrameAddr, Buf); 1343 1344 // Store the stack pointer to the setjmp buffer. 
1345 Value *StackAddr = 1346 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1347 Address StackSaveSlot = 1348 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1349 Builder.CreateStore(StackAddr, StackSaveSlot); 1350 1351 // Call LLVM's EH setjmp, which is lightweight. 1352 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1353 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1354 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1355 } 1356 case Builtin::BI__builtin_longjmp: { 1357 Value *Buf = EmitScalarExpr(E->getArg(0)); 1358 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1359 1360 // Call LLVM's EH longjmp, which is lightweight. 1361 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1362 1363 // longjmp doesn't return; mark this as unreachable. 1364 Builder.CreateUnreachable(); 1365 1366 // We do need to preserve an insertion point. 1367 EmitBlock(createBasicBlock("longjmp.cont")); 1368 1369 return RValue::get(nullptr); 1370 } 1371 case Builtin::BI__sync_fetch_and_add: 1372 case Builtin::BI__sync_fetch_and_sub: 1373 case Builtin::BI__sync_fetch_and_or: 1374 case Builtin::BI__sync_fetch_and_and: 1375 case Builtin::BI__sync_fetch_and_xor: 1376 case Builtin::BI__sync_fetch_and_nand: 1377 case Builtin::BI__sync_add_and_fetch: 1378 case Builtin::BI__sync_sub_and_fetch: 1379 case Builtin::BI__sync_and_and_fetch: 1380 case Builtin::BI__sync_or_and_fetch: 1381 case Builtin::BI__sync_xor_and_fetch: 1382 case Builtin::BI__sync_nand_and_fetch: 1383 case Builtin::BI__sync_val_compare_and_swap: 1384 case Builtin::BI__sync_bool_compare_and_swap: 1385 case Builtin::BI__sync_lock_test_and_set: 1386 case Builtin::BI__sync_lock_release: 1387 case Builtin::BI__sync_swap: 1388 llvm_unreachable("Shouldn't make it through sema"); 1389 case Builtin::BI__sync_fetch_and_add_1: 1390 case Builtin::BI__sync_fetch_and_add_2: 1391 case Builtin::BI__sync_fetch_and_add_4: 1392 case Builtin::BI__sync_fetch_and_add_8: 1393 case 
Builtin::BI__sync_fetch_and_add_16: 1394 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1395 case Builtin::BI__sync_fetch_and_sub_1: 1396 case Builtin::BI__sync_fetch_and_sub_2: 1397 case Builtin::BI__sync_fetch_and_sub_4: 1398 case Builtin::BI__sync_fetch_and_sub_8: 1399 case Builtin::BI__sync_fetch_and_sub_16: 1400 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1401 case Builtin::BI__sync_fetch_and_or_1: 1402 case Builtin::BI__sync_fetch_and_or_2: 1403 case Builtin::BI__sync_fetch_and_or_4: 1404 case Builtin::BI__sync_fetch_and_or_8: 1405 case Builtin::BI__sync_fetch_and_or_16: 1406 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1407 case Builtin::BI__sync_fetch_and_and_1: 1408 case Builtin::BI__sync_fetch_and_and_2: 1409 case Builtin::BI__sync_fetch_and_and_4: 1410 case Builtin::BI__sync_fetch_and_and_8: 1411 case Builtin::BI__sync_fetch_and_and_16: 1412 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1413 case Builtin::BI__sync_fetch_and_xor_1: 1414 case Builtin::BI__sync_fetch_and_xor_2: 1415 case Builtin::BI__sync_fetch_and_xor_4: 1416 case Builtin::BI__sync_fetch_and_xor_8: 1417 case Builtin::BI__sync_fetch_and_xor_16: 1418 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1419 case Builtin::BI__sync_fetch_and_nand_1: 1420 case Builtin::BI__sync_fetch_and_nand_2: 1421 case Builtin::BI__sync_fetch_and_nand_4: 1422 case Builtin::BI__sync_fetch_and_nand_8: 1423 case Builtin::BI__sync_fetch_and_nand_16: 1424 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1425 1426 // Clang extensions: not overloaded yet. 
1427 case Builtin::BI__sync_fetch_and_min: 1428 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1429 case Builtin::BI__sync_fetch_and_max: 1430 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1431 case Builtin::BI__sync_fetch_and_umin: 1432 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1433 case Builtin::BI__sync_fetch_and_umax: 1434 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1435 1436 case Builtin::BI__sync_add_and_fetch_1: 1437 case Builtin::BI__sync_add_and_fetch_2: 1438 case Builtin::BI__sync_add_and_fetch_4: 1439 case Builtin::BI__sync_add_and_fetch_8: 1440 case Builtin::BI__sync_add_and_fetch_16: 1441 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1442 llvm::Instruction::Add); 1443 case Builtin::BI__sync_sub_and_fetch_1: 1444 case Builtin::BI__sync_sub_and_fetch_2: 1445 case Builtin::BI__sync_sub_and_fetch_4: 1446 case Builtin::BI__sync_sub_and_fetch_8: 1447 case Builtin::BI__sync_sub_and_fetch_16: 1448 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1449 llvm::Instruction::Sub); 1450 case Builtin::BI__sync_and_and_fetch_1: 1451 case Builtin::BI__sync_and_and_fetch_2: 1452 case Builtin::BI__sync_and_and_fetch_4: 1453 case Builtin::BI__sync_and_and_fetch_8: 1454 case Builtin::BI__sync_and_and_fetch_16: 1455 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1456 llvm::Instruction::And); 1457 case Builtin::BI__sync_or_and_fetch_1: 1458 case Builtin::BI__sync_or_and_fetch_2: 1459 case Builtin::BI__sync_or_and_fetch_4: 1460 case Builtin::BI__sync_or_and_fetch_8: 1461 case Builtin::BI__sync_or_and_fetch_16: 1462 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1463 llvm::Instruction::Or); 1464 case Builtin::BI__sync_xor_and_fetch_1: 1465 case Builtin::BI__sync_xor_and_fetch_2: 1466 case Builtin::BI__sync_xor_and_fetch_4: 1467 case Builtin::BI__sync_xor_and_fetch_8: 1468 case Builtin::BI__sync_xor_and_fetch_16: 1469 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1470 llvm::Instruction::Xor); 1471 case Builtin::BI__sync_nand_and_fetch_1: 1472 case Builtin::BI__sync_nand_and_fetch_2: 1473 case Builtin::BI__sync_nand_and_fetch_4: 1474 case Builtin::BI__sync_nand_and_fetch_8: 1475 case Builtin::BI__sync_nand_and_fetch_16: 1476 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1477 llvm::Instruction::And, true); 1478 1479 case Builtin::BI__sync_val_compare_and_swap_1: 1480 case Builtin::BI__sync_val_compare_and_swap_2: 1481 case Builtin::BI__sync_val_compare_and_swap_4: 1482 case Builtin::BI__sync_val_compare_and_swap_8: 1483 case Builtin::BI__sync_val_compare_and_swap_16: 1484 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1485 1486 case Builtin::BI__sync_bool_compare_and_swap_1: 1487 case Builtin::BI__sync_bool_compare_and_swap_2: 1488 case Builtin::BI__sync_bool_compare_and_swap_4: 1489 case Builtin::BI__sync_bool_compare_and_swap_8: 1490 case Builtin::BI__sync_bool_compare_and_swap_16: 1491 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1492 1493 case Builtin::BI__sync_swap_1: 1494 case Builtin::BI__sync_swap_2: 1495 case Builtin::BI__sync_swap_4: 1496 case Builtin::BI__sync_swap_8: 1497 case Builtin::BI__sync_swap_16: 1498 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1499 1500 case Builtin::BI__sync_lock_test_and_set_1: 1501 case Builtin::BI__sync_lock_test_and_set_2: 1502 case Builtin::BI__sync_lock_test_and_set_4: 1503 case Builtin::BI__sync_lock_test_and_set_8: 1504 case Builtin::BI__sync_lock_test_and_set_16: 1505 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1506 1507 case Builtin::BI__sync_lock_release_1: 1508 case Builtin::BI__sync_lock_release_2: 1509 case Builtin::BI__sync_lock_release_4: 1510 case Builtin::BI__sync_lock_release_8: 1511 case Builtin::BI__sync_lock_release_16: { 1512 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1513 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 1514 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1515 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1516 StoreSize.getQuantity() * 8); 1517 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1518 llvm::StoreInst *Store = 1519 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1520 StoreSize); 1521 Store->setAtomic(llvm::AtomicOrdering::Release); 1522 return RValue::get(nullptr); 1523 } 1524 1525 case Builtin::BI__sync_synchronize: { 1526 // We assume this is supposed to correspond to a C++0x-style 1527 // sequentially-consistent fence (i.e. this is only usable for 1528 // synchronization, not device I/O or anything like that). This intrinsic 1529 // is really badly designed in the sense that in theory, there isn't 1530 // any way to safely use it... but in practice, it mostly works 1531 // to use it with non-atomic loads and stores to get acquire/release 1532 // semantics. 1533 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1534 return RValue::get(nullptr); 1535 } 1536 1537 case Builtin::BI__builtin_nontemporal_load: 1538 return RValue::get(EmitNontemporalLoad(*this, E)); 1539 case Builtin::BI__builtin_nontemporal_store: 1540 return RValue::get(EmitNontemporalStore(*this, E)); 1541 case Builtin::BI__c11_atomic_is_lock_free: 1542 case Builtin::BI__atomic_is_lock_free: { 1543 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1544 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1545 // _Atomic(T) is always properly-aligned.
1546 const char *LibCallName = "__atomic_is_lock_free"; 1547 CallArgList Args; 1548 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1549 getContext().getSizeType()); 1550 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1551 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1552 getContext().VoidPtrTy); 1553 else 1554 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1555 getContext().VoidPtrTy); 1556 const CGFunctionInfo &FuncInfo = 1557 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1558 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1559 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1560 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 1561 } 1562 1563 case Builtin::BI__atomic_test_and_set: { 1564 // Look at the argument type to determine whether this is a volatile 1565 // operation. The parameter type is always volatile. 1566 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1567 bool Volatile = 1568 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1569 1570 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1571 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1572 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1573 Value *NewVal = Builder.getInt8(1); 1574 Value *Order = EmitScalarExpr(E->getArg(1)); 1575 if (isa<llvm::ConstantInt>(Order)) { 1576 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1577 AtomicRMWInst *Result = nullptr; 1578 switch (ord) { 1579 case 0: // memory_order_relaxed 1580 default: // invalid order 1581 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1582 llvm::AtomicOrdering::Monotonic); 1583 break; 1584 case 1: // memory_order_consume 1585 case 2: // memory_order_acquire 1586 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1587 llvm::AtomicOrdering::Acquire); 1588 break; 1589 case 3: // memory_order_release 1590 Result = 
Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1591 llvm::AtomicOrdering::Release); 1592 break; 1593 case 4: // memory_order_acq_rel 1594 1595 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1596 llvm::AtomicOrdering::AcquireRelease); 1597 break; 1598 case 5: // memory_order_seq_cst 1599 Result = Builder.CreateAtomicRMW( 1600 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1601 llvm::AtomicOrdering::SequentiallyConsistent); 1602 break; 1603 } 1604 Result->setVolatile(Volatile); 1605 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1606 } 1607 1608 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1609 1610 llvm::BasicBlock *BBs[5] = { 1611 createBasicBlock("monotonic", CurFn), 1612 createBasicBlock("acquire", CurFn), 1613 createBasicBlock("release", CurFn), 1614 createBasicBlock("acqrel", CurFn), 1615 createBasicBlock("seqcst", CurFn) 1616 }; 1617 llvm::AtomicOrdering Orders[5] = { 1618 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1619 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1620 llvm::AtomicOrdering::SequentiallyConsistent}; 1621 1622 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1623 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1624 1625 Builder.SetInsertPoint(ContBB); 1626 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1627 1628 for (unsigned i = 0; i < 5; ++i) { 1629 Builder.SetInsertPoint(BBs[i]); 1630 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1631 Ptr, NewVal, Orders[i]); 1632 RMW->setVolatile(Volatile); 1633 Result->addIncoming(RMW, BBs[i]); 1634 Builder.CreateBr(ContBB); 1635 } 1636 1637 SI->addCase(Builder.getInt32(0), BBs[0]); 1638 SI->addCase(Builder.getInt32(1), BBs[1]); 1639 SI->addCase(Builder.getInt32(2), BBs[1]); 1640 SI->addCase(Builder.getInt32(3), BBs[2]); 1641 SI->addCase(Builder.getInt32(4), BBs[3]); 1642 SI->addCase(Builder.getInt32(5), BBs[4]); 1643 
1644 Builder.SetInsertPoint(ContBB); 1645 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1646 } 1647 1648 case Builtin::BI__atomic_clear: { 1649 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1650 bool Volatile = 1651 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1652 1653 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1654 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1655 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1656 Value *NewVal = Builder.getInt8(0); 1657 Value *Order = EmitScalarExpr(E->getArg(1)); 1658 if (isa<llvm::ConstantInt>(Order)) { 1659 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1660 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1661 switch (ord) { 1662 case 0: // memory_order_relaxed 1663 default: // invalid order 1664 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1665 break; 1666 case 3: // memory_order_release 1667 Store->setOrdering(llvm::AtomicOrdering::Release); 1668 break; 1669 case 5: // memory_order_seq_cst 1670 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1671 break; 1672 } 1673 return RValue::get(nullptr); 1674 } 1675 1676 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1677 1678 llvm::BasicBlock *BBs[3] = { 1679 createBasicBlock("monotonic", CurFn), 1680 createBasicBlock("release", CurFn), 1681 createBasicBlock("seqcst", CurFn) 1682 }; 1683 llvm::AtomicOrdering Orders[3] = { 1684 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1685 llvm::AtomicOrdering::SequentiallyConsistent}; 1686 1687 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1688 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1689 1690 for (unsigned i = 0; i < 3; ++i) { 1691 Builder.SetInsertPoint(BBs[i]); 1692 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1693 Store->setOrdering(Orders[i]); 1694 Builder.CreateBr(ContBB); 
1695 } 1696 1697 SI->addCase(Builder.getInt32(0), BBs[0]); 1698 SI->addCase(Builder.getInt32(3), BBs[1]); 1699 SI->addCase(Builder.getInt32(5), BBs[2]); 1700 1701 Builder.SetInsertPoint(ContBB); 1702 return RValue::get(nullptr); 1703 } 1704 1705 case Builtin::BI__atomic_thread_fence: 1706 case Builtin::BI__atomic_signal_fence: 1707 case Builtin::BI__c11_atomic_thread_fence: 1708 case Builtin::BI__c11_atomic_signal_fence: { 1709 llvm::SynchronizationScope Scope; 1710 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1711 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1712 Scope = llvm::SingleThread; 1713 else 1714 Scope = llvm::CrossThread; 1715 Value *Order = EmitScalarExpr(E->getArg(0)); 1716 if (isa<llvm::ConstantInt>(Order)) { 1717 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1718 switch (ord) { 1719 case 0: // memory_order_relaxed 1720 default: // invalid order 1721 break; 1722 case 1: // memory_order_consume 1723 case 2: // memory_order_acquire 1724 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1725 break; 1726 case 3: // memory_order_release 1727 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1728 break; 1729 case 4: // memory_order_acq_rel 1730 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1731 break; 1732 case 5: // memory_order_seq_cst 1733 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 1734 Scope); 1735 break; 1736 } 1737 return RValue::get(nullptr); 1738 } 1739 1740 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1741 AcquireBB = createBasicBlock("acquire", CurFn); 1742 ReleaseBB = createBasicBlock("release", CurFn); 1743 AcqRelBB = createBasicBlock("acqrel", CurFn); 1744 SeqCstBB = createBasicBlock("seqcst", CurFn); 1745 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1746 1747 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1748 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1749 1750 
Builder.SetInsertPoint(AcquireBB); 1751 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1752 Builder.CreateBr(ContBB); 1753 SI->addCase(Builder.getInt32(1), AcquireBB); 1754 SI->addCase(Builder.getInt32(2), AcquireBB); 1755 1756 Builder.SetInsertPoint(ReleaseBB); 1757 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1758 Builder.CreateBr(ContBB); 1759 SI->addCase(Builder.getInt32(3), ReleaseBB); 1760 1761 Builder.SetInsertPoint(AcqRelBB); 1762 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1763 Builder.CreateBr(ContBB); 1764 SI->addCase(Builder.getInt32(4), AcqRelBB); 1765 1766 Builder.SetInsertPoint(SeqCstBB); 1767 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); 1768 Builder.CreateBr(ContBB); 1769 SI->addCase(Builder.getInt32(5), SeqCstBB); 1770 1771 Builder.SetInsertPoint(ContBB); 1772 return RValue::get(nullptr); 1773 } 1774 1775 // Library functions with special handling. 1776 case Builtin::BIsqrt: 1777 case Builtin::BIsqrtf: 1778 case Builtin::BIsqrtl: { 1779 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1780 // in finite- or unsafe-math mode (the intrinsic has different semantics 1781 // for handling negative numbers compared to the library function, so 1782 // -fmath-errno=0 is not enough). 1783 if (!FD->hasAttr<ConstAttr>()) 1784 break; 1785 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1786 CGM.getCodeGenOpts().NoNaNsFPMath)) 1787 break; 1788 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1789 llvm::Type *ArgType = Arg0->getType(); 1790 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1791 return RValue::get(Builder.CreateCall(F, Arg0)); 1792 } 1793 1794 case Builtin::BI__builtin_pow: 1795 case Builtin::BI__builtin_powf: 1796 case Builtin::BI__builtin_powl: 1797 case Builtin::BIpow: 1798 case Builtin::BIpowf: 1799 case Builtin::BIpowl: { 1800 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
1801 if (!FD->hasAttr<ConstAttr>()) 1802 break; 1803 Value *Base = EmitScalarExpr(E->getArg(0)); 1804 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1805 llvm::Type *ArgType = Base->getType(); 1806 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1807 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1808 } 1809 1810 case Builtin::BIfma: 1811 case Builtin::BIfmaf: 1812 case Builtin::BIfmal: 1813 case Builtin::BI__builtin_fma: 1814 case Builtin::BI__builtin_fmaf: 1815 case Builtin::BI__builtin_fmal: { 1816 // Rewrite fma to intrinsic. 1817 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1818 llvm::Type *ArgType = FirstArg->getType(); 1819 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1820 return RValue::get( 1821 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1822 EmitScalarExpr(E->getArg(2))})); 1823 } 1824 1825 case Builtin::BI__builtin_signbit: 1826 case Builtin::BI__builtin_signbitf: 1827 case Builtin::BI__builtin_signbitl: { 1828 return RValue::get( 1829 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1830 ConvertType(E->getType()))); 1831 } 1832 case Builtin::BI__builtin_annotation: { 1833 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1834 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1835 AnnVal->getType()); 1836 1837 // Get the annotation string, go through casts. Sema requires this to be a 1838 // non-wide string literal, potentially casted, so the cast<> is safe. 
1839 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1840 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1841 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1842 } 1843 case Builtin::BI__builtin_addcb: 1844 case Builtin::BI__builtin_addcs: 1845 case Builtin::BI__builtin_addc: 1846 case Builtin::BI__builtin_addcl: 1847 case Builtin::BI__builtin_addcll: 1848 case Builtin::BI__builtin_subcb: 1849 case Builtin::BI__builtin_subcs: 1850 case Builtin::BI__builtin_subc: 1851 case Builtin::BI__builtin_subcl: 1852 case Builtin::BI__builtin_subcll: { 1853 1854 // We translate all of these builtins from expressions of the form: 1855 // int x = ..., y = ..., carryin = ..., carryout, result; 1856 // result = __builtin_addc(x, y, carryin, &carryout); 1857 // 1858 // to LLVM IR of the form: 1859 // 1860 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1861 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1862 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1863 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1864 // i32 %carryin) 1865 // %result = extractvalue {i32, i1} %tmp2, 0 1866 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1867 // %tmp3 = or i1 %carry1, %carry2 1868 // %tmp4 = zext i1 %tmp3 to i32 1869 // store i32 %tmp4, i32* %carryout 1870 1871 // Scalarize our inputs. 1872 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1873 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1874 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1875 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 1876 1877 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1878 llvm::Intrinsic::ID IntrinsicId; 1879 switch (BuiltinID) { 1880 default: llvm_unreachable("Unknown multiprecision builtin id."); 1881 case Builtin::BI__builtin_addcb: 1882 case Builtin::BI__builtin_addcs: 1883 case Builtin::BI__builtin_addc: 1884 case Builtin::BI__builtin_addcl: 1885 case Builtin::BI__builtin_addcll: 1886 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1887 break; 1888 case Builtin::BI__builtin_subcb: 1889 case Builtin::BI__builtin_subcs: 1890 case Builtin::BI__builtin_subc: 1891 case Builtin::BI__builtin_subcl: 1892 case Builtin::BI__builtin_subcll: 1893 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1894 break; 1895 } 1896 1897 // Construct our resulting LLVM IR expression. 1898 llvm::Value *Carry1; 1899 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1900 X, Y, Carry1); 1901 llvm::Value *Carry2; 1902 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1903 Sum1, Carryin, Carry2); 1904 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1905 X->getType()); 1906 Builder.CreateStore(CarryOut, CarryOutPtr); 1907 return RValue::get(Sum2); 1908 } 1909 1910 case Builtin::BI__builtin_add_overflow: 1911 case Builtin::BI__builtin_sub_overflow: 1912 case Builtin::BI__builtin_mul_overflow: { 1913 const clang::Expr *LeftArg = E->getArg(0); 1914 const clang::Expr *RightArg = E->getArg(1); 1915 const clang::Expr *ResultArg = E->getArg(2); 1916 1917 clang::QualType ResultQTy = 1918 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 1919 1920 WidthAndSignedness LeftInfo = 1921 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 1922 WidthAndSignedness RightInfo = 1923 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 1924 WidthAndSignedness ResultInfo = 1925 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 1926 WidthAndSignedness EncompassingInfo = 1927 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 1928 1929 llvm::Type 
*EncompassingLLVMTy = 1930 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 1931 1932 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 1933 1934 llvm::Intrinsic::ID IntrinsicId; 1935 switch (BuiltinID) { 1936 default: 1937 llvm_unreachable("Unknown overflow builtin id."); 1938 case Builtin::BI__builtin_add_overflow: 1939 IntrinsicId = EncompassingInfo.Signed 1940 ? llvm::Intrinsic::sadd_with_overflow 1941 : llvm::Intrinsic::uadd_with_overflow; 1942 break; 1943 case Builtin::BI__builtin_sub_overflow: 1944 IntrinsicId = EncompassingInfo.Signed 1945 ? llvm::Intrinsic::ssub_with_overflow 1946 : llvm::Intrinsic::usub_with_overflow; 1947 break; 1948 case Builtin::BI__builtin_mul_overflow: 1949 IntrinsicId = EncompassingInfo.Signed 1950 ? llvm::Intrinsic::smul_with_overflow 1951 : llvm::Intrinsic::umul_with_overflow; 1952 break; 1953 } 1954 1955 llvm::Value *Left = EmitScalarExpr(LeftArg); 1956 llvm::Value *Right = EmitScalarExpr(RightArg); 1957 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 1958 1959 // Extend each operand to the encompassing type. 1960 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 1961 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 1962 1963 // Perform the operation on the extended values. 1964 llvm::Value *Overflow, *Result; 1965 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 1966 1967 if (EncompassingInfo.Width > ResultInfo.Width) { 1968 // The encompassing type is wider than the result type, so we need to 1969 // truncate it. 1970 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 1971 1972 // To see if the truncation caused an overflow, we will extend 1973 // the result and then compare it to the original result. 
1974 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 1975 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 1976 llvm::Value *TruncationOverflow = 1977 Builder.CreateICmpNE(Result, ResultTruncExt); 1978 1979 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 1980 Result = ResultTrunc; 1981 } 1982 1983 // Finally, store the result using the pointer. 1984 bool isVolatile = 1985 ResultArg->getType()->getPointeeType().isVolatileQualified(); 1986 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 1987 1988 return RValue::get(Overflow); 1989 } 1990 1991 case Builtin::BI__builtin_uadd_overflow: 1992 case Builtin::BI__builtin_uaddl_overflow: 1993 case Builtin::BI__builtin_uaddll_overflow: 1994 case Builtin::BI__builtin_usub_overflow: 1995 case Builtin::BI__builtin_usubl_overflow: 1996 case Builtin::BI__builtin_usubll_overflow: 1997 case Builtin::BI__builtin_umul_overflow: 1998 case Builtin::BI__builtin_umull_overflow: 1999 case Builtin::BI__builtin_umulll_overflow: 2000 case Builtin::BI__builtin_sadd_overflow: 2001 case Builtin::BI__builtin_saddl_overflow: 2002 case Builtin::BI__builtin_saddll_overflow: 2003 case Builtin::BI__builtin_ssub_overflow: 2004 case Builtin::BI__builtin_ssubl_overflow: 2005 case Builtin::BI__builtin_ssubll_overflow: 2006 case Builtin::BI__builtin_smul_overflow: 2007 case Builtin::BI__builtin_smull_overflow: 2008 case Builtin::BI__builtin_smulll_overflow: { 2009 2010 // We translate all of these builtins directly to the relevant llvm IR node. 2011 2012 // Scalarize our inputs. 
2013 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2014 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2015 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2016 2017 // Decide which of the overflow intrinsics we are lowering to: 2018 llvm::Intrinsic::ID IntrinsicId; 2019 switch (BuiltinID) { 2020 default: llvm_unreachable("Unknown overflow builtin id."); 2021 case Builtin::BI__builtin_uadd_overflow: 2022 case Builtin::BI__builtin_uaddl_overflow: 2023 case Builtin::BI__builtin_uaddll_overflow: 2024 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2025 break; 2026 case Builtin::BI__builtin_usub_overflow: 2027 case Builtin::BI__builtin_usubl_overflow: 2028 case Builtin::BI__builtin_usubll_overflow: 2029 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2030 break; 2031 case Builtin::BI__builtin_umul_overflow: 2032 case Builtin::BI__builtin_umull_overflow: 2033 case Builtin::BI__builtin_umulll_overflow: 2034 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2035 break; 2036 case Builtin::BI__builtin_sadd_overflow: 2037 case Builtin::BI__builtin_saddl_overflow: 2038 case Builtin::BI__builtin_saddll_overflow: 2039 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2040 break; 2041 case Builtin::BI__builtin_ssub_overflow: 2042 case Builtin::BI__builtin_ssubl_overflow: 2043 case Builtin::BI__builtin_ssubll_overflow: 2044 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2045 break; 2046 case Builtin::BI__builtin_smul_overflow: 2047 case Builtin::BI__builtin_smull_overflow: 2048 case Builtin::BI__builtin_smulll_overflow: 2049 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2050 break; 2051 } 2052 2053 2054 llvm::Value *Carry; 2055 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2056 Builder.CreateStore(Sum, SumOutPtr); 2057 2058 return RValue::get(Carry); 2059 } 2060 case Builtin::BI__builtin_addressof: 2061 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2062 case Builtin::BI__builtin_operator_new: 2063 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2064 E->getArg(0), false); 2065 case Builtin::BI__builtin_operator_delete: 2066 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2067 E->getArg(0), true); 2068 case Builtin::BI__noop: 2069 // __noop always evaluates to an integer literal zero. 2070 return RValue::get(ConstantInt::get(IntTy, 0)); 2071 case Builtin::BI__builtin_call_with_static_chain: { 2072 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2073 const Expr *Chain = E->getArg(1); 2074 return EmitCall(Call->getCallee()->getType(), 2075 EmitScalarExpr(Call->getCallee()), Call, ReturnValue, 2076 Call->getCalleeDecl(), EmitScalarExpr(Chain)); 2077 } 2078 case Builtin::BI_InterlockedExchange8: 2079 case Builtin::BI_InterlockedExchange16: 2080 case Builtin::BI_InterlockedExchange: 2081 case Builtin::BI_InterlockedExchangePointer: 2082 return RValue::get( 2083 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2084 case Builtin::BI_InterlockedCompareExchangePointer: { 2085 llvm::Type *RTy; 2086 llvm::IntegerType *IntType = 2087 IntegerType::get(getLLVMContext(), 2088 getContext().getTypeSize(E->getType())); 2089 llvm::Type *IntPtrType = IntType->getPointerTo(); 2090 2091 llvm::Value *Destination = 2092 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2093 2094 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2095 RTy = Exchange->getType(); 2096 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2097 2098 llvm::Value *Comparand = 2099 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2100 2101 auto Result = 2102 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2103 AtomicOrdering::SequentiallyConsistent, 2104 AtomicOrdering::SequentiallyConsistent); 2105 Result->setVolatile(true); 2106 2107 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2108 0), 2109 RTy)); 2110 } 2111 case Builtin::BI_InterlockedCompareExchange8: 2112 case 
Builtin::BI_InterlockedCompareExchange16: 2113 case Builtin::BI_InterlockedCompareExchange: 2114 case Builtin::BI_InterlockedCompareExchange64: { 2115 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2116 EmitScalarExpr(E->getArg(0)), 2117 EmitScalarExpr(E->getArg(2)), 2118 EmitScalarExpr(E->getArg(1)), 2119 AtomicOrdering::SequentiallyConsistent, 2120 AtomicOrdering::SequentiallyConsistent); 2121 CXI->setVolatile(true); 2122 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2123 } 2124 case Builtin::BI_InterlockedIncrement16: 2125 case Builtin::BI_InterlockedIncrement: 2126 return RValue::get( 2127 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2128 case Builtin::BI_InterlockedDecrement16: 2129 case Builtin::BI_InterlockedDecrement: 2130 return RValue::get( 2131 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2132 case Builtin::BI_InterlockedAnd8: 2133 case Builtin::BI_InterlockedAnd16: 2134 case Builtin::BI_InterlockedAnd: 2135 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2136 case Builtin::BI_InterlockedExchangeAdd8: 2137 case Builtin::BI_InterlockedExchangeAdd16: 2138 case Builtin::BI_InterlockedExchangeAdd: 2139 return RValue::get( 2140 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2141 case Builtin::BI_InterlockedExchangeSub8: 2142 case Builtin::BI_InterlockedExchangeSub16: 2143 case Builtin::BI_InterlockedExchangeSub: 2144 return RValue::get( 2145 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2146 case Builtin::BI_InterlockedOr8: 2147 case Builtin::BI_InterlockedOr16: 2148 case Builtin::BI_InterlockedOr: 2149 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2150 case Builtin::BI_InterlockedXor8: 2151 case Builtin::BI_InterlockedXor16: 2152 case Builtin::BI_InterlockedXor: 2153 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2154 case Builtin::BI__readfsdword: { 2155 llvm::Type *IntTy = ConvertType(E->getType()); 2156 
Value *IntToPtr = 2157 Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 2158 llvm::PointerType::get(IntTy, 257)); 2159 LoadInst *Load = 2160 Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true); 2161 return RValue::get(Load); 2162 } 2163 2164 case Builtin::BI__exception_code: 2165 case Builtin::BI_exception_code: 2166 return RValue::get(EmitSEHExceptionCode()); 2167 case Builtin::BI__exception_info: 2168 case Builtin::BI_exception_info: 2169 return RValue::get(EmitSEHExceptionInfo()); 2170 case Builtin::BI__abnormal_termination: 2171 case Builtin::BI_abnormal_termination: 2172 return RValue::get(EmitSEHAbnormalTermination()); 2173 case Builtin::BI_setjmpex: { 2174 if (getTarget().getTriple().isOSMSVCRT()) { 2175 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2176 llvm::AttributeSet ReturnsTwiceAttr = 2177 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2178 llvm::Attribute::ReturnsTwice); 2179 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2180 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2181 "_setjmpex", ReturnsTwiceAttr); 2182 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2183 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2184 llvm::Value *FrameAddr = 2185 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2186 ConstantInt::get(Int32Ty, 0)); 2187 llvm::Value *Args[] = {Buf, FrameAddr}; 2188 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2189 CS.setAttributes(ReturnsTwiceAttr); 2190 return RValue::get(CS.getInstruction()); 2191 } 2192 break; 2193 } 2194 case Builtin::BI_setjmp: { 2195 if (getTarget().getTriple().isOSMSVCRT()) { 2196 llvm::AttributeSet ReturnsTwiceAttr = 2197 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2198 llvm::Attribute::ReturnsTwice); 2199 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2200 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2201 llvm::CallSite CS; 2202 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 
2203 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2204 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2205 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), 2206 "_setjmp3", ReturnsTwiceAttr); 2207 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2208 llvm::Value *Args[] = {Buf, Count}; 2209 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2210 } else { 2211 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2212 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2213 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2214 "_setjmp", ReturnsTwiceAttr); 2215 llvm::Value *FrameAddr = 2216 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2217 ConstantInt::get(Int32Ty, 0)); 2218 llvm::Value *Args[] = {Buf, FrameAddr}; 2219 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2220 } 2221 CS.setAttributes(ReturnsTwiceAttr); 2222 return RValue::get(CS.getInstruction()); 2223 } 2224 break; 2225 } 2226 2227 case Builtin::BI__GetExceptionInfo: { 2228 if (llvm::GlobalVariable *GV = 2229 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2230 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2231 break; 2232 } 2233 2234 case Builtin::BI__builtin_coro_size: { 2235 auto & Context = getContext(); 2236 auto SizeTy = Context.getSizeType(); 2237 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 2238 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2239 return RValue::get(Builder.CreateCall(F)); 2240 } 2241 2242 case Builtin::BI__builtin_coro_id: 2243 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2244 case Builtin::BI__builtin_coro_promise: 2245 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2246 case Builtin::BI__builtin_coro_resume: 2247 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2248 case Builtin::BI__builtin_coro_frame: 2249 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2250 case Builtin::BI__builtin_coro_free: 2251 return EmitCoroutineIntrinsic(E, 
Intrinsic::coro_free); 2252 case Builtin::BI__builtin_coro_destroy: 2253 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2254 case Builtin::BI__builtin_coro_done: 2255 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2256 case Builtin::BI__builtin_coro_alloc: 2257 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2258 case Builtin::BI__builtin_coro_begin: 2259 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2260 case Builtin::BI__builtin_coro_end: 2261 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2262 case Builtin::BI__builtin_coro_suspend: 2263 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2264 case Builtin::BI__builtin_coro_param: 2265 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2266 2267 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2268 case Builtin::BIread_pipe: 2269 case Builtin::BIwrite_pipe: { 2270 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2271 *Arg1 = EmitScalarExpr(E->getArg(1)); 2272 CGOpenCLRuntime OpenCLRT(CGM); 2273 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2274 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2275 2276 // Type of the generic packet parameter. 2277 unsigned GenericAS = 2278 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2279 llvm::Type *I8PTy = llvm::PointerType::get( 2280 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2281 2282 // Testing which overloaded version we should generate the call for. 2283 if (2U == E->getNumArgs()) { 2284 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2285 : "__write_pipe_2"; 2286 // Creating a generic function type to be able to call with any builtin or 2287 // user defined type. 
2288 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2289 llvm::FunctionType *FTy = llvm::FunctionType::get( 2290 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2291 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2292 return RValue::get( 2293 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2294 {Arg0, BCast, PacketSize, PacketAlign})); 2295 } else { 2296 assert(4 == E->getNumArgs() && 2297 "Illegal number of parameters to pipe function"); 2298 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2299 : "__write_pipe_4"; 2300 2301 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2302 Int32Ty, Int32Ty}; 2303 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2304 *Arg3 = EmitScalarExpr(E->getArg(3)); 2305 llvm::FunctionType *FTy = llvm::FunctionType::get( 2306 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2307 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2308 // We know the third argument is an integer type, but we may need to cast 2309 // it to i32. 2310 if (Arg2->getType() != Int32Ty) 2311 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2312 return RValue::get(Builder.CreateCall( 2313 CGM.CreateRuntimeFunction(FTy, Name), 2314 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2315 } 2316 } 2317 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2318 // functions 2319 case Builtin::BIreserve_read_pipe: 2320 case Builtin::BIreserve_write_pipe: 2321 case Builtin::BIwork_group_reserve_read_pipe: 2322 case Builtin::BIwork_group_reserve_write_pipe: 2323 case Builtin::BIsub_group_reserve_read_pipe: 2324 case Builtin::BIsub_group_reserve_write_pipe: { 2325 // Composing the mangled name for the function. 
2326 const char *Name; 2327 if (BuiltinID == Builtin::BIreserve_read_pipe) 2328 Name = "__reserve_read_pipe"; 2329 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2330 Name = "__reserve_write_pipe"; 2331 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2332 Name = "__work_group_reserve_read_pipe"; 2333 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2334 Name = "__work_group_reserve_write_pipe"; 2335 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2336 Name = "__sub_group_reserve_read_pipe"; 2337 else 2338 Name = "__sub_group_reserve_write_pipe"; 2339 2340 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2341 *Arg1 = EmitScalarExpr(E->getArg(1)); 2342 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2343 CGOpenCLRuntime OpenCLRT(CGM); 2344 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2345 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2346 2347 // Building the generic function prototype. 2348 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2349 llvm::FunctionType *FTy = llvm::FunctionType::get( 2350 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2351 // We know the second argument is an integer type, but we may need to cast 2352 // it to i32. 
2353 if (Arg1->getType() != Int32Ty) 2354 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2355 return RValue::get( 2356 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2357 {Arg0, Arg1, PacketSize, PacketAlign})); 2358 } 2359 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2360 // functions 2361 case Builtin::BIcommit_read_pipe: 2362 case Builtin::BIcommit_write_pipe: 2363 case Builtin::BIwork_group_commit_read_pipe: 2364 case Builtin::BIwork_group_commit_write_pipe: 2365 case Builtin::BIsub_group_commit_read_pipe: 2366 case Builtin::BIsub_group_commit_write_pipe: { 2367 const char *Name; 2368 if (BuiltinID == Builtin::BIcommit_read_pipe) 2369 Name = "__commit_read_pipe"; 2370 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2371 Name = "__commit_write_pipe"; 2372 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2373 Name = "__work_group_commit_read_pipe"; 2374 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2375 Name = "__work_group_commit_write_pipe"; 2376 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2377 Name = "__sub_group_commit_read_pipe"; 2378 else 2379 Name = "__sub_group_commit_write_pipe"; 2380 2381 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2382 *Arg1 = EmitScalarExpr(E->getArg(1)); 2383 CGOpenCLRuntime OpenCLRT(CGM); 2384 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2385 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2386 2387 // Building the generic function prototype. 
2388 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2389 llvm::FunctionType *FTy = 2390 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2391 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2392 2393 return RValue::get( 2394 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2395 {Arg0, Arg1, PacketSize, PacketAlign})); 2396 } 2397 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2398 case Builtin::BIget_pipe_num_packets: 2399 case Builtin::BIget_pipe_max_packets: { 2400 const char *Name; 2401 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2402 Name = "__get_pipe_num_packets"; 2403 else 2404 Name = "__get_pipe_max_packets"; 2405 2406 // Building the generic function prototype. 2407 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2408 CGOpenCLRuntime OpenCLRT(CGM); 2409 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2410 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2411 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2412 llvm::FunctionType *FTy = llvm::FunctionType::get( 2413 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2414 2415 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2416 {Arg0, PacketSize, PacketAlign})); 2417 } 2418 2419 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2420 case Builtin::BIto_global: 2421 case Builtin::BIto_local: 2422 case Builtin::BIto_private: { 2423 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2424 auto NewArgT = llvm::PointerType::get(Int8Ty, 2425 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2426 auto NewRetT = llvm::PointerType::get(Int8Ty, 2427 CGM.getContext().getTargetAddressSpace( 2428 E->getType()->getPointeeType().getAddressSpace())); 2429 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2430 llvm::Value *NewArg; 2431 if (Arg0->getType()->getPointerAddressSpace() != 2432 NewArgT->getPointerAddressSpace()) 2433 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2434 else 2435 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2436 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2437 auto NewCall = 2438 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2439 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2440 ConvertType(E->getType()))); 2441 } 2442 2443 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2444 // It contains four different overload formats specified in Table 6.13.17.1. 
2445 case Builtin::BIenqueue_kernel: { 2446 StringRef Name; // Generated function call name 2447 unsigned NumArgs = E->getNumArgs(); 2448 2449 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2450 llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); 2451 2452 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2453 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2454 llvm::Value *Range = EmitScalarExpr(E->getArg(2)); 2455 2456 if (NumArgs == 4) { 2457 // The most basic form of the call with parameters: 2458 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2459 Name = "__enqueue_kernel_basic"; 2460 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; 2461 llvm::FunctionType *FTy = llvm::FunctionType::get( 2462 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2463 2464 llvm::Value *Block = 2465 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2466 2467 return RValue::get(Builder.CreateCall( 2468 CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); 2469 } 2470 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2471 2472 // Could have events and/or vaargs. 2473 if (E->getArg(3)->getType()->isBlockPointerType()) { 2474 // No events passed, but has variadic arguments. 2475 Name = "__enqueue_kernel_vaargs"; 2476 llvm::Value *Block = 2477 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2478 // Create a vector of the arguments, as well as a constant value to 2479 // express to the runtime the number of variadic arguments. 2480 std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, 2481 ConstantInt::get(IntTy, NumArgs - 4)}; 2482 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, 2483 IntTy}; 2484 2485 // Add the variadics. 
2486 for (unsigned I = 4; I < NumArgs; ++I) { 2487 llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); 2488 unsigned TypeSizeInBytes = 2489 getContext() 2490 .getTypeSizeInChars(E->getArg(I)->getType()) 2491 .getQuantity(); 2492 Args.push_back(TypeSizeInBytes < 4 2493 ? Builder.CreateZExt(ArgSize, Int32Ty) 2494 : ArgSize); 2495 } 2496 2497 llvm::FunctionType *FTy = llvm::FunctionType::get( 2498 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2499 return RValue::get( 2500 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2501 llvm::ArrayRef<llvm::Value *>(Args))); 2502 } 2503 // Any calls now have event arguments passed. 2504 if (NumArgs >= 7) { 2505 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2506 unsigned AS4 = 2507 E->getArg(4)->getType()->isArrayType() 2508 ? E->getArg(4)->getType().getAddressSpace() 2509 : E->getArg(4)->getType()->getPointeeType().getAddressSpace(); 2510 llvm::Type *EventPtrAS4Ty = 2511 EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4)); 2512 unsigned AS5 = 2513 E->getArg(5)->getType()->getPointeeType().getAddressSpace(); 2514 llvm::Type *EventPtrAS5Ty = 2515 EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5)); 2516 2517 llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3)); 2518 llvm::Value *EventList = 2519 E->getArg(4)->getType()->isArrayType() 2520 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2521 : EmitScalarExpr(E->getArg(4)); 2522 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2523 llvm::Value *Block = 2524 Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); 2525 2526 std::vector<llvm::Type *> ArgTys = { 2527 QueueTy, Int32Ty, RangeTy, Int32Ty, 2528 EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy}; 2529 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2530 EventList, ClkEvent, Block}; 2531 2532 if (NumArgs == 7) { 2533 // Has events but no variadics. 
2534 Name = "__enqueue_kernel_basic_events"; 2535 llvm::FunctionType *FTy = llvm::FunctionType::get( 2536 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2537 return RValue::get( 2538 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2539 llvm::ArrayRef<llvm::Value *>(Args))); 2540 } 2541 // Has event info and variadics 2542 // Pass the number of variadics to the runtime function too. 2543 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2544 ArgTys.push_back(Int32Ty); 2545 Name = "__enqueue_kernel_events_vaargs"; 2546 2547 // Add the variadics. 2548 for (unsigned I = 7; I < NumArgs; ++I) { 2549 llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); 2550 unsigned TypeSizeInBytes = 2551 getContext() 2552 .getTypeSizeInChars(E->getArg(I)->getType()) 2553 .getQuantity(); 2554 Args.push_back(TypeSizeInBytes < 4 2555 ? Builder.CreateZExt(ArgSize, Int32Ty) 2556 : ArgSize); 2557 } 2558 llvm::FunctionType *FTy = llvm::FunctionType::get( 2559 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2560 return RValue::get( 2561 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2562 llvm::ArrayRef<llvm::Value *>(Args))); 2563 } 2564 } 2565 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2566 // parameter. 
2567 case Builtin::BIget_kernel_work_group_size: { 2568 Value *Arg = EmitScalarExpr(E->getArg(0)); 2569 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2570 return RValue::get( 2571 Builder.CreateCall(CGM.CreateRuntimeFunction( 2572 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2573 "__get_kernel_work_group_size_impl"), 2574 Arg)); 2575 } 2576 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2577 Value *Arg = EmitScalarExpr(E->getArg(0)); 2578 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2579 return RValue::get(Builder.CreateCall( 2580 CGM.CreateRuntimeFunction( 2581 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2582 "__get_kernel_preferred_work_group_multiple_impl"), 2583 Arg)); 2584 } 2585 case Builtin::BIprintf: 2586 if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) 2587 return EmitCUDADevicePrintfCallExpr(E, ReturnValue); 2588 break; 2589 case Builtin::BI__builtin_canonicalize: 2590 case Builtin::BI__builtin_canonicalizef: 2591 case Builtin::BI__builtin_canonicalizel: 2592 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2593 2594 case Builtin::BI__builtin_thread_pointer: { 2595 if (!getContext().getTargetInfo().isTLSSupported()) 2596 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2597 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 2598 break; 2599 } 2600 } 2601 2602 // If this is an alias for a lib function (e.g. __builtin_sin), emit 2603 // the call using the normal call path, but using the unmangled 2604 // version of the function name. 2605 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2606 return emitLibraryCall(*this, FD, E, 2607 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2608 2609 // If this is a predefined lib function (e.g. malloc), emit the call 2610 // using exactly the normal call path. 
2611 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2612 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 2613 2614 // Check that a call to a target specific builtin has the correct target 2615 // features. 2616 // This is down here to avoid non-target specific builtins, however, if 2617 // generic builtins start to require generic target features then we 2618 // can move this up to the beginning of the function. 2619 checkTargetFeatures(E, FD); 2620 2621 // See if we have a target specific intrinsic. 2622 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2623 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2624 StringRef Prefix = 2625 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 2626 if (!Prefix.empty()) { 2627 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 2628 // NOTE we dont need to perform a compatibility flag check here since the 2629 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 2630 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 2631 if (IntrinsicID == Intrinsic::not_intrinsic) 2632 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 2633 } 2634 2635 if (IntrinsicID != Intrinsic::not_intrinsic) { 2636 SmallVector<Value*, 16> Args; 2637 2638 // Find out if any arguments are required to be integer constant 2639 // expressions. 2640 unsigned ICEArguments = 0; 2641 ASTContext::GetBuiltinTypeError Error; 2642 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2643 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2644 2645 Function *F = CGM.getIntrinsic(IntrinsicID); 2646 llvm::FunctionType *FTy = F->getFunctionType(); 2647 2648 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2649 Value *ArgValue; 2650 // If this is a normal argument, just emit it as a scalar. 
2651 if ((ICEArguments & (1 << i)) == 0) { 2652 ArgValue = EmitScalarExpr(E->getArg(i)); 2653 } else { 2654 // If this is required to be a constant, constant fold it so that we 2655 // know that the generated intrinsic gets a ConstantInt. 2656 llvm::APSInt Result; 2657 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 2658 assert(IsConst && "Constant arg isn't actually constant?"); 2659 (void)IsConst; 2660 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2661 } 2662 2663 // If the intrinsic arg type is different from the builtin arg type 2664 // we need to do a bit cast. 2665 llvm::Type *PTy = FTy->getParamType(i); 2666 if (PTy != ArgValue->getType()) { 2667 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 2668 "Must be able to losslessly bit cast to param"); 2669 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2670 } 2671 2672 Args.push_back(ArgValue); 2673 } 2674 2675 Value *V = Builder.CreateCall(F, Args); 2676 QualType BuiltinRetType = E->getType(); 2677 2678 llvm::Type *RetTy = VoidTy; 2679 if (!BuiltinRetType->isVoidType()) 2680 RetTy = ConvertType(BuiltinRetType); 2681 2682 if (RetTy != V->getType()) { 2683 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 2684 "Must be able to losslessly bit cast result type"); 2685 V = Builder.CreateBitCast(V, RetTy); 2686 } 2687 2688 return RValue::get(V); 2689 } 2690 2691 // See if we have a target specific builtin that needs to be lowered. 2692 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 2693 return RValue::get(V); 2694 2695 ErrorUnsupported(E, "builtin function"); 2696 2697 // Unknown builtin, for now just dump it out and return undef. 
2698 return GetUndefRValue(E->getType()); 2699 } 2700 2701 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 2702 unsigned BuiltinID, const CallExpr *E, 2703 llvm::Triple::ArchType Arch) { 2704 switch (Arch) { 2705 case llvm::Triple::arm: 2706 case llvm::Triple::armeb: 2707 case llvm::Triple::thumb: 2708 case llvm::Triple::thumbeb: 2709 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 2710 case llvm::Triple::aarch64: 2711 case llvm::Triple::aarch64_be: 2712 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 2713 case llvm::Triple::x86: 2714 case llvm::Triple::x86_64: 2715 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 2716 case llvm::Triple::ppc: 2717 case llvm::Triple::ppc64: 2718 case llvm::Triple::ppc64le: 2719 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 2720 case llvm::Triple::r600: 2721 case llvm::Triple::amdgcn: 2722 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 2723 case llvm::Triple::systemz: 2724 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 2725 case llvm::Triple::nvptx: 2726 case llvm::Triple::nvptx64: 2727 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 2728 case llvm::Triple::wasm32: 2729 case llvm::Triple::wasm64: 2730 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 2731 default: 2732 return nullptr; 2733 } 2734 } 2735 2736 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 2737 const CallExpr *E) { 2738 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 2739 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 2740 return EmitTargetArchBuiltinExpr( 2741 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 2742 getContext().getAuxTargetInfo()->getTriple().getArch()); 2743 } 2744 2745 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 2746 getTarget().getTriple().getArch()); 2747 } 2748 2749 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 2750 NeonTypeFlags TypeFlags, 2751 bool V1Ty=false) { 2752 int IsQuad = TypeFlags.isQuad(); 2753 switch (TypeFlags.getEltType()) { 
2754 case NeonTypeFlags::Int8: 2755 case NeonTypeFlags::Poly8: 2756 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 2757 case NeonTypeFlags::Int16: 2758 case NeonTypeFlags::Poly16: 2759 case NeonTypeFlags::Float16: 2760 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 2761 case NeonTypeFlags::Int32: 2762 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 2763 case NeonTypeFlags::Int64: 2764 case NeonTypeFlags::Poly64: 2765 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 2766 case NeonTypeFlags::Poly128: 2767 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 2768 // There is a lot of i128 and f128 API missing. 2769 // so we use v16i8 to represent poly128 and get pattern matched. 2770 return llvm::VectorType::get(CGF->Int8Ty, 16); 2771 case NeonTypeFlags::Float32: 2772 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 2773 case NeonTypeFlags::Float64: 2774 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 2775 } 2776 llvm_unreachable("Unknown vector element type!"); 2777 } 2778 2779 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 2780 NeonTypeFlags IntTypeFlags) { 2781 int IsQuad = IntTypeFlags.isQuad(); 2782 switch (IntTypeFlags.getEltType()) { 2783 case NeonTypeFlags::Int32: 2784 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 2785 case NeonTypeFlags::Int64: 2786 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 2787 default: 2788 llvm_unreachable("Type can't be converted to floating-point!"); 2789 } 2790 } 2791 2792 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 2793 unsigned nElts = V->getType()->getVectorNumElements(); 2794 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 2795 return Builder.CreateShuffleVector(V, V, SV, "lane"); 2796 } 2797 2798 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 2799 const char *name, 2800 unsigned shift, bool rightshift) { 2801 unsigned j = 0; 2802 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 2803 ai != ae; ++ai, ++j) 2804 if (shift > 0 && shift == j) 2805 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 2806 else 2807 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 2808 2809 return Builder.CreateCall(F, Ops, name); 2810 } 2811 2812 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 2813 bool neg) { 2814 int SV = cast<ConstantInt>(V)->getSExtValue(); 2815 return ConstantInt::get(Ty, neg ? -SV : SV); 2816 } 2817 2818 // \brief Right-shift a vector by a constant. 
2819 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 2820 llvm::Type *Ty, bool usgn, 2821 const char *name) { 2822 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 2823 2824 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 2825 int EltSize = VTy->getScalarSizeInBits(); 2826 2827 Vec = Builder.CreateBitCast(Vec, Ty); 2828 2829 // lshr/ashr are undefined when the shift amount is equal to the vector 2830 // element size. 2831 if (ShiftAmt == EltSize) { 2832 if (usgn) { 2833 // Right-shifting an unsigned value by its size yields 0. 2834 return llvm::ConstantAggregateZero::get(VTy); 2835 } else { 2836 // Right-shifting a signed value by its size is equivalent 2837 // to a shift of size-1. 2838 --ShiftAmt; 2839 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 2840 } 2841 } 2842 2843 Shift = EmitNeonShiftVector(Shift, Ty, false); 2844 if (usgn) 2845 return Builder.CreateLShr(Vec, Shift, name); 2846 else 2847 return Builder.CreateAShr(Vec, Shift, name); 2848 } 2849 2850 enum { 2851 AddRetType = (1 << 0), 2852 Add1ArgType = (1 << 1), 2853 Add2ArgTypes = (1 << 2), 2854 2855 VectorizeRetType = (1 << 3), 2856 VectorizeArgTypes = (1 << 4), 2857 2858 InventFloatType = (1 << 5), 2859 UnsignedAlts = (1 << 6), 2860 2861 Use64BitVectors = (1 << 7), 2862 Use128BitVectors = (1 << 8), 2863 2864 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 2865 VectorRet = AddRetType | VectorizeRetType, 2866 VectorRetGetArgs01 = 2867 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 2868 FpCmpzModifiers = 2869 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 2870 }; 2871 2872 namespace { 2873 struct NeonIntrinsicInfo { 2874 const char *NameHint; 2875 unsigned BuiltinID; 2876 unsigned LLVMIntrinsic; 2877 unsigned AltLLVMIntrinsic; 2878 unsigned TypeModifier; 2879 2880 bool operator<(unsigned RHSBuiltinID) const { 2881 return BuiltinID < RHSBuiltinID; 2882 } 2883 bool operator<(const NeonIntrinsicInfo &TE) const { 2884 return 
BuiltinID < TE.BuiltinID; 2885 } 2886 }; 2887 } // end anonymous namespace 2888 2889 #define NEONMAP0(NameBase) \ 2890 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 2891 2892 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 2893 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 2894 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 2895 2896 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 2897 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 2898 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 2899 TypeModifier } 2900 2901 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 2902 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2903 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2904 NEONMAP1(vabs_v, arm_neon_vabs, 0), 2905 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 2906 NEONMAP0(vaddhn_v), 2907 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 2908 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 2909 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 2910 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 2911 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 2912 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 2913 NEONMAP1(vcage_v, arm_neon_vacge, 0), 2914 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 2915 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 2916 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 2917 NEONMAP1(vcale_v, arm_neon_vacge, 0), 2918 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 2919 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 2920 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 2921 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 2922 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 2923 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2924 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2925 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2926 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2927 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 2928 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 2929 NEONMAP0(vcvt_f32_v), 2930 NEONMAP2(vcvt_n_f32_v, 
arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2931 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2932 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2933 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2934 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2935 NEONMAP0(vcvt_s32_v), 2936 NEONMAP0(vcvt_s64_v), 2937 NEONMAP0(vcvt_u32_v), 2938 NEONMAP0(vcvt_u64_v), 2939 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 2940 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 2941 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 2942 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 2943 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 2944 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 2945 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 2946 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 2947 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 2948 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 2949 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 2950 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 2951 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 2952 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 2953 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 2954 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 2955 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 2956 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 2957 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 2958 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 2959 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 2960 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 2961 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 2962 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 2963 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 2964 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 2965 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 2966 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 2967 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 2968 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 2969 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 2970 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 2971 NEONMAP0(vcvtq_f32_v), 2972 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, 
arm_neon_vcvtfxs2fp, 0), 2973 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2974 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2975 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2976 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2977 NEONMAP0(vcvtq_s32_v), 2978 NEONMAP0(vcvtq_s64_v), 2979 NEONMAP0(vcvtq_u32_v), 2980 NEONMAP0(vcvtq_u64_v), 2981 NEONMAP0(vext_v), 2982 NEONMAP0(vextq_v), 2983 NEONMAP0(vfma_v), 2984 NEONMAP0(vfmaq_v), 2985 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2986 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2987 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2988 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2989 NEONMAP0(vld1_dup_v), 2990 NEONMAP1(vld1_v, arm_neon_vld1, 0), 2991 NEONMAP0(vld1q_dup_v), 2992 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 2993 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 2994 NEONMAP1(vld2_v, arm_neon_vld2, 0), 2995 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 2996 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 2997 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 2998 NEONMAP1(vld3_v, arm_neon_vld3, 0), 2999 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 3000 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 3001 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 3002 NEONMAP1(vld4_v, arm_neon_vld4, 0), 3003 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 3004 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 3005 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3006 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 3007 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 3008 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3009 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3010 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 3011 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 3012 NEONMAP2(vminq_v, arm_neon_vminu, 
arm_neon_vmins, Add1ArgType | UnsignedAlts), 3013 NEONMAP0(vmovl_v), 3014 NEONMAP0(vmovn_v), 3015 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 3016 NEONMAP0(vmull_v), 3017 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 3018 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3019 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3020 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 3021 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3022 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3023 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 3024 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 3025 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 3026 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 3027 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 3028 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3029 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3030 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 3031 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 3032 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 3033 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 3034 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 3035 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 3036 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 3037 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 3038 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 3039 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 3040 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 3041 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3042 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3043 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, 
arm_neon_vqshifts, UnsignedAlts), 3044 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3045 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3046 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3047 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 3048 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 3049 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3050 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3051 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 3052 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3053 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3054 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 3055 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 3056 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3057 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3058 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 3059 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 3060 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 3061 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 3062 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 3063 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 3064 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 3065 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 3066 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 3067 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 3068 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 3069 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 3070 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3071 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3072 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3073 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, 
arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

/// NeonIntrinsicInfo table for the AArch64 SIMD NEON builtins.
///
/// Keep the entries in sorted order when adding to this table:
/// findNeonIntrinsicInMap (later in this file) asserts std::is_sorted over the
/// map it is given and then binary-searches it with std::lower_bound on the
/// builtin ID.
///
/// NOTE(review): the NEONMAP0/1/2 macros are defined earlier in the file; going
/// by their use here the digit looks like the number of LLVM intrinsics named
/// in the entry, followed by a type-modifier mask -- confirm against the macro
/// definitions. In EmitCommonNeonBuiltinExpr the alternate intrinsic is chosen
/// when UnsignedAlts is set in the modifier and the NEON type is not unsigned.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};

/// NeonIntrinsicInfo table for the AArch64 SISD NEON builtins.
///
/// Keep the entries in sorted order when adding to this table:
/// findNeonIntrinsicInMap (later in this file) asserts std::is_sorted over the
/// map it is given and then binary-searches it with std::lower_bound on the
/// builtin ID.
///
/// Every entry names an LLVM intrinsic. EmitCommonNeonSISDBuiltinExpr, which
/// takes a NeonIntrinsicInfo (presumably one found in this table -- verify
/// against the caller), asserts that the intrinsic ID is non-zero and hands the
/// entry's type modifier to LookupNeonLLVMIntrinsic to pick the overload.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 3416 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3417 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 3418 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3419 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 3420 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 3421 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 3422 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3423 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 3424 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3425 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 3426 }; 3427 3428 #undef NEONMAP0 3429 #undef NEONMAP1 3430 #undef NEONMAP2 3431 3432 static bool NEONSIMDIntrinsicsProvenSorted = false; 3433 3434 static bool AArch64SIMDIntrinsicsProvenSorted = false; 3435 static bool AArch64SISDIntrinsicsProvenSorted = false; 3436 3437 3438 static const NeonIntrinsicInfo * 3439 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 3440 unsigned BuiltinID, bool &MapProvenSorted) { 3441 3442 #ifndef NDEBUG 3443 if (!MapProvenSorted) { 3444 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 3445 MapProvenSorted = true; 3446 } 3447 #endif 3448 3449 const NeonIntrinsicInfo *Builtin = 3450 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 3451 3452 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 3453 return Builtin; 3454 3455 return nullptr; 3456 } 3457 3458 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 3459 unsigned Modifier, 3460 llvm::Type *ArgType, 3461 const CallExpr *E) { 3462 int VectorSize = 0; 3463 if (Modifier & Use64BitVectors) 3464 VectorSize = 64; 3465 else if (Modifier & Use128BitVectors) 3466 VectorSize = 128; 3467 3468 // Return type. 
3469 SmallVector<llvm::Type *, 3> Tys; 3470 if (Modifier & AddRetType) { 3471 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 3472 if (Modifier & VectorizeRetType) 3473 Ty = llvm::VectorType::get( 3474 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 3475 3476 Tys.push_back(Ty); 3477 } 3478 3479 // Arguments. 3480 if (Modifier & VectorizeArgTypes) { 3481 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 3482 ArgType = llvm::VectorType::get(ArgType, Elts); 3483 } 3484 3485 if (Modifier & (Add1ArgType | Add2ArgTypes)) 3486 Tys.push_back(ArgType); 3487 3488 if (Modifier & Add2ArgTypes) 3489 Tys.push_back(ArgType); 3490 3491 if (Modifier & InventFloatType) 3492 Tys.push_back(FloatTy); 3493 3494 return CGM.getIntrinsic(IntrinsicID, Tys); 3495 } 3496 3497 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 3498 const NeonIntrinsicInfo &SISDInfo, 3499 SmallVectorImpl<Value *> &Ops, 3500 const CallExpr *E) { 3501 unsigned BuiltinID = SISDInfo.BuiltinID; 3502 unsigned int Int = SISDInfo.LLVMIntrinsic; 3503 unsigned Modifier = SISDInfo.TypeModifier; 3504 const char *s = SISDInfo.NameHint; 3505 3506 switch (BuiltinID) { 3507 case NEON::BI__builtin_neon_vcled_s64: 3508 case NEON::BI__builtin_neon_vcled_u64: 3509 case NEON::BI__builtin_neon_vcles_f32: 3510 case NEON::BI__builtin_neon_vcled_f64: 3511 case NEON::BI__builtin_neon_vcltd_s64: 3512 case NEON::BI__builtin_neon_vcltd_u64: 3513 case NEON::BI__builtin_neon_vclts_f32: 3514 case NEON::BI__builtin_neon_vcltd_f64: 3515 case NEON::BI__builtin_neon_vcales_f32: 3516 case NEON::BI__builtin_neon_vcaled_f64: 3517 case NEON::BI__builtin_neon_vcalts_f32: 3518 case NEON::BI__builtin_neon_vcaltd_f64: 3519 // Only one direction of comparisons actually exist, cmle is actually a cmge 3520 // with swapped operands. The table gives us the right intrinsic but we 3521 // still need to do the swap. 
3522 std::swap(Ops[0], Ops[1]); 3523 break; 3524 } 3525 3526 assert(Int && "Generic code assumes a valid intrinsic"); 3527 3528 // Determine the type(s) of this overloaded AArch64 intrinsic. 3529 const Expr *Arg = E->getArg(0); 3530 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 3531 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 3532 3533 int j = 0; 3534 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 3535 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3536 ai != ae; ++ai, ++j) { 3537 llvm::Type *ArgTy = ai->getType(); 3538 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 3539 ArgTy->getPrimitiveSizeInBits()) 3540 continue; 3541 3542 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 3543 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 3544 // it before inserting. 3545 Ops[j] = 3546 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 3547 Ops[j] = 3548 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 3549 } 3550 3551 Value *Result = CGF.EmitNeonCall(F, Ops, s); 3552 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 3553 if (ResultType->getPrimitiveSizeInBits() < 3554 Result->getType()->getPrimitiveSizeInBits()) 3555 return CGF.Builder.CreateExtractElement(Result, C0); 3556 3557 return CGF.Builder.CreateBitCast(Result, ResultType, s); 3558 } 3559 3560 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 3561 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 3562 const char *NameHint, unsigned Modifier, const CallExpr *E, 3563 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) { 3564 // Get the last argument, which specifies the vector type. 3565 llvm::APSInt NeonTypeConst; 3566 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3567 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 3568 return nullptr; 3569 3570 // Determine the type of this overloaded NEON intrinsic. 
3571 NeonTypeFlags Type(NeonTypeConst.getZExtValue()); 3572 bool Usgn = Type.isUnsigned(); 3573 bool Quad = Type.isQuad(); 3574 3575 llvm::VectorType *VTy = GetNeonType(this, Type); 3576 llvm::Type *Ty = VTy; 3577 if (!Ty) 3578 return nullptr; 3579 3580 auto getAlignmentValue32 = [&](Address addr) -> Value* { 3581 return Builder.getInt32(addr.getAlignment().getQuantity()); 3582 }; 3583 3584 unsigned Int = LLVMIntrinsic; 3585 if ((Modifier & UnsignedAlts) && !Usgn) 3586 Int = AltLLVMIntrinsic; 3587 3588 switch (BuiltinID) { 3589 default: break; 3590 case NEON::BI__builtin_neon_vabs_v: 3591 case NEON::BI__builtin_neon_vabsq_v: 3592 if (VTy->getElementType()->isFloatingPointTy()) 3593 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 3594 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 3595 case NEON::BI__builtin_neon_vaddhn_v: { 3596 llvm::VectorType *SrcTy = 3597 llvm::VectorType::getExtendedElementVectorType(VTy); 3598 3599 // %sum = add <4 x i32> %lhs, %rhs 3600 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3601 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 3602 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 3603 3604 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 3605 Constant *ShiftAmt = 3606 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 3607 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 3608 3609 // %res = trunc <4 x i32> %high to <4 x i16> 3610 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 3611 } 3612 case NEON::BI__builtin_neon_vcale_v: 3613 case NEON::BI__builtin_neon_vcaleq_v: 3614 case NEON::BI__builtin_neon_vcalt_v: 3615 case NEON::BI__builtin_neon_vcaltq_v: 3616 std::swap(Ops[0], Ops[1]); 3617 case NEON::BI__builtin_neon_vcage_v: 3618 case NEON::BI__builtin_neon_vcageq_v: 3619 case NEON::BI__builtin_neon_vcagt_v: 3620 case NEON::BI__builtin_neon_vcagtq_v: { 3621 llvm::Type *VecFlt = llvm::VectorType::get( 3622 VTy->getScalarSizeInBits() == 32 ? 
FloatTy : DoubleTy, 3623 VTy->getNumElements()); 3624 llvm::Type *Tys[] = { VTy, VecFlt }; 3625 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3626 return EmitNeonCall(F, Ops, NameHint); 3627 } 3628 case NEON::BI__builtin_neon_vclz_v: 3629 case NEON::BI__builtin_neon_vclzq_v: 3630 // We generate target-independent intrinsic, which needs a second argument 3631 // for whether or not clz of zero is undefined; on ARM it isn't. 3632 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 3633 break; 3634 case NEON::BI__builtin_neon_vcvt_f32_v: 3635 case NEON::BI__builtin_neon_vcvtq_f32_v: 3636 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3637 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); 3638 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 3639 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 3640 case NEON::BI__builtin_neon_vcvt_n_f32_v: 3641 case NEON::BI__builtin_neon_vcvt_n_f64_v: 3642 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 3643 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 3644 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; 3645 Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; 3646 Function *F = CGM.getIntrinsic(Int, Tys); 3647 return EmitNeonCall(F, Ops, "vcvt_n"); 3648 } 3649 case NEON::BI__builtin_neon_vcvt_n_s32_v: 3650 case NEON::BI__builtin_neon_vcvt_n_u32_v: 3651 case NEON::BI__builtin_neon_vcvt_n_s64_v: 3652 case NEON::BI__builtin_neon_vcvt_n_u64_v: 3653 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 3654 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 3655 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 3656 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 3657 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 3658 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3659 return EmitNeonCall(F, Ops, "vcvt_n"); 3660 } 3661 case NEON::BI__builtin_neon_vcvt_s32_v: 3662 case NEON::BI__builtin_neon_vcvt_u32_v: 3663 case NEON::BI__builtin_neon_vcvt_s64_v: 3664 case NEON::BI__builtin_neon_vcvt_u64_v: 3665 case NEON::BI__builtin_neon_vcvtq_s32_v: 3666 case NEON::BI__builtin_neon_vcvtq_u32_v: 3667 case NEON::BI__builtin_neon_vcvtq_s64_v: 3668 case NEON::BI__builtin_neon_vcvtq_u64_v: { 3669 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 3670 return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 3671 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 3672 } 3673 case NEON::BI__builtin_neon_vcvta_s32_v: 3674 case NEON::BI__builtin_neon_vcvta_s64_v: 3675 case NEON::BI__builtin_neon_vcvta_u32_v: 3676 case NEON::BI__builtin_neon_vcvta_u64_v: 3677 case NEON::BI__builtin_neon_vcvtaq_s32_v: 3678 case NEON::BI__builtin_neon_vcvtaq_s64_v: 3679 case NEON::BI__builtin_neon_vcvtaq_u32_v: 3680 case NEON::BI__builtin_neon_vcvtaq_u64_v: 3681 case NEON::BI__builtin_neon_vcvtn_s32_v: 3682 case NEON::BI__builtin_neon_vcvtn_s64_v: 3683 case NEON::BI__builtin_neon_vcvtn_u32_v: 3684 case NEON::BI__builtin_neon_vcvtn_u64_v: 3685 case NEON::BI__builtin_neon_vcvtnq_s32_v: 3686 case NEON::BI__builtin_neon_vcvtnq_s64_v: 3687 case NEON::BI__builtin_neon_vcvtnq_u32_v: 3688 case NEON::BI__builtin_neon_vcvtnq_u64_v: 3689 case NEON::BI__builtin_neon_vcvtp_s32_v: 3690 case NEON::BI__builtin_neon_vcvtp_s64_v: 3691 case NEON::BI__builtin_neon_vcvtp_u32_v: 3692 case NEON::BI__builtin_neon_vcvtp_u64_v: 3693 case NEON::BI__builtin_neon_vcvtpq_s32_v: 3694 case NEON::BI__builtin_neon_vcvtpq_s64_v: 3695 case NEON::BI__builtin_neon_vcvtpq_u32_v: 3696 case NEON::BI__builtin_neon_vcvtpq_u64_v: 3697 case NEON::BI__builtin_neon_vcvtm_s32_v: 3698 case NEON::BI__builtin_neon_vcvtm_s64_v: 3699 case NEON::BI__builtin_neon_vcvtm_u32_v: 3700 case NEON::BI__builtin_neon_vcvtm_u64_v: 3701 case NEON::BI__builtin_neon_vcvtmq_s32_v: 3702 case NEON::BI__builtin_neon_vcvtmq_s64_v: 3703 case NEON::BI__builtin_neon_vcvtmq_u32_v: 3704 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 3705 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 3706 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 3707 } 3708 case NEON::BI__builtin_neon_vext_v: 3709 case NEON::BI__builtin_neon_vextq_v: { 3710 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 3711 SmallVector<uint32_t, 16> Indices; 3712 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3713 
Indices.push_back(i+CV); 3714 3715 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3716 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3717 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); 3718 } 3719 case NEON::BI__builtin_neon_vfma_v: 3720 case NEON::BI__builtin_neon_vfmaq_v: { 3721 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3722 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3723 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3724 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3725 3726 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 3727 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 3728 } 3729 case NEON::BI__builtin_neon_vld1_v: 3730 case NEON::BI__builtin_neon_vld1q_v: { 3731 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 3732 Ops.push_back(getAlignmentValue32(PtrOp0)); 3733 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); 3734 } 3735 case NEON::BI__builtin_neon_vld2_v: 3736 case NEON::BI__builtin_neon_vld2q_v: 3737 case NEON::BI__builtin_neon_vld3_v: 3738 case NEON::BI__builtin_neon_vld3q_v: 3739 case NEON::BI__builtin_neon_vld4_v: 3740 case NEON::BI__builtin_neon_vld4q_v: { 3741 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 3742 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3743 Value *Align = getAlignmentValue32(PtrOp1); 3744 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); 3745 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3746 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3747 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 3748 } 3749 case NEON::BI__builtin_neon_vld1_dup_v: 3750 case NEON::BI__builtin_neon_vld1q_dup_v: { 3751 Value *V = UndefValue::get(Ty); 3752 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 3753 PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty); 3754 LoadInst *Ld = Builder.CreateLoad(PtrOp0); 3755 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3756 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 3757 return EmitNeonSplat(Ops[0], CI); 3758 } 3759 case 
NEON::BI__builtin_neon_vld2_lane_v: 3760 case NEON::BI__builtin_neon_vld2q_lane_v: 3761 case NEON::BI__builtin_neon_vld3_lane_v: 3762 case NEON::BI__builtin_neon_vld3q_lane_v: 3763 case NEON::BI__builtin_neon_vld4_lane_v: 3764 case NEON::BI__builtin_neon_vld4q_lane_v: { 3765 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 3766 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3767 for (unsigned I = 2; I < Ops.size() - 1; ++I) 3768 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 3769 Ops.push_back(getAlignmentValue32(PtrOp1)); 3770 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint); 3771 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3772 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3773 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 3774 } 3775 case NEON::BI__builtin_neon_vmovl_v: { 3776 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 3777 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 3778 if (Usgn) 3779 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 3780 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 3781 } 3782 case NEON::BI__builtin_neon_vmovn_v: { 3783 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 3784 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 3785 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 3786 } 3787 case NEON::BI__builtin_neon_vmull_v: 3788 // FIXME: the integer vmull operations could be emitted in terms of pure 3789 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 3790 // hoisting the exts outside loops. Until global ISel comes along that can 3791 // see through such movement this leads to bad CodeGen. So we need an 3792 // intrinsic for now. 3793 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 3794 Int = Type.isPoly() ? 
(unsigned)Intrinsic::arm_neon_vmullp : Int; 3795 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 3796 case NEON::BI__builtin_neon_vpadal_v: 3797 case NEON::BI__builtin_neon_vpadalq_v: { 3798 // The source operand type has twice as many elements of half the size. 3799 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 3800 llvm::Type *EltTy = 3801 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 3802 llvm::Type *NarrowTy = 3803 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 3804 llvm::Type *Tys[2] = { Ty, NarrowTy }; 3805 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 3806 } 3807 case NEON::BI__builtin_neon_vpaddl_v: 3808 case NEON::BI__builtin_neon_vpaddlq_v: { 3809 // The source operand type has twice as many elements of half the size. 3810 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 3811 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 3812 llvm::Type *NarrowTy = 3813 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 3814 llvm::Type *Tys[2] = { Ty, NarrowTy }; 3815 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 3816 } 3817 case NEON::BI__builtin_neon_vqdmlal_v: 3818 case NEON::BI__builtin_neon_vqdmlsl_v: { 3819 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 3820 Ops[1] = 3821 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); 3822 Ops.resize(2); 3823 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); 3824 } 3825 case NEON::BI__builtin_neon_vqshl_n_v: 3826 case NEON::BI__builtin_neon_vqshlq_n_v: 3827 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 3828 1, false); 3829 case NEON::BI__builtin_neon_vqshlu_n_v: 3830 case NEON::BI__builtin_neon_vqshluq_n_v: 3831 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 3832 1, false); 3833 case NEON::BI__builtin_neon_vrecpe_v: 3834 case NEON::BI__builtin_neon_vrecpeq_v: 3835 case 
NEON::BI__builtin_neon_vrsqrte_v: 3836 case NEON::BI__builtin_neon_vrsqrteq_v: 3837 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic; 3838 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 3839 3840 case NEON::BI__builtin_neon_vrshr_n_v: 3841 case NEON::BI__builtin_neon_vrshrq_n_v: 3842 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 3843 1, true); 3844 case NEON::BI__builtin_neon_vshl_n_v: 3845 case NEON::BI__builtin_neon_vshlq_n_v: 3846 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 3847 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 3848 "vshl_n"); 3849 case NEON::BI__builtin_neon_vshll_n_v: { 3850 llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); 3851 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3852 if (Usgn) 3853 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 3854 else 3855 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 3856 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 3857 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 3858 } 3859 case NEON::BI__builtin_neon_vshrn_n_v: { 3860 llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); 3861 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3862 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 3863 if (Usgn) 3864 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 3865 else 3866 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 3867 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 3868 } 3869 case NEON::BI__builtin_neon_vshr_n_v: 3870 case NEON::BI__builtin_neon_vshrq_n_v: 3871 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); 3872 case NEON::BI__builtin_neon_vst1_v: 3873 case NEON::BI__builtin_neon_vst1q_v: 3874 case NEON::BI__builtin_neon_vst2_v: 3875 case NEON::BI__builtin_neon_vst2q_v: 3876 case NEON::BI__builtin_neon_vst3_v: 3877 case NEON::BI__builtin_neon_vst3q_v: 3878 case NEON::BI__builtin_neon_vst4_v: 3879 case NEON::BI__builtin_neon_vst4q_v: 3880 case NEON::BI__builtin_neon_vst2_lane_v: 3881 
case NEON::BI__builtin_neon_vst2q_lane_v: 3882 case NEON::BI__builtin_neon_vst3_lane_v: 3883 case NEON::BI__builtin_neon_vst3q_lane_v: 3884 case NEON::BI__builtin_neon_vst4_lane_v: 3885 case NEON::BI__builtin_neon_vst4q_lane_v: { 3886 llvm::Type *Tys[] = {Int8PtrTy, Ty}; 3887 Ops.push_back(getAlignmentValue32(PtrOp0)); 3888 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 3889 } 3890 case NEON::BI__builtin_neon_vsubhn_v: { 3891 llvm::VectorType *SrcTy = 3892 llvm::VectorType::getExtendedElementVectorType(VTy); 3893 3894 // %sum = add <4 x i32> %lhs, %rhs 3895 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3896 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 3897 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 3898 3899 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 3900 Constant *ShiftAmt = 3901 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 3902 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 3903 3904 // %res = trunc <4 x i32> %high to <4 x i16> 3905 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 3906 } 3907 case NEON::BI__builtin_neon_vtrn_v: 3908 case NEON::BI__builtin_neon_vtrnq_v: { 3909 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3910 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3911 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3912 Value *SV = nullptr; 3913 3914 for (unsigned vi = 0; vi != 2; ++vi) { 3915 SmallVector<uint32_t, 16> Indices; 3916 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 3917 Indices.push_back(i+vi); 3918 Indices.push_back(i+e+vi); 3919 } 3920 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 3921 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 3922 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 3923 } 3924 return SV; 3925 } 3926 case NEON::BI__builtin_neon_vtst_v: 3927 case NEON::BI__builtin_neon_vtstq_v: { 3928 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3929 Ops[1] = 
Builder.CreateBitCast(Ops[1], Ty); 3930 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 3931 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 3932 ConstantAggregateZero::get(Ty)); 3933 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 3934 } 3935 case NEON::BI__builtin_neon_vuzp_v: 3936 case NEON::BI__builtin_neon_vuzpq_v: { 3937 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3938 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3939 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3940 Value *SV = nullptr; 3941 3942 for (unsigned vi = 0; vi != 2; ++vi) { 3943 SmallVector<uint32_t, 16> Indices; 3944 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3945 Indices.push_back(2*i+vi); 3946 3947 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 3948 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 3949 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 3950 } 3951 return SV; 3952 } 3953 case NEON::BI__builtin_neon_vzip_v: 3954 case NEON::BI__builtin_neon_vzipq_v: { 3955 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3956 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3957 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3958 Value *SV = nullptr; 3959 3960 for (unsigned vi = 0; vi != 2; ++vi) { 3961 SmallVector<uint32_t, 16> Indices; 3962 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 3963 Indices.push_back((i + vi*e) >> 1); 3964 Indices.push_back(((i + vi*e) >> 1)+e); 3965 } 3966 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 3967 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 3968 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 3969 } 3970 return SV; 3971 } 3972 } 3973 3974 assert(Int && "Expected valid intrinsic number"); 3975 3976 // Determine the type(s) of this overloaded AArch64 intrinsic. 
3977 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 3978 3979 Value *Result = EmitNeonCall(F, Ops, NameHint); 3980 llvm::Type *ResultType = ConvertType(E->getType()); 3981 // AArch64 intrinsic one-element vector type cast to 3982 // scalar type expected by the builtin 3983 return Builder.CreateBitCast(Result, ResultType, NameHint); 3984 } 3985 3986 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 3987 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 3988 const CmpInst::Predicate Ip, const Twine &Name) { 3989 llvm::Type *OTy = Op->getType(); 3990 3991 // FIXME: this is utterly horrific. We should not be looking at previous 3992 // codegen context to find out what needs doing. Unfortunately TableGen 3993 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 3994 // (etc). 3995 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 3996 OTy = BI->getOperand(0)->getType(); 3997 3998 Op = Builder.CreateBitCast(Op, OTy); 3999 if (OTy->getScalarType()->isFloatingPointTy()) { 4000 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 4001 } else { 4002 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 4003 } 4004 return Builder.CreateSExt(Op, Ty, Name); 4005 } 4006 4007 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 4008 Value *ExtOp, Value *IndexOp, 4009 llvm::Type *ResTy, unsigned IntID, 4010 const char *Name) { 4011 SmallVector<Value *, 2> TblOps; 4012 if (ExtOp) 4013 TblOps.push_back(ExtOp); 4014 4015 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 4016 SmallVector<uint32_t, 16> Indices; 4017 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 4018 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 4019 Indices.push_back(2*i); 4020 Indices.push_back(2*i+1); 4021 } 4022 4023 int PairPos = 0, End = Ops.size() - 1; 4024 while (PairPos < End) { 4025 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4026 
Ops[PairPos+1], Indices, 4027 Name)); 4028 PairPos += 2; 4029 } 4030 4031 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 4032 // of the 128-bit lookup table with zero. 4033 if (PairPos == End) { 4034 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 4035 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4036 ZeroTbl, Indices, Name)); 4037 } 4038 4039 Function *TblF; 4040 TblOps.push_back(IndexOp); 4041 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 4042 4043 return CGF.EmitNeonCall(TblF, TblOps, Name); 4044 } 4045 4046 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 4047 unsigned Value; 4048 switch (BuiltinID) { 4049 default: 4050 return nullptr; 4051 case ARM::BI__builtin_arm_nop: 4052 Value = 0; 4053 break; 4054 case ARM::BI__builtin_arm_yield: 4055 case ARM::BI__yield: 4056 Value = 1; 4057 break; 4058 case ARM::BI__builtin_arm_wfe: 4059 case ARM::BI__wfe: 4060 Value = 2; 4061 break; 4062 case ARM::BI__builtin_arm_wfi: 4063 case ARM::BI__wfi: 4064 Value = 3; 4065 break; 4066 case ARM::BI__builtin_arm_sev: 4067 case ARM::BI__sev: 4068 Value = 4; 4069 break; 4070 case ARM::BI__builtin_arm_sevl: 4071 case ARM::BI__sevl: 4072 Value = 5; 4073 break; 4074 } 4075 4076 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 4077 llvm::ConstantInt::get(Int32Ty, Value)); 4078 } 4079 4080 // Generates the IR for the read/write special register builtin, 4081 // ValueType is the type of the value that is to be written or read, 4082 // RegisterType is the type of the register being written to or read from. 4083 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 4084 const CallExpr *E, 4085 llvm::Type *RegisterType, 4086 llvm::Type *ValueType, 4087 bool IsRead, 4088 StringRef SysReg = "") { 4089 // write and register intrinsics only support 32 and 64 bit operations. 
4090 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4091 && "Unsupported size for register."); 4092 4093 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4094 CodeGen::CodeGenModule &CGM = CGF.CGM; 4095 LLVMContext &Context = CGM.getLLVMContext(); 4096 4097 if (SysReg.empty()) { 4098 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4099 SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); 4100 } 4101 4102 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4103 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4104 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4105 4106 llvm::Type *Types[] = { RegisterType }; 4107 4108 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4109 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4110 && "Can't fit 64-bit value in 32-bit register"); 4111 4112 if (IsRead) { 4113 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4114 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4115 4116 if (MixedTypes) 4117 // Read into 64 bit register and then truncate result to 32 bit. 4118 return Builder.CreateTrunc(Call, ValueType); 4119 4120 if (ValueType->isPointerTy()) 4121 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4122 return Builder.CreateIntToPtr(Call, ValueType); 4123 4124 return Call; 4125 } 4126 4127 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4128 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4129 if (MixedTypes) { 4130 // Extend 32 bit write value to 64 bit to pass to write. 4131 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4132 return Builder.CreateCall(F, { Metadata, ArgValue }); 4133 } 4134 4135 if (ValueType->isPointerTy()) { 4136 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
4137 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4138 return Builder.CreateCall(F, { Metadata, ArgValue }); 4139 } 4140 4141 return Builder.CreateCall(F, { Metadata, ArgValue }); 4142 } 4143 4144 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4145 /// argument that specifies the vector type. 4146 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4147 switch (BuiltinID) { 4148 default: break; 4149 case NEON::BI__builtin_neon_vget_lane_i8: 4150 case NEON::BI__builtin_neon_vget_lane_i16: 4151 case NEON::BI__builtin_neon_vget_lane_i32: 4152 case NEON::BI__builtin_neon_vget_lane_i64: 4153 case NEON::BI__builtin_neon_vget_lane_f32: 4154 case NEON::BI__builtin_neon_vgetq_lane_i8: 4155 case NEON::BI__builtin_neon_vgetq_lane_i16: 4156 case NEON::BI__builtin_neon_vgetq_lane_i32: 4157 case NEON::BI__builtin_neon_vgetq_lane_i64: 4158 case NEON::BI__builtin_neon_vgetq_lane_f32: 4159 case NEON::BI__builtin_neon_vset_lane_i8: 4160 case NEON::BI__builtin_neon_vset_lane_i16: 4161 case NEON::BI__builtin_neon_vset_lane_i32: 4162 case NEON::BI__builtin_neon_vset_lane_i64: 4163 case NEON::BI__builtin_neon_vset_lane_f32: 4164 case NEON::BI__builtin_neon_vsetq_lane_i8: 4165 case NEON::BI__builtin_neon_vsetq_lane_i16: 4166 case NEON::BI__builtin_neon_vsetq_lane_i32: 4167 case NEON::BI__builtin_neon_vsetq_lane_i64: 4168 case NEON::BI__builtin_neon_vsetq_lane_f32: 4169 case NEON::BI__builtin_neon_vsha1h_u32: 4170 case NEON::BI__builtin_neon_vsha1cq_u32: 4171 case NEON::BI__builtin_neon_vsha1pq_u32: 4172 case NEON::BI__builtin_neon_vsha1mq_u32: 4173 case ARM::BI_MoveToCoprocessor: 4174 case ARM::BI_MoveToCoprocessor2: 4175 return false; 4176 } 4177 return true; 4178 } 4179 4180 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4181 const CallExpr *E) { 4182 if (auto Hint = GetValueForARMHint(BuiltinID)) 4183 return Hint; 4184 4185 if (BuiltinID == ARM::BI__emit) { 4186 bool IsThumb = getTarget().getTriple().getArch() == 
llvm::Triple::thumb; 4187 llvm::FunctionType *FTy = 4188 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 4189 4190 APSInt Value; 4191 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 4192 llvm_unreachable("Sema will ensure that the parameter is constant"); 4193 4194 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 4195 4196 llvm::InlineAsm *Emit = 4197 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 4198 /*SideEffects=*/true) 4199 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 4200 /*SideEffects=*/true); 4201 4202 return Builder.CreateCall(Emit); 4203 } 4204 4205 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 4206 Value *Option = EmitScalarExpr(E->getArg(0)); 4207 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 4208 } 4209 4210 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 4211 Value *Address = EmitScalarExpr(E->getArg(0)); 4212 Value *RW = EmitScalarExpr(E->getArg(1)); 4213 Value *IsData = EmitScalarExpr(E->getArg(2)); 4214 4215 // Locality is not supported on ARM target 4216 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 4217 4218 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4219 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4220 } 4221 4222 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 4223 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), 4224 EmitScalarExpr(E->getArg(0)), 4225 "rbit"); 4226 } 4227 4228 if (BuiltinID == ARM::BI__clear_cache) { 4229 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4230 const FunctionDecl *FD = E->getDirectCallee(); 4231 Value *Ops[2]; 4232 for (unsigned i = 0; i < 2; i++) 4233 Ops[i] = EmitScalarExpr(E->getArg(i)); 4234 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4235 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4236 StringRef Name = FD->getName(); 4237 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4238 } 4239 
4240 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 4241 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 4242 Function *F; 4243 4244 switch (BuiltinID) { 4245 default: llvm_unreachable("unexpected builtin"); 4246 case ARM::BI__builtin_arm_mcrr: 4247 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 4248 break; 4249 case ARM::BI__builtin_arm_mcrr2: 4250 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 4251 break; 4252 } 4253 4254 // MCRR{2} instruction has 5 operands but 4255 // the intrinsic has 4 because Rt and Rt2 4256 // are represented as a single unsigned 64 4257 // bit integer in the intrinsic definition 4258 // but internally it's represented as 2 32 4259 // bit integers. 4260 4261 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4262 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4263 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 4264 Value *CRm = EmitScalarExpr(E->getArg(3)); 4265 4266 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4267 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 4268 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 4269 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 4270 4271 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 4272 } 4273 4274 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 4275 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 4276 Function *F; 4277 4278 switch (BuiltinID) { 4279 default: llvm_unreachable("unexpected builtin"); 4280 case ARM::BI__builtin_arm_mrrc: 4281 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 4282 break; 4283 case ARM::BI__builtin_arm_mrrc2: 4284 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 4285 break; 4286 } 4287 4288 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4289 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4290 Value *CRm = EmitScalarExpr(E->getArg(2)); 4291 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 4292 4293 // Returns an unsigned 64 bit integer, represented 4294 // as two 32 bit integers. 
4295 4296 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 4297 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 4298 Rt = Builder.CreateZExt(Rt, Int64Ty); 4299 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 4300 4301 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 4302 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 4303 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 4304 4305 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 4306 } 4307 4308 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 4309 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 4310 BuiltinID == ARM::BI__builtin_arm_ldaex) && 4311 getContext().getTypeSize(E->getType()) == 64) || 4312 BuiltinID == ARM::BI__ldrexd) { 4313 Function *F; 4314 4315 switch (BuiltinID) { 4316 default: llvm_unreachable("unexpected builtin"); 4317 case ARM::BI__builtin_arm_ldaex: 4318 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 4319 break; 4320 case ARM::BI__builtin_arm_ldrexd: 4321 case ARM::BI__builtin_arm_ldrex: 4322 case ARM::BI__ldrexd: 4323 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 4324 break; 4325 } 4326 4327 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4328 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4329 "ldrexd"); 4330 4331 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4332 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4333 Val0 = Builder.CreateZExt(Val0, Int64Ty); 4334 Val1 = Builder.CreateZExt(Val1, Int64Ty); 4335 4336 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 4337 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4338 Val = Builder.CreateOr(Val, Val1); 4339 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4340 } 4341 4342 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 4343 BuiltinID == ARM::BI__builtin_arm_ldaex) { 4344 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4345 4346 QualType Ty = E->getType(); 4347 llvm::Type *RealResTy = ConvertType(Ty); 4348 llvm::Type *IntResTy = 
llvm::IntegerType::get(getLLVMContext(), 4349 getContext().getTypeSize(Ty)); 4350 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 4351 4352 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 4353 ? Intrinsic::arm_ldaex 4354 : Intrinsic::arm_ldrex, 4355 LoadAddr->getType()); 4356 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 4357 4358 if (RealResTy->isPointerTy()) 4359 return Builder.CreateIntToPtr(Val, RealResTy); 4360 else { 4361 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4362 return Builder.CreateBitCast(Val, RealResTy); 4363 } 4364 } 4365 4366 if (BuiltinID == ARM::BI__builtin_arm_strexd || 4367 ((BuiltinID == ARM::BI__builtin_arm_stlex || 4368 BuiltinID == ARM::BI__builtin_arm_strex) && 4369 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 4370 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4371 ? Intrinsic::arm_stlexd 4372 : Intrinsic::arm_strexd); 4373 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr); 4374 4375 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4376 Value *Val = EmitScalarExpr(E->getArg(0)); 4377 Builder.CreateStore(Val, Tmp); 4378 4379 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4380 Val = Builder.CreateLoad(LdPtr); 4381 4382 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4383 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4384 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 4385 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 4386 } 4387 4388 if (BuiltinID == ARM::BI__builtin_arm_strex || 4389 BuiltinID == ARM::BI__builtin_arm_stlex) { 4390 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4391 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4392 4393 QualType Ty = E->getArg(0)->getType(); 4394 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4395 getContext().getTypeSize(Ty)); 4396 StoreAddr = Builder.CreateBitCast(StoreAddr, 
StoreTy->getPointerTo()); 4397 4398 if (StoreVal->getType()->isPointerTy()) 4399 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 4400 else { 4401 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 4402 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 4403 } 4404 4405 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4406 ? Intrinsic::arm_stlex 4407 : Intrinsic::arm_strex, 4408 StoreAddr->getType()); 4409 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 4410 } 4411 4412 switch (BuiltinID) { 4413 case ARM::BI__iso_volatile_load8: 4414 case ARM::BI__iso_volatile_load16: 4415 case ARM::BI__iso_volatile_load32: 4416 case ARM::BI__iso_volatile_load64: { 4417 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4418 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4419 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 4420 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4421 LoadSize.getQuantity() * 8); 4422 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4423 llvm::LoadInst *Load = 4424 Builder.CreateAlignedLoad(Ptr, LoadSize); 4425 Load->setVolatile(true); 4426 return Load; 4427 } 4428 case ARM::BI__iso_volatile_store8: 4429 case ARM::BI__iso_volatile_store16: 4430 case ARM::BI__iso_volatile_store32: 4431 case ARM::BI__iso_volatile_store64: { 4432 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4433 Value *Value = EmitScalarExpr(E->getArg(1)); 4434 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4435 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 4436 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4437 StoreSize.getQuantity() * 8); 4438 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4439 llvm::StoreInst *Store = 4440 Builder.CreateAlignedStore(Value, Ptr, 4441 StoreSize); 4442 Store->setVolatile(true); 4443 return Store; 4444 } 4445 } 4446 4447 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 4448 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 
4449 return Builder.CreateCall(F); 4450 } 4451 4452 // CRC32 4453 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4454 switch (BuiltinID) { 4455 case ARM::BI__builtin_arm_crc32b: 4456 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 4457 case ARM::BI__builtin_arm_crc32cb: 4458 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 4459 case ARM::BI__builtin_arm_crc32h: 4460 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 4461 case ARM::BI__builtin_arm_crc32ch: 4462 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 4463 case ARM::BI__builtin_arm_crc32w: 4464 case ARM::BI__builtin_arm_crc32d: 4465 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 4466 case ARM::BI__builtin_arm_crc32cw: 4467 case ARM::BI__builtin_arm_crc32cd: 4468 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 4469 } 4470 4471 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4472 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4473 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4474 4475 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 4476 // intrinsics, hence we need different codegen for these cases. 
4477 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 4478 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 4479 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4480 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 4481 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 4482 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 4483 4484 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4485 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 4486 return Builder.CreateCall(F, {Res, Arg1b}); 4487 } else { 4488 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 4489 4490 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4491 return Builder.CreateCall(F, {Arg0, Arg1}); 4492 } 4493 } 4494 4495 if (BuiltinID == ARM::BI__builtin_arm_rsr || 4496 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4497 BuiltinID == ARM::BI__builtin_arm_rsrp || 4498 BuiltinID == ARM::BI__builtin_arm_wsr || 4499 BuiltinID == ARM::BI__builtin_arm_wsr64 || 4500 BuiltinID == ARM::BI__builtin_arm_wsrp) { 4501 4502 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 4503 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4504 BuiltinID == ARM::BI__builtin_arm_rsrp; 4505 4506 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 4507 BuiltinID == ARM::BI__builtin_arm_wsrp; 4508 4509 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 4510 BuiltinID == ARM::BI__builtin_arm_wsr64; 4511 4512 llvm::Type *ValueType; 4513 llvm::Type *RegisterType; 4514 if (IsPointerBuiltin) { 4515 ValueType = VoidPtrTy; 4516 RegisterType = Int32Ty; 4517 } else if (Is64Bit) { 4518 ValueType = RegisterType = Int64Ty; 4519 } else { 4520 ValueType = RegisterType = Int32Ty; 4521 } 4522 4523 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 4524 } 4525 4526 // Find out if any arguments are required to be integer constant 4527 // expressions. 
4528 unsigned ICEArguments = 0; 4529 ASTContext::GetBuiltinTypeError Error; 4530 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4531 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4532 4533 auto getAlignmentValue32 = [&](Address addr) -> Value* { 4534 return Builder.getInt32(addr.getAlignment().getQuantity()); 4535 }; 4536 4537 Address PtrOp0 = Address::invalid(); 4538 Address PtrOp1 = Address::invalid(); 4539 SmallVector<Value*, 4> Ops; 4540 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 4541 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 4542 for (unsigned i = 0, e = NumArgs; i != e; i++) { 4543 if (i == 0) { 4544 switch (BuiltinID) { 4545 case NEON::BI__builtin_neon_vld1_v: 4546 case NEON::BI__builtin_neon_vld1q_v: 4547 case NEON::BI__builtin_neon_vld1q_lane_v: 4548 case NEON::BI__builtin_neon_vld1_lane_v: 4549 case NEON::BI__builtin_neon_vld1_dup_v: 4550 case NEON::BI__builtin_neon_vld1q_dup_v: 4551 case NEON::BI__builtin_neon_vst1_v: 4552 case NEON::BI__builtin_neon_vst1q_v: 4553 case NEON::BI__builtin_neon_vst1q_lane_v: 4554 case NEON::BI__builtin_neon_vst1_lane_v: 4555 case NEON::BI__builtin_neon_vst2_v: 4556 case NEON::BI__builtin_neon_vst2q_v: 4557 case NEON::BI__builtin_neon_vst2_lane_v: 4558 case NEON::BI__builtin_neon_vst2q_lane_v: 4559 case NEON::BI__builtin_neon_vst3_v: 4560 case NEON::BI__builtin_neon_vst3q_v: 4561 case NEON::BI__builtin_neon_vst3_lane_v: 4562 case NEON::BI__builtin_neon_vst3q_lane_v: 4563 case NEON::BI__builtin_neon_vst4_v: 4564 case NEON::BI__builtin_neon_vst4q_v: 4565 case NEON::BI__builtin_neon_vst4_lane_v: 4566 case NEON::BI__builtin_neon_vst4q_lane_v: 4567 // Get the alignment for the argument in addition to the value; 4568 // we'll use it later. 
4569 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 4570 Ops.push_back(PtrOp0.getPointer()); 4571 continue; 4572 } 4573 } 4574 if (i == 1) { 4575 switch (BuiltinID) { 4576 case NEON::BI__builtin_neon_vld2_v: 4577 case NEON::BI__builtin_neon_vld2q_v: 4578 case NEON::BI__builtin_neon_vld3_v: 4579 case NEON::BI__builtin_neon_vld3q_v: 4580 case NEON::BI__builtin_neon_vld4_v: 4581 case NEON::BI__builtin_neon_vld4q_v: 4582 case NEON::BI__builtin_neon_vld2_lane_v: 4583 case NEON::BI__builtin_neon_vld2q_lane_v: 4584 case NEON::BI__builtin_neon_vld3_lane_v: 4585 case NEON::BI__builtin_neon_vld3q_lane_v: 4586 case NEON::BI__builtin_neon_vld4_lane_v: 4587 case NEON::BI__builtin_neon_vld4q_lane_v: 4588 case NEON::BI__builtin_neon_vld2_dup_v: 4589 case NEON::BI__builtin_neon_vld3_dup_v: 4590 case NEON::BI__builtin_neon_vld4_dup_v: 4591 // Get the alignment for the argument in addition to the value; 4592 // we'll use it later. 4593 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 4594 Ops.push_back(PtrOp1.getPointer()); 4595 continue; 4596 } 4597 } 4598 4599 if ((ICEArguments & (1 << i)) == 0) { 4600 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4601 } else { 4602 // If this is required to be a constant, constant fold it so that we know 4603 // that the generated intrinsic gets a ConstantInt. 
4604 llvm::APSInt Result; 4605 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 4606 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 4607 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 4608 } 4609 } 4610 4611 switch (BuiltinID) { 4612 default: break; 4613 4614 case NEON::BI__builtin_neon_vget_lane_i8: 4615 case NEON::BI__builtin_neon_vget_lane_i16: 4616 case NEON::BI__builtin_neon_vget_lane_i32: 4617 case NEON::BI__builtin_neon_vget_lane_i64: 4618 case NEON::BI__builtin_neon_vget_lane_f32: 4619 case NEON::BI__builtin_neon_vgetq_lane_i8: 4620 case NEON::BI__builtin_neon_vgetq_lane_i16: 4621 case NEON::BI__builtin_neon_vgetq_lane_i32: 4622 case NEON::BI__builtin_neon_vgetq_lane_i64: 4623 case NEON::BI__builtin_neon_vgetq_lane_f32: 4624 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 4625 4626 case NEON::BI__builtin_neon_vset_lane_i8: 4627 case NEON::BI__builtin_neon_vset_lane_i16: 4628 case NEON::BI__builtin_neon_vset_lane_i32: 4629 case NEON::BI__builtin_neon_vset_lane_i64: 4630 case NEON::BI__builtin_neon_vset_lane_f32: 4631 case NEON::BI__builtin_neon_vsetq_lane_i8: 4632 case NEON::BI__builtin_neon_vsetq_lane_i16: 4633 case NEON::BI__builtin_neon_vsetq_lane_i32: 4634 case NEON::BI__builtin_neon_vsetq_lane_i64: 4635 case NEON::BI__builtin_neon_vsetq_lane_f32: 4636 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4637 4638 case NEON::BI__builtin_neon_vsha1h_u32: 4639 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 4640 "vsha1h"); 4641 case NEON::BI__builtin_neon_vsha1cq_u32: 4642 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 4643 "vsha1h"); 4644 case NEON::BI__builtin_neon_vsha1pq_u32: 4645 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 4646 "vsha1h"); 4647 case NEON::BI__builtin_neon_vsha1mq_u32: 4648 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 4649 "vsha1h"); 
4650 4651 // The ARM _MoveToCoprocessor builtins put the input register value as 4652 // the first argument, but the LLVM intrinsic expects it as the third one. 4653 case ARM::BI_MoveToCoprocessor: 4654 case ARM::BI_MoveToCoprocessor2: { 4655 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 4656 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 4657 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 4658 Ops[3], Ops[4], Ops[5]}); 4659 } 4660 case ARM::BI_BitScanForward: 4661 case ARM::BI_BitScanForward64: 4662 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 4663 case ARM::BI_BitScanReverse: 4664 case ARM::BI_BitScanReverse64: 4665 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 4666 4667 case ARM::BI_InterlockedAnd64: 4668 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 4669 case ARM::BI_InterlockedExchange64: 4670 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 4671 case ARM::BI_InterlockedExchangeAdd64: 4672 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 4673 case ARM::BI_InterlockedExchangeSub64: 4674 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 4675 case ARM::BI_InterlockedOr64: 4676 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 4677 case ARM::BI_InterlockedXor64: 4678 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 4679 case ARM::BI_InterlockedDecrement64: 4680 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 4681 case ARM::BI_InterlockedIncrement64: 4682 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 4683 } 4684 4685 // Get the last argument, which specifies the vector type. 
4686 assert(HasExtraArg); 4687 llvm::APSInt Result; 4688 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4689 if (!Arg->isIntegerConstantExpr(Result, getContext())) 4690 return nullptr; 4691 4692 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 4693 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 4694 // Determine the overloaded type of this builtin. 4695 llvm::Type *Ty; 4696 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 4697 Ty = FloatTy; 4698 else 4699 Ty = DoubleTy; 4700 4701 // Determine whether this is an unsigned conversion or not. 4702 bool usgn = Result.getZExtValue() == 1; 4703 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 4704 4705 // Call the appropriate intrinsic. 4706 Function *F = CGM.getIntrinsic(Int, Ty); 4707 return Builder.CreateCall(F, Ops, "vcvtr"); 4708 } 4709 4710 // Determine the type of this overloaded NEON intrinsic. 4711 NeonTypeFlags Type(Result.getZExtValue()); 4712 bool usgn = Type.isUnsigned(); 4713 bool rightShift = false; 4714 4715 llvm::VectorType *VTy = GetNeonType(this, Type); 4716 llvm::Type *Ty = VTy; 4717 if (!Ty) 4718 return nullptr; 4719 4720 // Many NEON builtins have identical semantics and uses in ARM and 4721 // AArch64. Emit these in a single function. 4722 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 4723 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4724 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 4725 if (Builtin) 4726 return EmitCommonNeonBuiltinExpr( 4727 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4728 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); 4729 4730 unsigned Int; 4731 switch (BuiltinID) { 4732 default: return nullptr; 4733 case NEON::BI__builtin_neon_vld1q_lane_v: 4734 // Handle 64-bit integer elements as a special case. Use shuffles of 4735 // one-element vectors to avoid poor code for i64 in the backend. 4736 if (VTy->getElementType()->isIntegerTy(64)) { 4737 // Extract the other lane. 
4738 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4739 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 4740 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 4741 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4742 // Load the value as a one-element vector. 4743 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 4744 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4745 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 4746 Value *Align = getAlignmentValue32(PtrOp0); 4747 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 4748 // Combine them. 4749 uint32_t Indices[] = {1 - Lane, Lane}; 4750 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 4751 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 4752 } 4753 // fall through 4754 case NEON::BI__builtin_neon_vld1_lane_v: { 4755 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4756 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 4757 Value *Ld = Builder.CreateLoad(PtrOp0); 4758 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 4759 } 4760 case NEON::BI__builtin_neon_vld2_dup_v: 4761 case NEON::BI__builtin_neon_vld3_dup_v: 4762 case NEON::BI__builtin_neon_vld4_dup_v: { 4763 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
4764 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 4765 switch (BuiltinID) { 4766 case NEON::BI__builtin_neon_vld2_dup_v: 4767 Int = Intrinsic::arm_neon_vld2; 4768 break; 4769 case NEON::BI__builtin_neon_vld3_dup_v: 4770 Int = Intrinsic::arm_neon_vld3; 4771 break; 4772 case NEON::BI__builtin_neon_vld4_dup_v: 4773 Int = Intrinsic::arm_neon_vld4; 4774 break; 4775 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4776 } 4777 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4778 Function *F = CGM.getIntrinsic(Int, Tys); 4779 llvm::Value *Align = getAlignmentValue32(PtrOp1); 4780 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 4781 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4782 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4783 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4784 } 4785 switch (BuiltinID) { 4786 case NEON::BI__builtin_neon_vld2_dup_v: 4787 Int = Intrinsic::arm_neon_vld2lane; 4788 break; 4789 case NEON::BI__builtin_neon_vld3_dup_v: 4790 Int = Intrinsic::arm_neon_vld3lane; 4791 break; 4792 case NEON::BI__builtin_neon_vld4_dup_v: 4793 Int = Intrinsic::arm_neon_vld4lane; 4794 break; 4795 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4796 } 4797 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4798 Function *F = CGM.getIntrinsic(Int, Tys); 4799 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 4800 4801 SmallVector<Value*, 6> Args; 4802 Args.push_back(Ops[1]); 4803 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 4804 4805 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 4806 Args.push_back(CI); 4807 Args.push_back(getAlignmentValue32(PtrOp1)); 4808 4809 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 4810 // splat lane 0 to all elts in each vector of the result. 
4811 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 4812 Value *Val = Builder.CreateExtractValue(Ops[1], i); 4813 Value *Elt = Builder.CreateBitCast(Val, Ty); 4814 Elt = EmitNeonSplat(Elt, CI); 4815 Elt = Builder.CreateBitCast(Elt, Val->getType()); 4816 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 4817 } 4818 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4819 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4820 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4821 } 4822 case NEON::BI__builtin_neon_vqrshrn_n_v: 4823 Int = 4824 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 4825 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 4826 1, true); 4827 case NEON::BI__builtin_neon_vqrshrun_n_v: 4828 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 4829 Ops, "vqrshrun_n", 1, true); 4830 case NEON::BI__builtin_neon_vqshrn_n_v: 4831 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 4832 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 4833 1, true); 4834 case NEON::BI__builtin_neon_vqshrun_n_v: 4835 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 4836 Ops, "vqshrun_n", 1, true); 4837 case NEON::BI__builtin_neon_vrecpe_v: 4838 case NEON::BI__builtin_neon_vrecpeq_v: 4839 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 4840 Ops, "vrecpe"); 4841 case NEON::BI__builtin_neon_vrshrn_n_v: 4842 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 4843 Ops, "vrshrn_n", 1, true); 4844 case NEON::BI__builtin_neon_vrsra_n_v: 4845 case NEON::BI__builtin_neon_vrsraq_n_v: 4846 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4847 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4848 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 4849 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 4850 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 4851 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 4852 case NEON::BI__builtin_neon_vsri_n_v: 4853 case NEON::BI__builtin_neon_vsriq_n_v: 4854 rightShift = true; 4855 case NEON::BI__builtin_neon_vsli_n_v: 4856 case NEON::BI__builtin_neon_vsliq_n_v: 4857 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 4858 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 4859 Ops, "vsli_n"); 4860 case NEON::BI__builtin_neon_vsra_n_v: 4861 case NEON::BI__builtin_neon_vsraq_n_v: 4862 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4863 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 4864 return Builder.CreateAdd(Ops[0], Ops[1]); 4865 case NEON::BI__builtin_neon_vst1q_lane_v: 4866 // Handle 64-bit integer elements as a special case. Use a shuffle to get 4867 // a one-element vector and avoid poor code for i64 in the backend. 
4868 if (VTy->getElementType()->isIntegerTy(64)) { 4869 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4870 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 4871 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4872 Ops[2] = getAlignmentValue32(PtrOp0); 4873 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 4874 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 4875 Tys), Ops); 4876 } 4877 // fall through 4878 case NEON::BI__builtin_neon_vst1_lane_v: { 4879 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4880 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 4881 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4882 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 4883 return St; 4884 } 4885 case NEON::BI__builtin_neon_vtbl1_v: 4886 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 4887 Ops, "vtbl1"); 4888 case NEON::BI__builtin_neon_vtbl2_v: 4889 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 4890 Ops, "vtbl2"); 4891 case NEON::BI__builtin_neon_vtbl3_v: 4892 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 4893 Ops, "vtbl3"); 4894 case NEON::BI__builtin_neon_vtbl4_v: 4895 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 4896 Ops, "vtbl4"); 4897 case NEON::BI__builtin_neon_vtbx1_v: 4898 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 4899 Ops, "vtbx1"); 4900 case NEON::BI__builtin_neon_vtbx2_v: 4901 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 4902 Ops, "vtbx2"); 4903 case NEON::BI__builtin_neon_vtbx3_v: 4904 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 4905 Ops, "vtbx3"); 4906 case NEON::BI__builtin_neon_vtbx4_v: 4907 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 4908 Ops, "vtbx4"); 4909 } 4910 } 4911 4912 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 4913 const CallExpr *E, 4914 SmallVectorImpl<Value *> &Ops) { 
4915 unsigned int Int = 0; 4916 const char *s = nullptr; 4917 4918 switch (BuiltinID) { 4919 default: 4920 return nullptr; 4921 case NEON::BI__builtin_neon_vtbl1_v: 4922 case NEON::BI__builtin_neon_vqtbl1_v: 4923 case NEON::BI__builtin_neon_vqtbl1q_v: 4924 case NEON::BI__builtin_neon_vtbl2_v: 4925 case NEON::BI__builtin_neon_vqtbl2_v: 4926 case NEON::BI__builtin_neon_vqtbl2q_v: 4927 case NEON::BI__builtin_neon_vtbl3_v: 4928 case NEON::BI__builtin_neon_vqtbl3_v: 4929 case NEON::BI__builtin_neon_vqtbl3q_v: 4930 case NEON::BI__builtin_neon_vtbl4_v: 4931 case NEON::BI__builtin_neon_vqtbl4_v: 4932 case NEON::BI__builtin_neon_vqtbl4q_v: 4933 break; 4934 case NEON::BI__builtin_neon_vtbx1_v: 4935 case NEON::BI__builtin_neon_vqtbx1_v: 4936 case NEON::BI__builtin_neon_vqtbx1q_v: 4937 case NEON::BI__builtin_neon_vtbx2_v: 4938 case NEON::BI__builtin_neon_vqtbx2_v: 4939 case NEON::BI__builtin_neon_vqtbx2q_v: 4940 case NEON::BI__builtin_neon_vtbx3_v: 4941 case NEON::BI__builtin_neon_vqtbx3_v: 4942 case NEON::BI__builtin_neon_vqtbx3q_v: 4943 case NEON::BI__builtin_neon_vtbx4_v: 4944 case NEON::BI__builtin_neon_vqtbx4_v: 4945 case NEON::BI__builtin_neon_vqtbx4q_v: 4946 break; 4947 } 4948 4949 assert(E->getNumArgs() >= 3); 4950 4951 // Get the last argument, which specifies the vector type. 4952 llvm::APSInt Result; 4953 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 4954 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 4955 return nullptr; 4956 4957 // Determine the type of this overloaded NEON intrinsic. 4958 NeonTypeFlags Type(Result.getZExtValue()); 4959 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 4960 if (!Ty) 4961 return nullptr; 4962 4963 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4964 4965 // AArch64 scalar builtins are not overloaded, they do not have an extra 4966 // argument that specifies the vector type, need to handle each case. 
4967 switch (BuiltinID) { 4968 case NEON::BI__builtin_neon_vtbl1_v: { 4969 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 4970 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 4971 "vtbl1"); 4972 } 4973 case NEON::BI__builtin_neon_vtbl2_v: { 4974 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 4975 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 4976 "vtbl1"); 4977 } 4978 case NEON::BI__builtin_neon_vtbl3_v: { 4979 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 4980 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 4981 "vtbl2"); 4982 } 4983 case NEON::BI__builtin_neon_vtbl4_v: { 4984 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 4985 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 4986 "vtbl2"); 4987 } 4988 case NEON::BI__builtin_neon_vtbx1_v: { 4989 Value *TblRes = 4990 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 4991 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 4992 4993 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 4994 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 4995 CmpRes = Builder.CreateSExt(CmpRes, Ty); 4996 4997 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 4998 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 4999 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5000 } 5001 case NEON::BI__builtin_neon_vtbx2_v: { 5002 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 5003 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 5004 "vtbx1"); 5005 } 5006 case NEON::BI__builtin_neon_vtbx3_v: { 5007 Value *TblRes = 5008 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 5009 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 5010 5011 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 5012 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 5013 TwentyFourV); 5014 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5015 5016 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 5017 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5018 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5019 } 5020 case NEON::BI__builtin_neon_vtbx4_v: { 5021 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 5022 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 5023 "vtbx2"); 5024 } 5025 case NEON::BI__builtin_neon_vqtbl1_v: 5026 case NEON::BI__builtin_neon_vqtbl1q_v: 5027 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 5028 case NEON::BI__builtin_neon_vqtbl2_v: 5029 case NEON::BI__builtin_neon_vqtbl2q_v: { 5030 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 5031 case NEON::BI__builtin_neon_vqtbl3_v: 5032 case NEON::BI__builtin_neon_vqtbl3q_v: 5033 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 5034 case NEON::BI__builtin_neon_vqtbl4_v: 5035 case NEON::BI__builtin_neon_vqtbl4q_v: 5036 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 5037 case NEON::BI__builtin_neon_vqtbx1_v: 5038 case NEON::BI__builtin_neon_vqtbx1q_v: 5039 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 5040 case NEON::BI__builtin_neon_vqtbx2_v: 5041 case NEON::BI__builtin_neon_vqtbx2q_v: 5042 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 5043 case NEON::BI__builtin_neon_vqtbx3_v: 5044 case NEON::BI__builtin_neon_vqtbx3q_v: 5045 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 5046 case NEON::BI__builtin_neon_vqtbx4_v: 5047 case NEON::BI__builtin_neon_vqtbx4q_v: 5048 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 5049 } 5050 } 5051 5052 if (!Int) 5053 return nullptr; 5054 5055 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 5056 return CGF.EmitNeonCall(F, Ops, s); 5057 } 5058 5059 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 5060 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 5061 Op = Builder.CreateBitCast(Op, Int16Ty); 5062 Value *V = UndefValue::get(VTy); 5063 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 5064 Op = 
Builder.CreateInsertElement(V, Op, CI); 5065 return Op; 5066 } 5067 5068 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 5069 const CallExpr *E) { 5070 unsigned HintID = static_cast<unsigned>(-1); 5071 switch (BuiltinID) { 5072 default: break; 5073 case AArch64::BI__builtin_arm_nop: 5074 HintID = 0; 5075 break; 5076 case AArch64::BI__builtin_arm_yield: 5077 HintID = 1; 5078 break; 5079 case AArch64::BI__builtin_arm_wfe: 5080 HintID = 2; 5081 break; 5082 case AArch64::BI__builtin_arm_wfi: 5083 HintID = 3; 5084 break; 5085 case AArch64::BI__builtin_arm_sev: 5086 HintID = 4; 5087 break; 5088 case AArch64::BI__builtin_arm_sevl: 5089 HintID = 5; 5090 break; 5091 } 5092 5093 if (HintID != static_cast<unsigned>(-1)) { 5094 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 5095 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 5096 } 5097 5098 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 5099 Value *Address = EmitScalarExpr(E->getArg(0)); 5100 Value *RW = EmitScalarExpr(E->getArg(1)); 5101 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 5102 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 5103 Value *IsData = EmitScalarExpr(E->getArg(4)); 5104 5105 Value *Locality = nullptr; 5106 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 5107 // Temporal fetch, needs to convert cache level to locality. 5108 Locality = llvm::ConstantInt::get(Int32Ty, 5109 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 5110 } else { 5111 // Streaming fetch. 5112 Locality = llvm::ConstantInt::get(Int32Ty, 0); 5113 } 5114 5115 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 5116 // PLDL3STRM or PLDL2STRM. 
5117 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5118 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5119 } 5120 5121 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5122 assert((getContext().getTypeSize(E->getType()) == 32) && 5123 "rbit of unusual size!"); 5124 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5125 return Builder.CreateCall( 5126 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 5127 } 5128 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5129 assert((getContext().getTypeSize(E->getType()) == 64) && 5130 "rbit of unusual size!"); 5131 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5132 return Builder.CreateCall( 5133 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 5134 } 5135 5136 if (BuiltinID == AArch64::BI__clear_cache) { 5137 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5138 const FunctionDecl *FD = E->getDirectCallee(); 5139 Value *Ops[2]; 5140 for (unsigned i = 0; i < 2; i++) 5141 Ops[i] = EmitScalarExpr(E->getArg(i)); 5142 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5143 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5144 StringRef Name = FD->getName(); 5145 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5146 } 5147 5148 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5149 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5150 getContext().getTypeSize(E->getType()) == 128) { 5151 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5152 ? 
Intrinsic::aarch64_ldaxp 5153 : Intrinsic::aarch64_ldxp); 5154 5155 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5156 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5157 "ldxp"); 5158 5159 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5160 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5161 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5162 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5163 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5164 5165 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5166 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5167 Val = Builder.CreateOr(Val, Val1); 5168 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5169 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5170 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5171 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5172 5173 QualType Ty = E->getType(); 5174 llvm::Type *RealResTy = ConvertType(Ty); 5175 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 5176 getContext().getTypeSize(Ty)); 5177 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 5178 5179 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5180 ? Intrinsic::aarch64_ldaxr 5181 : Intrinsic::aarch64_ldxr, 5182 LoadAddr->getType()); 5183 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5184 5185 if (RealResTy->isPointerTy()) 5186 return Builder.CreateIntToPtr(Val, RealResTy); 5187 5188 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5189 return Builder.CreateBitCast(Val, RealResTy); 5190 } 5191 5192 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5193 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5194 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5195 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5196 ? 
Intrinsic::aarch64_stlxp 5197 : Intrinsic::aarch64_stxp); 5198 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); 5199 5200 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5201 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5202 5203 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5204 llvm::Value *Val = Builder.CreateLoad(Tmp); 5205 5206 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5207 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5208 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5209 Int8PtrTy); 5210 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5211 } 5212 5213 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5214 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5215 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5216 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5217 5218 QualType Ty = E->getArg(0)->getType(); 5219 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5220 getContext().getTypeSize(Ty)); 5221 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5222 5223 if (StoreVal->getType()->isPointerTy()) 5224 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5225 else { 5226 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 5227 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5228 } 5229 5230 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5231 ? 
Intrinsic::aarch64_stlxr 5232 : Intrinsic::aarch64_stxr, 5233 StoreAddr->getType()); 5234 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5235 } 5236 5237 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5238 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5239 return Builder.CreateCall(F); 5240 } 5241 5242 // CRC32 5243 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5244 switch (BuiltinID) { 5245 case AArch64::BI__builtin_arm_crc32b: 5246 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5247 case AArch64::BI__builtin_arm_crc32cb: 5248 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5249 case AArch64::BI__builtin_arm_crc32h: 5250 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5251 case AArch64::BI__builtin_arm_crc32ch: 5252 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5253 case AArch64::BI__builtin_arm_crc32w: 5254 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5255 case AArch64::BI__builtin_arm_crc32cw: 5256 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5257 case AArch64::BI__builtin_arm_crc32d: 5258 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5259 case AArch64::BI__builtin_arm_crc32cd: 5260 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5261 } 5262 5263 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5264 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5265 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5266 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5267 5268 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 5269 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 5270 5271 return Builder.CreateCall(F, {Arg0, Arg1}); 5272 } 5273 5274 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 5275 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5276 BuiltinID == AArch64::BI__builtin_arm_rsrp || 5277 BuiltinID == AArch64::BI__builtin_arm_wsr || 5278 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 5279 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 5280 5281 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 5282 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5283 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5284 5285 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5286 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5287 5288 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5289 BuiltinID != AArch64::BI__builtin_arm_wsr; 5290 5291 llvm::Type *ValueType; 5292 llvm::Type *RegisterType = Int64Ty; 5293 if (IsPointerBuiltin) { 5294 ValueType = VoidPtrTy; 5295 } else if (Is64Bit) { 5296 ValueType = Int64Ty; 5297 } else { 5298 ValueType = Int32Ty; 5299 } 5300 5301 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5302 } 5303 5304 // Find out if any arguments are required to be integer constant 5305 // expressions. 5306 unsigned ICEArguments = 0; 5307 ASTContext::GetBuiltinTypeError Error; 5308 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5309 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5310 5311 llvm::SmallVector<Value*, 4> Ops; 5312 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5313 if ((ICEArguments & (1 << i)) == 0) { 5314 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5315 } else { 5316 // If this is required to be a constant, constant fold it so that we know 5317 // that the generated intrinsic gets a ConstantInt. 
5318 llvm::APSInt Result; 5319 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5320 assert(IsConst && "Constant arg isn't actually constant?"); 5321 (void)IsConst; 5322 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5323 } 5324 } 5325 5326 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5327 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5328 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5329 5330 if (Builtin) { 5331 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5332 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5333 assert(Result && "SISD intrinsic should have been handled"); 5334 return Result; 5335 } 5336 5337 llvm::APSInt Result; 5338 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5339 NeonTypeFlags Type(0); 5340 if (Arg->isIntegerConstantExpr(Result, getContext())) 5341 // Determine the type of this overloaded NEON intrinsic. 5342 Type = NeonTypeFlags(Result.getZExtValue()); 5343 5344 bool usgn = Type.isUnsigned(); 5345 bool quad = Type.isQuad(); 5346 5347 // Handle non-overloaded intrinsics first. 
5348 switch (BuiltinID) { 5349 default: break; 5350 case NEON::BI__builtin_neon_vldrq_p128: { 5351 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5352 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5353 return Builder.CreateDefaultAlignedLoad(Ptr); 5354 } 5355 case NEON::BI__builtin_neon_vstrq_p128: { 5356 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5357 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5358 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5359 } 5360 case NEON::BI__builtin_neon_vcvts_u32_f32: 5361 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5362 usgn = true; 5363 // FALL THROUGH 5364 case NEON::BI__builtin_neon_vcvts_s32_f32: 5365 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5366 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5367 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5368 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5369 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5370 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5371 if (usgn) 5372 return Builder.CreateFPToUI(Ops[0], InTy); 5373 return Builder.CreateFPToSI(Ops[0], InTy); 5374 } 5375 case NEON::BI__builtin_neon_vcvts_f32_u32: 5376 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5377 usgn = true; 5378 // FALL THROUGH 5379 case NEON::BI__builtin_neon_vcvts_f32_s32: 5380 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5381 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5382 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5383 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5384 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5385 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5386 if (usgn) 5387 return Builder.CreateUIToFP(Ops[0], FTy); 5388 return Builder.CreateSIToFP(Ops[0], FTy); 5389 } 5390 case NEON::BI__builtin_neon_vpaddd_s64: { 5391 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5392 Value *Vec = EmitScalarExpr(E->getArg(0)); 5393 // The vector is v2f64, so make sure it's bitcast to that. 5394 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5395 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5396 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5397 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5398 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5399 // Pairwise addition of a v2f64 into a scalar f64. 5400 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5401 } 5402 case NEON::BI__builtin_neon_vpaddd_f64: { 5403 llvm::Type *Ty = 5404 llvm::VectorType::get(DoubleTy, 2); 5405 Value *Vec = EmitScalarExpr(E->getArg(0)); 5406 // The vector is v2f64, so make sure it's bitcast to that. 5407 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5408 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5409 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5410 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5411 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5412 // Pairwise addition of a v2f64 into a scalar f64. 5413 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5414 } 5415 case NEON::BI__builtin_neon_vpadds_f32: { 5416 llvm::Type *Ty = 5417 llvm::VectorType::get(FloatTy, 2); 5418 Value *Vec = EmitScalarExpr(E->getArg(0)); 5419 // The vector is v2f32, so make sure it's bitcast to that. 
5420 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5421 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5422 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5423 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5424 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5425 // Pairwise addition of a v2f32 into a scalar f32. 5426 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5427 } 5428 case NEON::BI__builtin_neon_vceqzd_s64: 5429 case NEON::BI__builtin_neon_vceqzd_f64: 5430 case NEON::BI__builtin_neon_vceqzs_f32: 5431 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5432 return EmitAArch64CompareBuiltinExpr( 5433 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5434 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5435 case NEON::BI__builtin_neon_vcgezd_s64: 5436 case NEON::BI__builtin_neon_vcgezd_f64: 5437 case NEON::BI__builtin_neon_vcgezs_f32: 5438 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5439 return EmitAArch64CompareBuiltinExpr( 5440 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5441 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5442 case NEON::BI__builtin_neon_vclezd_s64: 5443 case NEON::BI__builtin_neon_vclezd_f64: 5444 case NEON::BI__builtin_neon_vclezs_f32: 5445 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5446 return EmitAArch64CompareBuiltinExpr( 5447 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5448 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5449 case NEON::BI__builtin_neon_vcgtzd_s64: 5450 case NEON::BI__builtin_neon_vcgtzd_f64: 5451 case NEON::BI__builtin_neon_vcgtzs_f32: 5452 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5453 return EmitAArch64CompareBuiltinExpr( 5454 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5455 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5456 case NEON::BI__builtin_neon_vcltzd_s64: 5457 case NEON::BI__builtin_neon_vcltzd_f64: 5458 case NEON::BI__builtin_neon_vcltzs_f32: 5459 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5460 return 
EmitAArch64CompareBuiltinExpr( 5461 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5462 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5463 5464 case NEON::BI__builtin_neon_vceqzd_u64: { 5465 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5466 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5467 Ops[0] = 5468 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5469 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5470 } 5471 case NEON::BI__builtin_neon_vceqd_f64: 5472 case NEON::BI__builtin_neon_vcled_f64: 5473 case NEON::BI__builtin_neon_vcltd_f64: 5474 case NEON::BI__builtin_neon_vcged_f64: 5475 case NEON::BI__builtin_neon_vcgtd_f64: { 5476 llvm::CmpInst::Predicate P; 5477 switch (BuiltinID) { 5478 default: llvm_unreachable("missing builtin ID in switch!"); 5479 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5480 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5481 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5482 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5483 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5484 } 5485 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5486 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5487 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5488 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5489 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5490 } 5491 case NEON::BI__builtin_neon_vceqs_f32: 5492 case NEON::BI__builtin_neon_vcles_f32: 5493 case NEON::BI__builtin_neon_vclts_f32: 5494 case NEON::BI__builtin_neon_vcges_f32: 5495 case NEON::BI__builtin_neon_vcgts_f32: { 5496 llvm::CmpInst::Predicate P; 5497 switch (BuiltinID) { 5498 default: llvm_unreachable("missing builtin ID in switch!"); 5499 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 5500 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 5501 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5502 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5503 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5504 } 5505 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5506 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5507 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5508 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5509 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5510 } 5511 case NEON::BI__builtin_neon_vceqd_s64: 5512 case NEON::BI__builtin_neon_vceqd_u64: 5513 case NEON::BI__builtin_neon_vcgtd_s64: 5514 case NEON::BI__builtin_neon_vcgtd_u64: 5515 case NEON::BI__builtin_neon_vcltd_s64: 5516 case NEON::BI__builtin_neon_vcltd_u64: 5517 case NEON::BI__builtin_neon_vcged_u64: 5518 case NEON::BI__builtin_neon_vcged_s64: 5519 case NEON::BI__builtin_neon_vcled_u64: 5520 case NEON::BI__builtin_neon_vcled_s64: { 5521 llvm::CmpInst::Predicate P; 5522 switch (BuiltinID) { 5523 default: llvm_unreachable("missing builtin ID in switch!"); 5524 case NEON::BI__builtin_neon_vceqd_s64: 5525 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5526 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5527 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5528 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5529 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5530 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5531 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5532 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5533 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5534 } 5535 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5536 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5537 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 5538 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5539 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5540 } 5541 case NEON::BI__builtin_neon_vtstd_s64: 5542 case NEON::BI__builtin_neon_vtstd_u64: { 5543 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5544 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5545 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5546 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 5547 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 5548 llvm::Constant::getNullValue(Int64Ty)); 5549 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 5550 } 5551 case NEON::BI__builtin_neon_vset_lane_i8: 5552 case NEON::BI__builtin_neon_vset_lane_i16: 5553 case NEON::BI__builtin_neon_vset_lane_i32: 5554 case NEON::BI__builtin_neon_vset_lane_i64: 5555 case NEON::BI__builtin_neon_vset_lane_f32: 5556 case NEON::BI__builtin_neon_vsetq_lane_i8: 5557 case NEON::BI__builtin_neon_vsetq_lane_i16: 5558 case NEON::BI__builtin_neon_vsetq_lane_i32: 5559 case NEON::BI__builtin_neon_vsetq_lane_i64: 5560 case NEON::BI__builtin_neon_vsetq_lane_f32: 5561 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5562 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5563 case NEON::BI__builtin_neon_vset_lane_f64: 5564 // The vector type needs a cast for the v1f64 variant. 5565 Ops[1] = Builder.CreateBitCast(Ops[1], 5566 llvm::VectorType::get(DoubleTy, 1)); 5567 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5568 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5569 case NEON::BI__builtin_neon_vsetq_lane_f64: 5570 // The vector type needs a cast for the v2f64 variant. 
5571 Ops[1] = Builder.CreateBitCast(Ops[1], 5572 llvm::VectorType::get(DoubleTy, 2)); 5573 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5574 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5575 5576 case NEON::BI__builtin_neon_vget_lane_i8: 5577 case NEON::BI__builtin_neon_vdupb_lane_i8: 5578 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 5579 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5580 "vget_lane"); 5581 case NEON::BI__builtin_neon_vgetq_lane_i8: 5582 case NEON::BI__builtin_neon_vdupb_laneq_i8: 5583 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 5584 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5585 "vgetq_lane"); 5586 case NEON::BI__builtin_neon_vget_lane_i16: 5587 case NEON::BI__builtin_neon_vduph_lane_i16: 5588 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 5589 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5590 "vget_lane"); 5591 case NEON::BI__builtin_neon_vgetq_lane_i16: 5592 case NEON::BI__builtin_neon_vduph_laneq_i16: 5593 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 5594 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5595 "vgetq_lane"); 5596 case NEON::BI__builtin_neon_vget_lane_i32: 5597 case NEON::BI__builtin_neon_vdups_lane_i32: 5598 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 5599 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5600 "vget_lane"); 5601 case NEON::BI__builtin_neon_vdups_lane_f32: 5602 Ops[0] = Builder.CreateBitCast(Ops[0], 5603 llvm::VectorType::get(FloatTy, 2)); 5604 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5605 "vdups_lane"); 5606 case NEON::BI__builtin_neon_vgetq_lane_i32: 5607 case NEON::BI__builtin_neon_vdups_laneq_i32: 5608 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 5609 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5610 "vgetq_lane"); 5611 case NEON::BI__builtin_neon_vget_lane_i64: 5612 case NEON::BI__builtin_neon_vdupd_lane_i64: 5613 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 5614 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5615 "vget_lane"); 5616 case NEON::BI__builtin_neon_vdupd_lane_f64: 5617 Ops[0] = Builder.CreateBitCast(Ops[0], 5618 llvm::VectorType::get(DoubleTy, 1)); 5619 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5620 "vdupd_lane"); 5621 case NEON::BI__builtin_neon_vgetq_lane_i64: 5622 case NEON::BI__builtin_neon_vdupd_laneq_i64: 5623 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 5624 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5625 "vgetq_lane"); 5626 case NEON::BI__builtin_neon_vget_lane_f32: 5627 Ops[0] = Builder.CreateBitCast(Ops[0], 5628 llvm::VectorType::get(FloatTy, 2)); 5629 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5630 "vget_lane"); 5631 case NEON::BI__builtin_neon_vget_lane_f64: 5632 Ops[0] = Builder.CreateBitCast(Ops[0], 5633 llvm::VectorType::get(DoubleTy, 1)); 5634 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5635 "vget_lane"); 5636 case NEON::BI__builtin_neon_vgetq_lane_f32: 5637 case NEON::BI__builtin_neon_vdups_laneq_f32: 5638 Ops[0] = Builder.CreateBitCast(Ops[0], 5639 llvm::VectorType::get(FloatTy, 4)); 5640 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5641 "vgetq_lane"); 5642 case NEON::BI__builtin_neon_vgetq_lane_f64: 5643 case NEON::BI__builtin_neon_vdupd_laneq_f64: 5644 Ops[0] = Builder.CreateBitCast(Ops[0], 5645 llvm::VectorType::get(DoubleTy, 2)); 5646 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5647 "vgetq_lane"); 5648 case NEON::BI__builtin_neon_vaddd_s64: 
5649 case NEON::BI__builtin_neon_vaddd_u64: 5650 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 5651 case NEON::BI__builtin_neon_vsubd_s64: 5652 case NEON::BI__builtin_neon_vsubd_u64: 5653 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 5654 case NEON::BI__builtin_neon_vqdmlalh_s16: 5655 case NEON::BI__builtin_neon_vqdmlslh_s16: { 5656 SmallVector<Value *, 2> ProductOps; 5657 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5658 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 5659 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5660 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5661 ProductOps, "vqdmlXl"); 5662 Constant *CI = ConstantInt::get(SizeTy, 0); 5663 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5664 5665 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 5666 ? Intrinsic::aarch64_neon_sqadd 5667 : Intrinsic::aarch64_neon_sqsub; 5668 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 5669 } 5670 case NEON::BI__builtin_neon_vqshlud_n_s64: { 5671 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5672 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5673 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 5674 Ops, "vqshlu_n"); 5675 } 5676 case NEON::BI__builtin_neon_vqshld_n_u64: 5677 case NEON::BI__builtin_neon_vqshld_n_s64: { 5678 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 5679 ? Intrinsic::aarch64_neon_uqshl 5680 : Intrinsic::aarch64_neon_sqshl; 5681 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5682 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5683 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 5684 } 5685 case NEON::BI__builtin_neon_vrshrd_n_u64: 5686 case NEON::BI__builtin_neon_vrshrd_n_s64: { 5687 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 5688 ? 
Intrinsic::aarch64_neon_urshl 5689 : Intrinsic::aarch64_neon_srshl; 5690 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5691 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 5692 Ops[1] = ConstantInt::get(Int64Ty, -SV); 5693 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 5694 } 5695 case NEON::BI__builtin_neon_vrsrad_n_u64: 5696 case NEON::BI__builtin_neon_vrsrad_n_s64: { 5697 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 5698 ? Intrinsic::aarch64_neon_urshl 5699 : Intrinsic::aarch64_neon_srshl; 5700 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5701 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 5702 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 5703 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 5704 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 5705 } 5706 case NEON::BI__builtin_neon_vshld_n_s64: 5707 case NEON::BI__builtin_neon_vshld_n_u64: { 5708 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5709 return Builder.CreateShl( 5710 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 5711 } 5712 case NEON::BI__builtin_neon_vshrd_n_s64: { 5713 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5714 return Builder.CreateAShr( 5715 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5716 Amt->getZExtValue())), 5717 "shrd_n"); 5718 } 5719 case NEON::BI__builtin_neon_vshrd_n_u64: { 5720 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5721 uint64_t ShiftAmt = Amt->getZExtValue(); 5722 // Right-shifting an unsigned value by its size yields 0. 
5723 if (ShiftAmt == 64) 5724 return ConstantInt::get(Int64Ty, 0); 5725 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 5726 "shrd_n"); 5727 } 5728 case NEON::BI__builtin_neon_vsrad_n_s64: { 5729 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5730 Ops[1] = Builder.CreateAShr( 5731 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5732 Amt->getZExtValue())), 5733 "shrd_n"); 5734 return Builder.CreateAdd(Ops[0], Ops[1]); 5735 } 5736 case NEON::BI__builtin_neon_vsrad_n_u64: { 5737 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5738 uint64_t ShiftAmt = Amt->getZExtValue(); 5739 // Right-shifting an unsigned value by its size yields 0. 5740 // As Op + 0 = Op, return Ops[0] directly. 5741 if (ShiftAmt == 64) 5742 return Ops[0]; 5743 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 5744 "shrd_n"); 5745 return Builder.CreateAdd(Ops[0], Ops[1]); 5746 } 5747 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 5748 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 5749 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 5750 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 5751 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5752 "lane"); 5753 SmallVector<Value *, 2> ProductOps; 5754 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5755 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 5756 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5757 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5758 ProductOps, "vqdmlXl"); 5759 Constant *CI = ConstantInt::get(SizeTy, 0); 5760 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5761 Ops.pop_back(); 5762 5763 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 5764 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 5765 ? 
Intrinsic::aarch64_neon_sqadd 5766 : Intrinsic::aarch64_neon_sqsub; 5767 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 5768 } 5769 case NEON::BI__builtin_neon_vqdmlals_s32: 5770 case NEON::BI__builtin_neon_vqdmlsls_s32: { 5771 SmallVector<Value *, 2> ProductOps; 5772 ProductOps.push_back(Ops[1]); 5773 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 5774 Ops[1] = 5775 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 5776 ProductOps, "vqdmlXl"); 5777 5778 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 5779 ? Intrinsic::aarch64_neon_sqadd 5780 : Intrinsic::aarch64_neon_sqsub; 5781 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 5782 } 5783 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 5784 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 5785 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 5786 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 5787 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5788 "lane"); 5789 SmallVector<Value *, 2> ProductOps; 5790 ProductOps.push_back(Ops[1]); 5791 ProductOps.push_back(Ops[2]); 5792 Ops[1] = 5793 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 5794 ProductOps, "vqdmlXl"); 5795 Ops.pop_back(); 5796 5797 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 5798 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 5799 ? Intrinsic::aarch64_neon_sqadd 5800 : Intrinsic::aarch64_neon_sqsub; 5801 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 5802 } 5803 } 5804 5805 llvm::VectorType *VTy = GetNeonType(this, Type); 5806 llvm::Type *Ty = VTy; 5807 if (!Ty) 5808 return nullptr; 5809 5810 // Not all intrinsics handled by the common case work for AArch64 yet, so only 5811 // defer to common code if it's been added to our special map. 
5812 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 5813 AArch64SIMDIntrinsicsProvenSorted); 5814 5815 if (Builtin) 5816 return EmitCommonNeonBuiltinExpr( 5817 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5818 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 5819 /*never use addresses*/ Address::invalid(), Address::invalid()); 5820 5821 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 5822 return V; 5823 5824 unsigned Int; 5825 switch (BuiltinID) { 5826 default: return nullptr; 5827 case NEON::BI__builtin_neon_vbsl_v: 5828 case NEON::BI__builtin_neon_vbslq_v: { 5829 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 5830 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 5831 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 5832 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 5833 5834 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 5835 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 5836 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 5837 return Builder.CreateBitCast(Ops[0], Ty); 5838 } 5839 case NEON::BI__builtin_neon_vfma_lane_v: 5840 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 5841 // The ARM builtins (and instructions) have the addend as the first 5842 // operand, but the 'fma' intrinsics have it last. Swap it around here. 5843 Value *Addend = Ops[0]; 5844 Value *Multiplicand = Ops[1]; 5845 Value *LaneSource = Ops[2]; 5846 Ops[0] = Multiplicand; 5847 Ops[1] = LaneSource; 5848 Ops[2] = Addend; 5849 5850 // Now adjust things to handle the lane access. 5851 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
5852 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 5853 VTy; 5854 llvm::Constant *cst = cast<Constant>(Ops[3]); 5855 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 5856 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 5857 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 5858 5859 Ops.pop_back(); 5860 Int = Intrinsic::fma; 5861 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 5862 } 5863 case NEON::BI__builtin_neon_vfma_laneq_v: { 5864 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 5865 // v1f64 fma should be mapped to Neon scalar f64 fma 5866 if (VTy && VTy->getElementType() == DoubleTy) { 5867 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5868 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5869 llvm::Type *VTy = GetNeonType(this, 5870 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 5871 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 5872 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 5873 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 5874 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 5875 return Builder.CreateBitCast(Result, Ty); 5876 } 5877 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5878 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5879 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5880 5881 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 5882 VTy->getNumElements() * 2); 5883 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 5884 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 5885 cast<ConstantInt>(Ops[3])); 5886 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 5887 5888 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 5889 } 5890 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 5891 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5892 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5893 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5894 5895 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 5896 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 5897 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 5898 } 5899 case NEON::BI__builtin_neon_vfmas_lane_f32: 5900 case NEON::BI__builtin_neon_vfmas_laneq_f32: 5901 case NEON::BI__builtin_neon_vfmad_lane_f64: 5902 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 5903 Ops.push_back(EmitScalarExpr(E->getArg(3))); 5904 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 5905 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5906 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 5907 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 5908 } 5909 case NEON::BI__builtin_neon_vmull_v: 5910 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5911 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 5912 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 5913 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 5914 case NEON::BI__builtin_neon_vmax_v: 5915 case NEON::BI__builtin_neon_vmaxq_v: 5916 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5917 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 5918 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 5919 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 5920 case NEON::BI__builtin_neon_vmin_v: 5921 case NEON::BI__builtin_neon_vminq_v: 5922 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5923 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 5924 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 5925 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 5926 case NEON::BI__builtin_neon_vabd_v: 5927 case NEON::BI__builtin_neon_vabdq_v: 5928 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5929 Int = usgn ? 
Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 5930 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 5931 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 5932 case NEON::BI__builtin_neon_vpadal_v: 5933 case NEON::BI__builtin_neon_vpadalq_v: { 5934 unsigned ArgElts = VTy->getNumElements(); 5935 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 5936 unsigned BitWidth = EltTy->getBitWidth(); 5937 llvm::Type *ArgTy = llvm::VectorType::get( 5938 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 5939 llvm::Type* Tys[2] = { VTy, ArgTy }; 5940 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 5941 SmallVector<llvm::Value*, 1> TmpOps; 5942 TmpOps.push_back(Ops[1]); 5943 Function *F = CGM.getIntrinsic(Int, Tys); 5944 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 5945 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 5946 return Builder.CreateAdd(tmp, addend); 5947 } 5948 case NEON::BI__builtin_neon_vpmin_v: 5949 case NEON::BI__builtin_neon_vpminq_v: 5950 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5951 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 5952 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 5953 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 5954 case NEON::BI__builtin_neon_vpmax_v: 5955 case NEON::BI__builtin_neon_vpmaxq_v: 5956 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5957 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 5958 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 5959 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 5960 case NEON::BI__builtin_neon_vminnm_v: 5961 case NEON::BI__builtin_neon_vminnmq_v: 5962 Int = Intrinsic::aarch64_neon_fminnm; 5963 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 5964 case NEON::BI__builtin_neon_vmaxnm_v: 5965 case NEON::BI__builtin_neon_vmaxnmq_v: 5966 Int = Intrinsic::aarch64_neon_fmaxnm; 5967 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 5968 case NEON::BI__builtin_neon_vrecpss_f32: { 5969 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5970 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 5971 Ops, "vrecps"); 5972 } 5973 case NEON::BI__builtin_neon_vrecpsd_f64: { 5974 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5975 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 5976 Ops, "vrecps"); 5977 } 5978 case NEON::BI__builtin_neon_vqshrun_n_v: 5979 Int = Intrinsic::aarch64_neon_sqshrun; 5980 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 5981 case NEON::BI__builtin_neon_vqrshrun_n_v: 5982 Int = Intrinsic::aarch64_neon_sqrshrun; 5983 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 5984 case NEON::BI__builtin_neon_vqshrn_n_v: 5985 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 5986 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 5987 case NEON::BI__builtin_neon_vrshrn_n_v: 5988 Int = Intrinsic::aarch64_neon_rshrn; 5989 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 5990 case NEON::BI__builtin_neon_vqrshrn_n_v: 5991 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 5992 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 5993 case NEON::BI__builtin_neon_vrnda_v: 5994 case NEON::BI__builtin_neon_vrndaq_v: { 5995 Int = Intrinsic::round; 5996 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 5997 } 5998 case NEON::BI__builtin_neon_vrndi_v: 5999 case NEON::BI__builtin_neon_vrndiq_v: { 6000 Int = Intrinsic::nearbyint; 6001 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 6002 } 6003 case NEON::BI__builtin_neon_vrndm_v: 6004 case NEON::BI__builtin_neon_vrndmq_v: { 6005 Int = Intrinsic::floor; 6006 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 6007 } 6008 case NEON::BI__builtin_neon_vrndn_v: 6009 case NEON::BI__builtin_neon_vrndnq_v: { 6010 Int = Intrinsic::aarch64_neon_frintn; 6011 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 6012 } 6013 case NEON::BI__builtin_neon_vrndp_v: 6014 case NEON::BI__builtin_neon_vrndpq_v: { 6015 Int = Intrinsic::ceil; 6016 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 6017 } 6018 case NEON::BI__builtin_neon_vrndx_v: 6019 case NEON::BI__builtin_neon_vrndxq_v: { 6020 Int = Intrinsic::rint; 6021 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 6022 } 6023 case NEON::BI__builtin_neon_vrnd_v: 6024 case NEON::BI__builtin_neon_vrndq_v: { 6025 Int = Intrinsic::trunc; 6026 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 6027 } 6028 case NEON::BI__builtin_neon_vceqz_v: 6029 case NEON::BI__builtin_neon_vceqzq_v: 6030 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 6031 ICmpInst::ICMP_EQ, "vceqz"); 6032 case NEON::BI__builtin_neon_vcgez_v: 6033 case NEON::BI__builtin_neon_vcgezq_v: 6034 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 6035 ICmpInst::ICMP_SGE, "vcgez"); 6036 case NEON::BI__builtin_neon_vclez_v: 6037 case NEON::BI__builtin_neon_vclezq_v: 6038 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 6039 ICmpInst::ICMP_SLE, "vclez"); 6040 case NEON::BI__builtin_neon_vcgtz_v: 6041 case NEON::BI__builtin_neon_vcgtzq_v: 6042 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 6043 ICmpInst::ICMP_SGT, "vcgtz"); 6044 case NEON::BI__builtin_neon_vcltz_v: 6045 case NEON::BI__builtin_neon_vcltzq_v: 6046 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 6047 ICmpInst::ICMP_SLT, "vcltz"); 6048 case NEON::BI__builtin_neon_vcvt_f64_v: 6049 case NEON::BI__builtin_neon_vcvtq_f64_v: 6050 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6051 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 6052 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 6053 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 6054 case NEON::BI__builtin_neon_vcvt_f64_f32: { 6055 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 6056 "unexpected vcvt_f64_f32 builtin"); 6057 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 6058 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6059 6060 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 6061 } 6062 case NEON::BI__builtin_neon_vcvt_f32_f64: { 6063 assert(Type.getEltType() == NeonTypeFlags::Float32 && 6064 "unexpected vcvt_f32_f64 builtin"); 6065 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 6066 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6067 6068 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 6069 } 6070 case NEON::BI__builtin_neon_vcvt_s32_v: 6071 case NEON::BI__builtin_neon_vcvt_u32_v: 6072 case NEON::BI__builtin_neon_vcvt_s64_v: 6073 case NEON::BI__builtin_neon_vcvt_u64_v: 6074 case NEON::BI__builtin_neon_vcvtq_s32_v: 6075 case NEON::BI__builtin_neon_vcvtq_u32_v: 6076 case NEON::BI__builtin_neon_vcvtq_s64_v: 6077 case NEON::BI__builtin_neon_vcvtq_u64_v: { 6078 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, 
Type)); 6079 if (usgn) 6080 return Builder.CreateFPToUI(Ops[0], Ty); 6081 return Builder.CreateFPToSI(Ops[0], Ty); 6082 } 6083 case NEON::BI__builtin_neon_vcvta_s32_v: 6084 case NEON::BI__builtin_neon_vcvtaq_s32_v: 6085 case NEON::BI__builtin_neon_vcvta_u32_v: 6086 case NEON::BI__builtin_neon_vcvtaq_u32_v: 6087 case NEON::BI__builtin_neon_vcvta_s64_v: 6088 case NEON::BI__builtin_neon_vcvtaq_s64_v: 6089 case NEON::BI__builtin_neon_vcvta_u64_v: 6090 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 6091 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 6092 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6093 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 6094 } 6095 case NEON::BI__builtin_neon_vcvtm_s32_v: 6096 case NEON::BI__builtin_neon_vcvtmq_s32_v: 6097 case NEON::BI__builtin_neon_vcvtm_u32_v: 6098 case NEON::BI__builtin_neon_vcvtmq_u32_v: 6099 case NEON::BI__builtin_neon_vcvtm_s64_v: 6100 case NEON::BI__builtin_neon_vcvtmq_s64_v: 6101 case NEON::BI__builtin_neon_vcvtm_u64_v: 6102 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 6103 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 6104 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6105 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 6106 } 6107 case NEON::BI__builtin_neon_vcvtn_s32_v: 6108 case NEON::BI__builtin_neon_vcvtnq_s32_v: 6109 case NEON::BI__builtin_neon_vcvtn_u32_v: 6110 case NEON::BI__builtin_neon_vcvtnq_u32_v: 6111 case NEON::BI__builtin_neon_vcvtn_s64_v: 6112 case NEON::BI__builtin_neon_vcvtnq_s64_v: 6113 case NEON::BI__builtin_neon_vcvtn_u64_v: 6114 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 6115 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 6116 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6117 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 6118 } 6119 case NEON::BI__builtin_neon_vcvtp_s32_v: 6120 case NEON::BI__builtin_neon_vcvtpq_s32_v: 6121 case NEON::BI__builtin_neon_vcvtp_u32_v: 6122 case NEON::BI__builtin_neon_vcvtpq_u32_v: 6123 case NEON::BI__builtin_neon_vcvtp_s64_v: 6124 case NEON::BI__builtin_neon_vcvtpq_s64_v: 6125 case NEON::BI__builtin_neon_vcvtp_u64_v: 6126 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 6127 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 6128 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6129 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 6130 } 6131 case NEON::BI__builtin_neon_vmulx_v: 6132 case NEON::BI__builtin_neon_vmulxq_v: { 6133 Int = Intrinsic::aarch64_neon_fmulx; 6134 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 6135 } 6136 case NEON::BI__builtin_neon_vmul_lane_v: 6137 case NEON::BI__builtin_neon_vmul_laneq_v: { 6138 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 6139 bool Quad = false; 6140 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 6141 Quad = true; 6142 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6143 llvm::Type *VTy = GetNeonType(this, 6144 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 6145 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6146 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 6147 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 6148 return Builder.CreateBitCast(Result, Ty); 6149 } 6150 case NEON::BI__builtin_neon_vnegd_s64: 6151 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 6152 case NEON::BI__builtin_neon_vpmaxnm_v: 6153 case NEON::BI__builtin_neon_vpmaxnmq_v: { 6154 Int = Intrinsic::aarch64_neon_fmaxnmp; 6155 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 6156 } 6157 case 
NEON::BI__builtin_neon_vpminnm_v: 6158 case NEON::BI__builtin_neon_vpminnmq_v: { 6159 Int = Intrinsic::aarch64_neon_fminnmp; 6160 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 6161 } 6162 case NEON::BI__builtin_neon_vsqrt_v: 6163 case NEON::BI__builtin_neon_vsqrtq_v: { 6164 Int = Intrinsic::sqrt; 6165 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6166 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 6167 } 6168 case NEON::BI__builtin_neon_vrbit_v: 6169 case NEON::BI__builtin_neon_vrbitq_v: { 6170 Int = Intrinsic::aarch64_neon_rbit; 6171 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 6172 } 6173 case NEON::BI__builtin_neon_vaddv_u8: 6174 // FIXME: These are handled by the AArch64 scalar code. 6175 usgn = true; 6176 // FALLTHROUGH 6177 case NEON::BI__builtin_neon_vaddv_s8: { 6178 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6179 Ty = Int32Ty; 6180 VTy = llvm::VectorType::get(Int8Ty, 8); 6181 llvm::Type *Tys[2] = { Ty, VTy }; 6182 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6183 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6184 return Builder.CreateTrunc(Ops[0], Int8Ty); 6185 } 6186 case NEON::BI__builtin_neon_vaddv_u16: 6187 usgn = true; 6188 // FALLTHROUGH 6189 case NEON::BI__builtin_neon_vaddv_s16: { 6190 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6191 Ty = Int32Ty; 6192 VTy = llvm::VectorType::get(Int16Ty, 4); 6193 llvm::Type *Tys[2] = { Ty, VTy }; 6194 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6195 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6196 return Builder.CreateTrunc(Ops[0], Int16Ty); 6197 } 6198 case NEON::BI__builtin_neon_vaddvq_u8: 6199 usgn = true; 6200 // FALLTHROUGH 6201 case NEON::BI__builtin_neon_vaddvq_s8: { 6202 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6203 Ty = Int32Ty; 6204 VTy = llvm::VectorType::get(Int8Ty, 16); 6205 llvm::Type *Tys[2] = { Ty, VTy }; 6206 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6207 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6208 return Builder.CreateTrunc(Ops[0], Int8Ty); 6209 } 6210 case NEON::BI__builtin_neon_vaddvq_u16: 6211 usgn = true; 6212 // FALLTHROUGH 6213 case NEON::BI__builtin_neon_vaddvq_s16: { 6214 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6215 Ty = Int32Ty; 6216 VTy = llvm::VectorType::get(Int16Ty, 8); 6217 llvm::Type *Tys[2] = { Ty, VTy }; 6218 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6219 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6220 return Builder.CreateTrunc(Ops[0], Int16Ty); 6221 } 6222 case NEON::BI__builtin_neon_vmaxv_u8: { 6223 Int = Intrinsic::aarch64_neon_umaxv; 6224 Ty = Int32Ty; 6225 VTy = llvm::VectorType::get(Int8Ty, 8); 6226 llvm::Type *Tys[2] = { Ty, VTy }; 6227 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6228 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6229 return Builder.CreateTrunc(Ops[0], Int8Ty); 6230 } 6231 case NEON::BI__builtin_neon_vmaxv_u16: { 6232 Int = Intrinsic::aarch64_neon_umaxv; 6233 Ty = Int32Ty; 6234 VTy = llvm::VectorType::get(Int16Ty, 4); 6235 llvm::Type *Tys[2] = { Ty, VTy }; 6236 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6237 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6238 return Builder.CreateTrunc(Ops[0], Int16Ty); 6239 } 6240 case NEON::BI__builtin_neon_vmaxvq_u8: { 6241 Int = Intrinsic::aarch64_neon_umaxv; 6242 Ty = Int32Ty; 6243 VTy = llvm::VectorType::get(Int8Ty, 16); 6244 llvm::Type *Tys[2] = { Ty, VTy }; 6245 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6246 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6247 return Builder.CreateTrunc(Ops[0], Int8Ty); 6248 } 6249 case NEON::BI__builtin_neon_vmaxvq_u16: { 6250 Int = 
Intrinsic::aarch64_neon_umaxv; 6251 Ty = Int32Ty; 6252 VTy = llvm::VectorType::get(Int16Ty, 8); 6253 llvm::Type *Tys[2] = { Ty, VTy }; 6254 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6255 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6256 return Builder.CreateTrunc(Ops[0], Int16Ty); 6257 } 6258 case NEON::BI__builtin_neon_vmaxv_s8: { 6259 Int = Intrinsic::aarch64_neon_smaxv; 6260 Ty = Int32Ty; 6261 VTy = llvm::VectorType::get(Int8Ty, 8); 6262 llvm::Type *Tys[2] = { Ty, VTy }; 6263 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6264 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6265 return Builder.CreateTrunc(Ops[0], Int8Ty); 6266 } 6267 case NEON::BI__builtin_neon_vmaxv_s16: { 6268 Int = Intrinsic::aarch64_neon_smaxv; 6269 Ty = Int32Ty; 6270 VTy = llvm::VectorType::get(Int16Ty, 4); 6271 llvm::Type *Tys[2] = { Ty, VTy }; 6272 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6273 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6274 return Builder.CreateTrunc(Ops[0], Int16Ty); 6275 } 6276 case NEON::BI__builtin_neon_vmaxvq_s8: { 6277 Int = Intrinsic::aarch64_neon_smaxv; 6278 Ty = Int32Ty; 6279 VTy = llvm::VectorType::get(Int8Ty, 16); 6280 llvm::Type *Tys[2] = { Ty, VTy }; 6281 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6282 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6283 return Builder.CreateTrunc(Ops[0], Int8Ty); 6284 } 6285 case NEON::BI__builtin_neon_vmaxvq_s16: { 6286 Int = Intrinsic::aarch64_neon_smaxv; 6287 Ty = Int32Ty; 6288 VTy = llvm::VectorType::get(Int16Ty, 8); 6289 llvm::Type *Tys[2] = { Ty, VTy }; 6290 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6291 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6292 return Builder.CreateTrunc(Ops[0], Int16Ty); 6293 } 6294 case NEON::BI__builtin_neon_vminv_u8: { 6295 Int = Intrinsic::aarch64_neon_uminv; 6296 Ty = Int32Ty; 6297 VTy = llvm::VectorType::get(Int8Ty, 8); 6298 llvm::Type *Tys[2] = { Ty, VTy }; 6299 
Ops.push_back(EmitScalarExpr(E->getArg(0))); 6300 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6301 return Builder.CreateTrunc(Ops[0], Int8Ty); 6302 } 6303 case NEON::BI__builtin_neon_vminv_u16: { 6304 Int = Intrinsic::aarch64_neon_uminv; 6305 Ty = Int32Ty; 6306 VTy = llvm::VectorType::get(Int16Ty, 4); 6307 llvm::Type *Tys[2] = { Ty, VTy }; 6308 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6309 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6310 return Builder.CreateTrunc(Ops[0], Int16Ty); 6311 } 6312 case NEON::BI__builtin_neon_vminvq_u8: { 6313 Int = Intrinsic::aarch64_neon_uminv; 6314 Ty = Int32Ty; 6315 VTy = llvm::VectorType::get(Int8Ty, 16); 6316 llvm::Type *Tys[2] = { Ty, VTy }; 6317 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6318 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6319 return Builder.CreateTrunc(Ops[0], Int8Ty); 6320 } 6321 case NEON::BI__builtin_neon_vminvq_u16: { 6322 Int = Intrinsic::aarch64_neon_uminv; 6323 Ty = Int32Ty; 6324 VTy = llvm::VectorType::get(Int16Ty, 8); 6325 llvm::Type *Tys[2] = { Ty, VTy }; 6326 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6327 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6328 return Builder.CreateTrunc(Ops[0], Int16Ty); 6329 } 6330 case NEON::BI__builtin_neon_vminv_s8: { 6331 Int = Intrinsic::aarch64_neon_sminv; 6332 Ty = Int32Ty; 6333 VTy = llvm::VectorType::get(Int8Ty, 8); 6334 llvm::Type *Tys[2] = { Ty, VTy }; 6335 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6336 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6337 return Builder.CreateTrunc(Ops[0], Int8Ty); 6338 } 6339 case NEON::BI__builtin_neon_vminv_s16: { 6340 Int = Intrinsic::aarch64_neon_sminv; 6341 Ty = Int32Ty; 6342 VTy = llvm::VectorType::get(Int16Ty, 4); 6343 llvm::Type *Tys[2] = { Ty, VTy }; 6344 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6345 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6346 return Builder.CreateTrunc(Ops[0], 
Int16Ty); 6347 } 6348 case NEON::BI__builtin_neon_vminvq_s8: { 6349 Int = Intrinsic::aarch64_neon_sminv; 6350 Ty = Int32Ty; 6351 VTy = llvm::VectorType::get(Int8Ty, 16); 6352 llvm::Type *Tys[2] = { Ty, VTy }; 6353 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6354 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6355 return Builder.CreateTrunc(Ops[0], Int8Ty); 6356 } 6357 case NEON::BI__builtin_neon_vminvq_s16: { 6358 Int = Intrinsic::aarch64_neon_sminv; 6359 Ty = Int32Ty; 6360 VTy = llvm::VectorType::get(Int16Ty, 8); 6361 llvm::Type *Tys[2] = { Ty, VTy }; 6362 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6363 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6364 return Builder.CreateTrunc(Ops[0], Int16Ty); 6365 } 6366 case NEON::BI__builtin_neon_vmul_n_f64: { 6367 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6368 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6369 return Builder.CreateFMul(Ops[0], RHS); 6370 } 6371 case NEON::BI__builtin_neon_vaddlv_u8: { 6372 Int = Intrinsic::aarch64_neon_uaddlv; 6373 Ty = Int32Ty; 6374 VTy = llvm::VectorType::get(Int8Ty, 8); 6375 llvm::Type *Tys[2] = { Ty, VTy }; 6376 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6377 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6378 return Builder.CreateTrunc(Ops[0], Int16Ty); 6379 } 6380 case NEON::BI__builtin_neon_vaddlv_u16: { 6381 Int = Intrinsic::aarch64_neon_uaddlv; 6382 Ty = Int32Ty; 6383 VTy = llvm::VectorType::get(Int16Ty, 4); 6384 llvm::Type *Tys[2] = { Ty, VTy }; 6385 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6386 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6387 } 6388 case NEON::BI__builtin_neon_vaddlvq_u8: { 6389 Int = Intrinsic::aarch64_neon_uaddlv; 6390 Ty = Int32Ty; 6391 VTy = llvm::VectorType::get(Int8Ty, 16); 6392 llvm::Type *Tys[2] = { Ty, VTy }; 6393 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6394 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6395 
return Builder.CreateTrunc(Ops[0], Int16Ty); 6396 } 6397 case NEON::BI__builtin_neon_vaddlvq_u16: { 6398 Int = Intrinsic::aarch64_neon_uaddlv; 6399 Ty = Int32Ty; 6400 VTy = llvm::VectorType::get(Int16Ty, 8); 6401 llvm::Type *Tys[2] = { Ty, VTy }; 6402 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6403 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6404 } 6405 case NEON::BI__builtin_neon_vaddlv_s8: { 6406 Int = Intrinsic::aarch64_neon_saddlv; 6407 Ty = Int32Ty; 6408 VTy = llvm::VectorType::get(Int8Ty, 8); 6409 llvm::Type *Tys[2] = { Ty, VTy }; 6410 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6411 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6412 return Builder.CreateTrunc(Ops[0], Int16Ty); 6413 } 6414 case NEON::BI__builtin_neon_vaddlv_s16: { 6415 Int = Intrinsic::aarch64_neon_saddlv; 6416 Ty = Int32Ty; 6417 VTy = llvm::VectorType::get(Int16Ty, 4); 6418 llvm::Type *Tys[2] = { Ty, VTy }; 6419 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6420 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6421 } 6422 case NEON::BI__builtin_neon_vaddlvq_s8: { 6423 Int = Intrinsic::aarch64_neon_saddlv; 6424 Ty = Int32Ty; 6425 VTy = llvm::VectorType::get(Int8Ty, 16); 6426 llvm::Type *Tys[2] = { Ty, VTy }; 6427 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6428 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6429 return Builder.CreateTrunc(Ops[0], Int16Ty); 6430 } 6431 case NEON::BI__builtin_neon_vaddlvq_s16: { 6432 Int = Intrinsic::aarch64_neon_saddlv; 6433 Ty = Int32Ty; 6434 VTy = llvm::VectorType::get(Int16Ty, 8); 6435 llvm::Type *Tys[2] = { Ty, VTy }; 6436 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6437 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6438 } 6439 case NEON::BI__builtin_neon_vsri_n_v: 6440 case NEON::BI__builtin_neon_vsriq_n_v: { 6441 Int = Intrinsic::aarch64_neon_vsri; 6442 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6443 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6444 } 
6445 case NEON::BI__builtin_neon_vsli_n_v: 6446 case NEON::BI__builtin_neon_vsliq_n_v: { 6447 Int = Intrinsic::aarch64_neon_vsli; 6448 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6449 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6450 } 6451 case NEON::BI__builtin_neon_vsra_n_v: 6452 case NEON::BI__builtin_neon_vsraq_n_v: 6453 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6454 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6455 return Builder.CreateAdd(Ops[0], Ops[1]); 6456 case NEON::BI__builtin_neon_vrsra_n_v: 6457 case NEON::BI__builtin_neon_vrsraq_n_v: { 6458 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6459 SmallVector<llvm::Value*,2> TmpOps; 6460 TmpOps.push_back(Ops[1]); 6461 TmpOps.push_back(Ops[2]); 6462 Function* F = CGM.getIntrinsic(Int, Ty); 6463 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6464 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6465 return Builder.CreateAdd(Ops[0], tmp); 6466 } 6467 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6468 // of an Align parameter here. 
6469 case NEON::BI__builtin_neon_vld1_x2_v: 6470 case NEON::BI__builtin_neon_vld1q_x2_v: 6471 case NEON::BI__builtin_neon_vld1_x3_v: 6472 case NEON::BI__builtin_neon_vld1q_x3_v: 6473 case NEON::BI__builtin_neon_vld1_x4_v: 6474 case NEON::BI__builtin_neon_vld1q_x4_v: { 6475 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6476 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6477 llvm::Type *Tys[2] = { VTy, PTy }; 6478 unsigned Int; 6479 switch (BuiltinID) { 6480 case NEON::BI__builtin_neon_vld1_x2_v: 6481 case NEON::BI__builtin_neon_vld1q_x2_v: 6482 Int = Intrinsic::aarch64_neon_ld1x2; 6483 break; 6484 case NEON::BI__builtin_neon_vld1_x3_v: 6485 case NEON::BI__builtin_neon_vld1q_x3_v: 6486 Int = Intrinsic::aarch64_neon_ld1x3; 6487 break; 6488 case NEON::BI__builtin_neon_vld1_x4_v: 6489 case NEON::BI__builtin_neon_vld1q_x4_v: 6490 Int = Intrinsic::aarch64_neon_ld1x4; 6491 break; 6492 } 6493 Function *F = CGM.getIntrinsic(Int, Tys); 6494 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6495 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6496 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6497 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6498 } 6499 case NEON::BI__builtin_neon_vst1_x2_v: 6500 case NEON::BI__builtin_neon_vst1q_x2_v: 6501 case NEON::BI__builtin_neon_vst1_x3_v: 6502 case NEON::BI__builtin_neon_vst1q_x3_v: 6503 case NEON::BI__builtin_neon_vst1_x4_v: 6504 case NEON::BI__builtin_neon_vst1q_x4_v: { 6505 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6506 llvm::Type *Tys[2] = { VTy, PTy }; 6507 unsigned Int; 6508 switch (BuiltinID) { 6509 case NEON::BI__builtin_neon_vst1_x2_v: 6510 case NEON::BI__builtin_neon_vst1q_x2_v: 6511 Int = Intrinsic::aarch64_neon_st1x2; 6512 break; 6513 case NEON::BI__builtin_neon_vst1_x3_v: 6514 case NEON::BI__builtin_neon_vst1q_x3_v: 6515 Int = Intrinsic::aarch64_neon_st1x3; 6516 break; 6517 case NEON::BI__builtin_neon_vst1_x4_v: 6518 case 
NEON::BI__builtin_neon_vst1q_x4_v: 6519 Int = Intrinsic::aarch64_neon_st1x4; 6520 break; 6521 } 6522 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6523 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6524 } 6525 case NEON::BI__builtin_neon_vld1_v: 6526 case NEON::BI__builtin_neon_vld1q_v: 6527 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6528 return Builder.CreateDefaultAlignedLoad(Ops[0]); 6529 case NEON::BI__builtin_neon_vst1_v: 6530 case NEON::BI__builtin_neon_vst1q_v: 6531 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6532 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6533 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6534 case NEON::BI__builtin_neon_vld1_lane_v: 6535 case NEON::BI__builtin_neon_vld1q_lane_v: 6536 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6537 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6538 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6539 Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); 6540 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 6541 case NEON::BI__builtin_neon_vld1_dup_v: 6542 case NEON::BI__builtin_neon_vld1q_dup_v: { 6543 Value *V = UndefValue::get(Ty); 6544 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6545 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6546 Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); 6547 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 6548 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 6549 return EmitNeonSplat(Ops[0], CI); 6550 } 6551 case NEON::BI__builtin_neon_vst1_lane_v: 6552 case NEON::BI__builtin_neon_vst1q_lane_v: 6553 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6554 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 6555 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6556 return Builder.CreateDefaultAlignedStore(Ops[1], 6557 Builder.CreateBitCast(Ops[0], Ty)); 6558 case NEON::BI__builtin_neon_vld2_v: 6559 case 
NEON::BI__builtin_neon_vld2q_v: { 6560 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6561 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6562 llvm::Type *Tys[2] = { VTy, PTy }; 6563 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 6564 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6565 Ops[0] = Builder.CreateBitCast(Ops[0], 6566 llvm::PointerType::getUnqual(Ops[1]->getType())); 6567 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6568 } 6569 case NEON::BI__builtin_neon_vld3_v: 6570 case NEON::BI__builtin_neon_vld3q_v: { 6571 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6572 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6573 llvm::Type *Tys[2] = { VTy, PTy }; 6574 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 6575 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6576 Ops[0] = Builder.CreateBitCast(Ops[0], 6577 llvm::PointerType::getUnqual(Ops[1]->getType())); 6578 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6579 } 6580 case NEON::BI__builtin_neon_vld4_v: 6581 case NEON::BI__builtin_neon_vld4q_v: { 6582 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6583 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6584 llvm::Type *Tys[2] = { VTy, PTy }; 6585 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 6586 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6587 Ops[0] = Builder.CreateBitCast(Ops[0], 6588 llvm::PointerType::getUnqual(Ops[1]->getType())); 6589 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6590 } 6591 case NEON::BI__builtin_neon_vld2_dup_v: 6592 case NEON::BI__builtin_neon_vld2q_dup_v: { 6593 llvm::Type *PTy = 6594 llvm::PointerType::getUnqual(VTy->getElementType()); 6595 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6596 llvm::Type *Tys[2] = { VTy, PTy }; 6597 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 6598 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6599 Ops[0] = Builder.CreateBitCast(Ops[0], 6600 
llvm::PointerType::getUnqual(Ops[1]->getType())); 6601 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6602 } 6603 case NEON::BI__builtin_neon_vld3_dup_v: 6604 case NEON::BI__builtin_neon_vld3q_dup_v: { 6605 llvm::Type *PTy = 6606 llvm::PointerType::getUnqual(VTy->getElementType()); 6607 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6608 llvm::Type *Tys[2] = { VTy, PTy }; 6609 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 6610 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6611 Ops[0] = Builder.CreateBitCast(Ops[0], 6612 llvm::PointerType::getUnqual(Ops[1]->getType())); 6613 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6614 } 6615 case NEON::BI__builtin_neon_vld4_dup_v: 6616 case NEON::BI__builtin_neon_vld4q_dup_v: { 6617 llvm::Type *PTy = 6618 llvm::PointerType::getUnqual(VTy->getElementType()); 6619 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6620 llvm::Type *Tys[2] = { VTy, PTy }; 6621 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 6622 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6623 Ops[0] = Builder.CreateBitCast(Ops[0], 6624 llvm::PointerType::getUnqual(Ops[1]->getType())); 6625 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6626 } 6627 case NEON::BI__builtin_neon_vld2_lane_v: 6628 case NEON::BI__builtin_neon_vld2q_lane_v: { 6629 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6630 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 6631 Ops.push_back(Ops[1]); 6632 Ops.erase(Ops.begin()+1); 6633 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6634 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6635 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6636 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 6637 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6638 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6639 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6640 } 6641 case NEON::BI__builtin_neon_vld3_lane_v: 6642 case 
NEON::BI__builtin_neon_vld3q_lane_v: { 6643 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6644 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 6645 Ops.push_back(Ops[1]); 6646 Ops.erase(Ops.begin()+1); 6647 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6648 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6649 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6650 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6651 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 6652 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6653 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6654 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6655 } 6656 case NEON::BI__builtin_neon_vld4_lane_v: 6657 case NEON::BI__builtin_neon_vld4q_lane_v: { 6658 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6659 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 6660 Ops.push_back(Ops[1]); 6661 Ops.erase(Ops.begin()+1); 6662 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6663 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6664 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6665 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 6666 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 6667 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 6668 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6669 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6670 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6671 } 6672 case NEON::BI__builtin_neon_vst2_v: 6673 case NEON::BI__builtin_neon_vst2q_v: { 6674 Ops.push_back(Ops[0]); 6675 Ops.erase(Ops.begin()); 6676 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 6677 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 6678 Ops, ""); 6679 } 6680 case NEON::BI__builtin_neon_vst2_lane_v: 6681 case NEON::BI__builtin_neon_vst2q_lane_v: { 6682 Ops.push_back(Ops[0]); 6683 Ops.erase(Ops.begin()); 6684 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 6685 llvm::Type *Tys[2] = { VTy, 
Ops[3]->getType() }; 6686 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 6687 Ops, ""); 6688 } 6689 case NEON::BI__builtin_neon_vst3_v: 6690 case NEON::BI__builtin_neon_vst3q_v: { 6691 Ops.push_back(Ops[0]); 6692 Ops.erase(Ops.begin()); 6693 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6694 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 6695 Ops, ""); 6696 } 6697 case NEON::BI__builtin_neon_vst3_lane_v: 6698 case NEON::BI__builtin_neon_vst3q_lane_v: { 6699 Ops.push_back(Ops[0]); 6700 Ops.erase(Ops.begin()); 6701 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6702 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6703 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 6704 Ops, ""); 6705 } 6706 case NEON::BI__builtin_neon_vst4_v: 6707 case NEON::BI__builtin_neon_vst4q_v: { 6708 Ops.push_back(Ops[0]); 6709 Ops.erase(Ops.begin()); 6710 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6711 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 6712 Ops, ""); 6713 } 6714 case NEON::BI__builtin_neon_vst4_lane_v: 6715 case NEON::BI__builtin_neon_vst4q_lane_v: { 6716 Ops.push_back(Ops[0]); 6717 Ops.erase(Ops.begin()); 6718 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6719 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 6720 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 6721 Ops, ""); 6722 } 6723 case NEON::BI__builtin_neon_vtrn_v: 6724 case NEON::BI__builtin_neon_vtrnq_v: { 6725 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6726 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6727 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6728 Value *SV = nullptr; 6729 6730 for (unsigned vi = 0; vi != 2; ++vi) { 6731 SmallVector<uint32_t, 16> Indices; 6732 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6733 Indices.push_back(i+vi); 6734 Indices.push_back(i+e+vi); 6735 } 6736 Value *Addr = 
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6737 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 6738 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6739 } 6740 return SV; 6741 } 6742 case NEON::BI__builtin_neon_vuzp_v: 6743 case NEON::BI__builtin_neon_vuzpq_v: { 6744 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6745 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6746 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6747 Value *SV = nullptr; 6748 6749 for (unsigned vi = 0; vi != 2; ++vi) { 6750 SmallVector<uint32_t, 16> Indices; 6751 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 6752 Indices.push_back(2*i+vi); 6753 6754 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6755 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 6756 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6757 } 6758 return SV; 6759 } 6760 case NEON::BI__builtin_neon_vzip_v: 6761 case NEON::BI__builtin_neon_vzipq_v: { 6762 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6763 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6764 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6765 Value *SV = nullptr; 6766 6767 for (unsigned vi = 0; vi != 2; ++vi) { 6768 SmallVector<uint32_t, 16> Indices; 6769 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6770 Indices.push_back((i + vi*e) >> 1); 6771 Indices.push_back(((i + vi*e) >> 1)+e); 6772 } 6773 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6774 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 6775 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6776 } 6777 return SV; 6778 } 6779 case NEON::BI__builtin_neon_vqtbl1q_v: { 6780 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 6781 Ops, "vtbl1"); 6782 } 6783 case NEON::BI__builtin_neon_vqtbl2q_v: { 6784 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 6785 Ops, "vtbl2"); 6786 } 6787 case 
NEON::BI__builtin_neon_vqtbl3q_v: { 6788 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 6789 Ops, "vtbl3"); 6790 } 6791 case NEON::BI__builtin_neon_vqtbl4q_v: { 6792 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 6793 Ops, "vtbl4"); 6794 } 6795 case NEON::BI__builtin_neon_vqtbx1q_v: { 6796 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 6797 Ops, "vtbx1"); 6798 } 6799 case NEON::BI__builtin_neon_vqtbx2q_v: { 6800 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 6801 Ops, "vtbx2"); 6802 } 6803 case NEON::BI__builtin_neon_vqtbx3q_v: { 6804 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 6805 Ops, "vtbx3"); 6806 } 6807 case NEON::BI__builtin_neon_vqtbx4q_v: { 6808 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 6809 Ops, "vtbx4"); 6810 } 6811 case NEON::BI__builtin_neon_vsqadd_v: 6812 case NEON::BI__builtin_neon_vsqaddq_v: { 6813 Int = Intrinsic::aarch64_neon_usqadd; 6814 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 6815 } 6816 case NEON::BI__builtin_neon_vuqadd_v: 6817 case NEON::BI__builtin_neon_vuqaddq_v: { 6818 Int = Intrinsic::aarch64_neon_suqadd; 6819 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 6820 } 6821 } 6822 } 6823 6824 llvm::Value *CodeGenFunction:: 6825 BuildVector(ArrayRef<llvm::Value*> Ops) { 6826 assert((Ops.size() & (Ops.size() - 1)) == 0 && 6827 "Not a power-of-two sized vector!"); 6828 bool AllConstants = true; 6829 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 6830 AllConstants &= isa<Constant>(Ops[i]); 6831 6832 // If this is a constant vector, create a ConstantVector. 
6833 if (AllConstants) { 6834 SmallVector<llvm::Constant*, 16> CstOps; 6835 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 6836 CstOps.push_back(cast<Constant>(Ops[i])); 6837 return llvm::ConstantVector::get(CstOps); 6838 } 6839 6840 // Otherwise, insertelement the values to build the vector. 6841 Value *Result = 6842 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 6843 6844 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 6845 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 6846 6847 return Result; 6848 } 6849 6850 // Convert the mask from an integer type to a vector of i1. 6851 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 6852 unsigned NumElts) { 6853 6854 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 6855 cast<IntegerType>(Mask->getType())->getBitWidth()); 6856 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 6857 6858 // If we have less than 8 elements, then the starting mask was an i8 and 6859 // we need to extract down to the right number of elements. 6860 if (NumElts < 8) { 6861 uint32_t Indices[4]; 6862 for (unsigned i = 0; i != NumElts; ++i) 6863 Indices[i] = i; 6864 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 6865 makeArrayRef(Indices, NumElts), 6866 "extract"); 6867 } 6868 return MaskVec; 6869 } 6870 6871 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 6872 SmallVectorImpl<Value *> &Ops, 6873 unsigned Align) { 6874 // Cast the pointer to right type. 6875 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 6876 llvm::PointerType::getUnqual(Ops[1]->getType())); 6877 6878 // If the mask is all ones just emit a regular store. 
6879 if (const auto *C = dyn_cast<Constant>(Ops[2])) 6880 if (C->isAllOnesValue()) 6881 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 6882 6883 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 6884 Ops[1]->getType()->getVectorNumElements()); 6885 6886 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 6887 } 6888 6889 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 6890 SmallVectorImpl<Value *> &Ops, unsigned Align) { 6891 // Cast the pointer to right type. 6892 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 6893 llvm::PointerType::getUnqual(Ops[1]->getType())); 6894 6895 // If the mask is all ones just emit a regular store. 6896 if (const auto *C = dyn_cast<Constant>(Ops[2])) 6897 if (C->isAllOnesValue()) 6898 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 6899 6900 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 6901 Ops[1]->getType()->getVectorNumElements()); 6902 6903 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 6904 } 6905 6906 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 6907 SmallVectorImpl<Value *> &Ops, 6908 llvm::Type *DstTy, 6909 unsigned SrcSizeInBits, 6910 unsigned Align) { 6911 // Load the subvector. 6912 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 6913 6914 // Create broadcast mask. 6915 unsigned NumDstElts = DstTy->getVectorNumElements(); 6916 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 6917 6918 SmallVector<uint32_t, 8> Mask; 6919 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 6920 for (unsigned j = 0; j != NumSrcElts; ++j) 6921 Mask.push_back(j); 6922 6923 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 6924 } 6925 6926 static Value *EmitX86Select(CodeGenFunction &CGF, 6927 Value *Mask, Value *Op0, Value *Op1) { 6928 6929 // If the mask is all ones just return first argument. 
6930 if (const auto *C = dyn_cast<Constant>(Mask)) 6931 if (C->isAllOnesValue()) 6932 return Op0; 6933 6934 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 6935 6936 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 6937 } 6938 6939 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 6940 bool Signed, SmallVectorImpl<Value *> &Ops) { 6941 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 6942 Value *Cmp; 6943 6944 if (CC == 3) { 6945 Cmp = Constant::getNullValue( 6946 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 6947 } else if (CC == 7) { 6948 Cmp = Constant::getAllOnesValue( 6949 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 6950 } else { 6951 ICmpInst::Predicate Pred; 6952 switch (CC) { 6953 default: llvm_unreachable("Unknown condition code"); 6954 case 0: Pred = ICmpInst::ICMP_EQ; break; 6955 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 6956 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 6957 case 4: Pred = ICmpInst::ICMP_NE; break; 6958 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 6959 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 6960 } 6961 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 6962 } 6963 6964 const auto *C = dyn_cast<Constant>(Ops.back()); 6965 if (!C || !C->isAllOnesValue()) 6966 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 6967 6968 if (NumElts < 8) { 6969 uint32_t Indices[8]; 6970 for (unsigned i = 0; i != NumElts; ++i) 6971 Indices[i] = i; 6972 for (unsigned i = NumElts; i != 8; ++i) 6973 Indices[i] = i % NumElts + NumElts; 6974 Cmp = CGF.Builder.CreateShuffleVector( 6975 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 6976 } 6977 return CGF.Builder.CreateBitCast(Cmp, 6978 IntegerType::get(CGF.getLLVMContext(), 6979 std::max(NumElts, 8U))); 6980 } 6981 6982 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 6983 const CallExpr *E) { 6984 if (BuiltinID == X86::BI__builtin_ms_va_start || 6985 BuiltinID == X86::BI__builtin_ms_va_end) 6986 return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 6987 BuiltinID == X86::BI__builtin_ms_va_start); 6988 if (BuiltinID == X86::BI__builtin_ms_va_copy) { 6989 // Lower this manually. We can't reliably determine whether or not any 6990 // given va_copy() is for a Win64 va_list from the calling convention 6991 // alone, because it's legal to do this from a System V ABI function. 6992 // With opaque pointer types, we won't have enough information in LLVM 6993 // IR to determine this from the argument types, either. Best to do it 6994 // now, while we have enough information. 
6995 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 6996 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 6997 6998 llvm::Type *BPP = Int8PtrPtrTy; 6999 7000 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 7001 DestAddr.getAlignment()); 7002 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 7003 SrcAddr.getAlignment()); 7004 7005 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 7006 return Builder.CreateStore(ArgPtr, DestAddr); 7007 } 7008 7009 SmallVector<Value*, 4> Ops; 7010 7011 // Find out if any arguments are required to be integer constant expressions. 7012 unsigned ICEArguments = 0; 7013 ASTContext::GetBuiltinTypeError Error; 7014 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7015 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7016 7017 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 7018 // If this is a normal argument, just emit it as a scalar. 7019 if ((ICEArguments & (1 << i)) == 0) { 7020 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7021 continue; 7022 } 7023 7024 // If this is required to be a constant, constant fold it so that we know 7025 // that the generated intrinsic gets a ConstantInt. 7026 llvm::APSInt Result; 7027 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 7028 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 7029 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 7030 } 7031 7032 // These exist so that the builtin that takes an immediate can be bounds 7033 // checked by clang to avoid passing bad immediates to the backend. Since 7034 // AVX has a larger immediate than SSE we would need separate builtins to 7035 // do the different bounds checking. Rather than create a clang specific 7036 // SSE only builtin, this implements eight separate builtins to match gcc 7037 // implementation. 
7038 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 7039 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 7040 llvm::Function *F = CGM.getIntrinsic(ID); 7041 return Builder.CreateCall(F, Ops); 7042 }; 7043 7044 // For the vector forms of FP comparisons, translate the builtins directly to 7045 // IR. 7046 // TODO: The builtins could be removed if the SSE header files used vector 7047 // extension comparisons directly (vector ordered/unordered may need 7048 // additional support via __builtin_isnan()). 7049 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 7050 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 7051 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 7052 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 7053 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 7054 return Builder.CreateBitCast(Sext, FPVecTy); 7055 }; 7056 7057 switch (BuiltinID) { 7058 default: return nullptr; 7059 case X86::BI__builtin_cpu_supports: { 7060 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 7061 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 7062 7063 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 7064 // based mapping. 7065 // Processor features and mapping to processor feature value. 
7066 enum X86Features { 7067 CMOV = 0, 7068 MMX, 7069 POPCNT, 7070 SSE, 7071 SSE2, 7072 SSE3, 7073 SSSE3, 7074 SSE4_1, 7075 SSE4_2, 7076 AVX, 7077 AVX2, 7078 SSE4_A, 7079 FMA4, 7080 XOP, 7081 FMA, 7082 AVX512F, 7083 BMI, 7084 BMI2, 7085 AES, 7086 PCLMUL, 7087 AVX512VL, 7088 AVX512BW, 7089 AVX512DQ, 7090 AVX512CD, 7091 AVX512ER, 7092 AVX512PF, 7093 AVX512VBMI, 7094 AVX512IFMA, 7095 MAX 7096 }; 7097 7098 X86Features Feature = StringSwitch<X86Features>(FeatureStr) 7099 .Case("cmov", X86Features::CMOV) 7100 .Case("mmx", X86Features::MMX) 7101 .Case("popcnt", X86Features::POPCNT) 7102 .Case("sse", X86Features::SSE) 7103 .Case("sse2", X86Features::SSE2) 7104 .Case("sse3", X86Features::SSE3) 7105 .Case("ssse3", X86Features::SSSE3) 7106 .Case("sse4.1", X86Features::SSE4_1) 7107 .Case("sse4.2", X86Features::SSE4_2) 7108 .Case("avx", X86Features::AVX) 7109 .Case("avx2", X86Features::AVX2) 7110 .Case("sse4a", X86Features::SSE4_A) 7111 .Case("fma4", X86Features::FMA4) 7112 .Case("xop", X86Features::XOP) 7113 .Case("fma", X86Features::FMA) 7114 .Case("avx512f", X86Features::AVX512F) 7115 .Case("bmi", X86Features::BMI) 7116 .Case("bmi2", X86Features::BMI2) 7117 .Case("aes", X86Features::AES) 7118 .Case("pclmul", X86Features::PCLMUL) 7119 .Case("avx512vl", X86Features::AVX512VL) 7120 .Case("avx512bw", X86Features::AVX512BW) 7121 .Case("avx512dq", X86Features::AVX512DQ) 7122 .Case("avx512cd", X86Features::AVX512CD) 7123 .Case("avx512er", X86Features::AVX512ER) 7124 .Case("avx512pf", X86Features::AVX512PF) 7125 .Case("avx512vbmi", X86Features::AVX512VBMI) 7126 .Case("avx512ifma", X86Features::AVX512IFMA) 7127 .Default(X86Features::MAX); 7128 assert(Feature != X86Features::MAX && "Invalid feature!"); 7129 7130 // Matching the struct layout from the compiler-rt/libgcc structure that is 7131 // filled in: 7132 // unsigned int __cpu_vendor; 7133 // unsigned int __cpu_type; 7134 // unsigned int __cpu_subtype; 7135 // unsigned int __cpu_features[1]; 7136 llvm::Type *STy = 
llvm::StructType::get( 7137 Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr); 7138 7139 // Grab the global __cpu_model. 7140 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7141 7142 // Grab the first (0th) element from the field __cpu_features off of the 7143 // global in the struct STy. 7144 Value *Idxs[] = { 7145 ConstantInt::get(Int32Ty, 0), 7146 ConstantInt::get(Int32Ty, 3), 7147 ConstantInt::get(Int32Ty, 0) 7148 }; 7149 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 7150 Value *Features = Builder.CreateAlignedLoad(CpuFeatures, 7151 CharUnits::fromQuantity(4)); 7152 7153 // Check the value of the bit corresponding to the feature requested. 7154 Value *Bitset = Builder.CreateAnd( 7155 Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); 7156 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 7157 } 7158 case X86::BI_mm_prefetch: { 7159 Value *Address = Ops[0]; 7160 Value *RW = ConstantInt::get(Int32Ty, 0); 7161 Value *Locality = Ops[1]; 7162 Value *Data = ConstantInt::get(Int32Ty, 1); 7163 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 7164 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 7165 } 7166 case X86::BI_mm_clflush: { 7167 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 7168 Ops[0]); 7169 } 7170 case X86::BI_mm_lfence: { 7171 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 7172 } 7173 case X86::BI_mm_mfence: { 7174 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 7175 } 7176 case X86::BI_mm_sfence: { 7177 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 7178 } 7179 case X86::BI_mm_pause: { 7180 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 7181 } 7182 case X86::BI__rdtsc: { 7183 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 7184 } 7185 case X86::BI__builtin_ia32_undef128: 7186 case 
X86::BI__builtin_ia32_undef256: 7187 case X86::BI__builtin_ia32_undef512: 7188 return UndefValue::get(ConvertType(E->getType())); 7189 case X86::BI__builtin_ia32_vec_init_v8qi: 7190 case X86::BI__builtin_ia32_vec_init_v4hi: 7191 case X86::BI__builtin_ia32_vec_init_v2si: 7192 return Builder.CreateBitCast(BuildVector(Ops), 7193 llvm::Type::getX86_MMXTy(getLLVMContext())); 7194 case X86::BI__builtin_ia32_vec_ext_v2si: 7195 return Builder.CreateExtractElement(Ops[0], 7196 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 7197 case X86::BI_mm_setcsr: 7198 case X86::BI__builtin_ia32_ldmxcsr: { 7199 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 7200 Builder.CreateStore(Ops[0], Tmp); 7201 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 7202 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7203 } 7204 case X86::BI_mm_getcsr: 7205 case X86::BI__builtin_ia32_stmxcsr: { 7206 Address Tmp = CreateMemTemp(E->getType()); 7207 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 7208 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7209 return Builder.CreateLoad(Tmp, "stmxcsr"); 7210 } 7211 case X86::BI__builtin_ia32_xsave: 7212 case X86::BI__builtin_ia32_xsave64: 7213 case X86::BI__builtin_ia32_xrstor: 7214 case X86::BI__builtin_ia32_xrstor64: 7215 case X86::BI__builtin_ia32_xsaveopt: 7216 case X86::BI__builtin_ia32_xsaveopt64: 7217 case X86::BI__builtin_ia32_xrstors: 7218 case X86::BI__builtin_ia32_xrstors64: 7219 case X86::BI__builtin_ia32_xsavec: 7220 case X86::BI__builtin_ia32_xsavec64: 7221 case X86::BI__builtin_ia32_xsaves: 7222 case X86::BI__builtin_ia32_xsaves64: { 7223 Intrinsic::ID ID; 7224 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 7225 case X86::BI__builtin_ia32_##NAME: \ 7226 ID = Intrinsic::x86_##NAME; \ 7227 break 7228 switch (BuiltinID) { 7229 default: llvm_unreachable("Unsupported intrinsic!"); 7230 INTRINSIC_X86_XSAVE_ID(xsave); 7231 INTRINSIC_X86_XSAVE_ID(xsave64); 7232 INTRINSIC_X86_XSAVE_ID(xrstor); 7233 
INTRINSIC_X86_XSAVE_ID(xrstor64); 7234 INTRINSIC_X86_XSAVE_ID(xsaveopt); 7235 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 7236 INTRINSIC_X86_XSAVE_ID(xrstors); 7237 INTRINSIC_X86_XSAVE_ID(xrstors64); 7238 INTRINSIC_X86_XSAVE_ID(xsavec); 7239 INTRINSIC_X86_XSAVE_ID(xsavec64); 7240 INTRINSIC_X86_XSAVE_ID(xsaves); 7241 INTRINSIC_X86_XSAVE_ID(xsaves64); 7242 } 7243 #undef INTRINSIC_X86_XSAVE_ID 7244 Value *Mhi = Builder.CreateTrunc( 7245 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 7246 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 7247 Ops[1] = Mhi; 7248 Ops.push_back(Mlo); 7249 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7250 } 7251 case X86::BI__builtin_ia32_storedqudi128_mask: 7252 case X86::BI__builtin_ia32_storedqusi128_mask: 7253 case X86::BI__builtin_ia32_storedquhi128_mask: 7254 case X86::BI__builtin_ia32_storedquqi128_mask: 7255 case X86::BI__builtin_ia32_storeupd128_mask: 7256 case X86::BI__builtin_ia32_storeups128_mask: 7257 case X86::BI__builtin_ia32_storedqudi256_mask: 7258 case X86::BI__builtin_ia32_storedqusi256_mask: 7259 case X86::BI__builtin_ia32_storedquhi256_mask: 7260 case X86::BI__builtin_ia32_storedquqi256_mask: 7261 case X86::BI__builtin_ia32_storeupd256_mask: 7262 case X86::BI__builtin_ia32_storeups256_mask: 7263 case X86::BI__builtin_ia32_storedqudi512_mask: 7264 case X86::BI__builtin_ia32_storedqusi512_mask: 7265 case X86::BI__builtin_ia32_storedquhi512_mask: 7266 case X86::BI__builtin_ia32_storedquqi512_mask: 7267 case X86::BI__builtin_ia32_storeupd512_mask: 7268 case X86::BI__builtin_ia32_storeups512_mask: 7269 return EmitX86MaskedStore(*this, Ops, 1); 7270 7271 case X86::BI__builtin_ia32_movdqa32store128_mask: 7272 case X86::BI__builtin_ia32_movdqa64store128_mask: 7273 case X86::BI__builtin_ia32_storeaps128_mask: 7274 case X86::BI__builtin_ia32_storeapd128_mask: 7275 case X86::BI__builtin_ia32_movdqa32store256_mask: 7276 case X86::BI__builtin_ia32_movdqa64store256_mask: 7277 case 
X86::BI__builtin_ia32_storeaps256_mask: 7278 case X86::BI__builtin_ia32_storeapd256_mask: 7279 case X86::BI__builtin_ia32_movdqa32store512_mask: 7280 case X86::BI__builtin_ia32_movdqa64store512_mask: 7281 case X86::BI__builtin_ia32_storeaps512_mask: 7282 case X86::BI__builtin_ia32_storeapd512_mask: { 7283 unsigned Align = 7284 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7285 return EmitX86MaskedStore(*this, Ops, Align); 7286 } 7287 case X86::BI__builtin_ia32_loadups128_mask: 7288 case X86::BI__builtin_ia32_loadups256_mask: 7289 case X86::BI__builtin_ia32_loadups512_mask: 7290 case X86::BI__builtin_ia32_loadupd128_mask: 7291 case X86::BI__builtin_ia32_loadupd256_mask: 7292 case X86::BI__builtin_ia32_loadupd512_mask: 7293 case X86::BI__builtin_ia32_loaddquqi128_mask: 7294 case X86::BI__builtin_ia32_loaddquqi256_mask: 7295 case X86::BI__builtin_ia32_loaddquqi512_mask: 7296 case X86::BI__builtin_ia32_loaddquhi128_mask: 7297 case X86::BI__builtin_ia32_loaddquhi256_mask: 7298 case X86::BI__builtin_ia32_loaddquhi512_mask: 7299 case X86::BI__builtin_ia32_loaddqusi128_mask: 7300 case X86::BI__builtin_ia32_loaddqusi256_mask: 7301 case X86::BI__builtin_ia32_loaddqusi512_mask: 7302 case X86::BI__builtin_ia32_loaddqudi128_mask: 7303 case X86::BI__builtin_ia32_loaddqudi256_mask: 7304 case X86::BI__builtin_ia32_loaddqudi512_mask: 7305 return EmitX86MaskedLoad(*this, Ops, 1); 7306 7307 case X86::BI__builtin_ia32_loadaps128_mask: 7308 case X86::BI__builtin_ia32_loadaps256_mask: 7309 case X86::BI__builtin_ia32_loadaps512_mask: 7310 case X86::BI__builtin_ia32_loadapd128_mask: 7311 case X86::BI__builtin_ia32_loadapd256_mask: 7312 case X86::BI__builtin_ia32_loadapd512_mask: 7313 case X86::BI__builtin_ia32_movdqa32load128_mask: 7314 case X86::BI__builtin_ia32_movdqa32load256_mask: 7315 case X86::BI__builtin_ia32_movdqa32load512_mask: 7316 case X86::BI__builtin_ia32_movdqa64load128_mask: 7317 case X86::BI__builtin_ia32_movdqa64load256_mask: 7318 case 
X86::BI__builtin_ia32_movdqa64load512_mask: { 7319 unsigned Align = 7320 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7321 return EmitX86MaskedLoad(*this, Ops, Align); 7322 } 7323 7324 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 7325 case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 7326 llvm::Type *DstTy = ConvertType(E->getType()); 7327 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 7328 } 7329 7330 case X86::BI__builtin_ia32_storehps: 7331 case X86::BI__builtin_ia32_storelps: { 7332 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 7333 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 7334 7335 // cast val v2i64 7336 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 7337 7338 // extract (0, 1) 7339 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 7340 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 7341 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 7342 7343 // cast pointer to i64 & store 7344 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 7345 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7346 } 7347 case X86::BI__builtin_ia32_palignr128: 7348 case X86::BI__builtin_ia32_palignr256: 7349 case X86::BI__builtin_ia32_palignr128_mask: 7350 case X86::BI__builtin_ia32_palignr256_mask: 7351 case X86::BI__builtin_ia32_palignr512_mask: { 7352 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7353 7354 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7355 assert(NumElts % 16 == 0); 7356 7357 // If palignr is shifting the pair of vectors more than the size of two 7358 // lanes, emit zero. 7359 if (ShiftVal >= 32) 7360 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7361 7362 // If palignr is shifting the pair of input vectors more than one lane, 7363 // but less than two lanes, convert to shifting in zeroes. 
7364 if (ShiftVal > 16) { 7365 ShiftVal -= 16; 7366 Ops[1] = Ops[0]; 7367 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 7368 } 7369 7370 uint32_t Indices[64]; 7371 // 256-bit palignr operates on 128-bit lanes so we need to handle that 7372 for (unsigned l = 0; l != NumElts; l += 16) { 7373 for (unsigned i = 0; i != 16; ++i) { 7374 unsigned Idx = ShiftVal + i; 7375 if (Idx >= 16) 7376 Idx += NumElts - 16; // End of lane, switch operand. 7377 Indices[l + i] = Idx + l; 7378 } 7379 } 7380 7381 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 7382 makeArrayRef(Indices, NumElts), 7383 "palignr"); 7384 7385 // If this isn't a masked builtin, just return the align operation. 7386 if (Ops.size() == 3) 7387 return Align; 7388 7389 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 7390 } 7391 7392 case X86::BI__builtin_ia32_movnti: 7393 case X86::BI__builtin_ia32_movnti64: { 7394 llvm::MDNode *Node = llvm::MDNode::get( 7395 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7396 7397 // Convert the type of the pointer to a pointer to the stored type. 7398 Value *BC = Builder.CreateBitCast(Ops[0], 7399 llvm::PointerType::getUnqual(Ops[1]->getType()), 7400 "cast"); 7401 StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); 7402 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7403 7404 // No alignment for scalar intrinsic store. 7405 SI->setAlignment(1); 7406 return SI; 7407 } 7408 case X86::BI__builtin_ia32_movntsd: 7409 case X86::BI__builtin_ia32_movntss: { 7410 llvm::MDNode *Node = llvm::MDNode::get( 7411 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7412 7413 // Extract the 0'th element of the source vector. 7414 Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); 7415 7416 // Convert the type of the pointer to a pointer to the stored type. 
7417 Value *BC = Builder.CreateBitCast(Ops[0], 7418 llvm::PointerType::getUnqual(Scl->getType()), 7419 "cast"); 7420 7421 // Unaligned nontemporal store of the scalar value. 7422 StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); 7423 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7424 SI->setAlignment(1); 7425 return SI; 7426 } 7427 7428 case X86::BI__builtin_ia32_selectb_128: 7429 case X86::BI__builtin_ia32_selectb_256: 7430 case X86::BI__builtin_ia32_selectb_512: 7431 case X86::BI__builtin_ia32_selectw_128: 7432 case X86::BI__builtin_ia32_selectw_256: 7433 case X86::BI__builtin_ia32_selectw_512: 7434 case X86::BI__builtin_ia32_selectd_128: 7435 case X86::BI__builtin_ia32_selectd_256: 7436 case X86::BI__builtin_ia32_selectd_512: 7437 case X86::BI__builtin_ia32_selectq_128: 7438 case X86::BI__builtin_ia32_selectq_256: 7439 case X86::BI__builtin_ia32_selectq_512: 7440 case X86::BI__builtin_ia32_selectps_128: 7441 case X86::BI__builtin_ia32_selectps_256: 7442 case X86::BI__builtin_ia32_selectps_512: 7443 case X86::BI__builtin_ia32_selectpd_128: 7444 case X86::BI__builtin_ia32_selectpd_256: 7445 case X86::BI__builtin_ia32_selectpd_512: 7446 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 7447 case X86::BI__builtin_ia32_pcmpeqb128_mask: 7448 case X86::BI__builtin_ia32_pcmpeqb256_mask: 7449 case X86::BI__builtin_ia32_pcmpeqb512_mask: 7450 case X86::BI__builtin_ia32_pcmpeqw128_mask: 7451 case X86::BI__builtin_ia32_pcmpeqw256_mask: 7452 case X86::BI__builtin_ia32_pcmpeqw512_mask: 7453 case X86::BI__builtin_ia32_pcmpeqd128_mask: 7454 case X86::BI__builtin_ia32_pcmpeqd256_mask: 7455 case X86::BI__builtin_ia32_pcmpeqd512_mask: 7456 case X86::BI__builtin_ia32_pcmpeqq128_mask: 7457 case X86::BI__builtin_ia32_pcmpeqq256_mask: 7458 case X86::BI__builtin_ia32_pcmpeqq512_mask: 7459 return EmitX86MaskedCompare(*this, 0, false, Ops); 7460 case X86::BI__builtin_ia32_pcmpgtb128_mask: 7461 case X86::BI__builtin_ia32_pcmpgtb256_mask: 7462 case 
X86::BI__builtin_ia32_pcmpgtb512_mask: 7463 case X86::BI__builtin_ia32_pcmpgtw128_mask: 7464 case X86::BI__builtin_ia32_pcmpgtw256_mask: 7465 case X86::BI__builtin_ia32_pcmpgtw512_mask: 7466 case X86::BI__builtin_ia32_pcmpgtd128_mask: 7467 case X86::BI__builtin_ia32_pcmpgtd256_mask: 7468 case X86::BI__builtin_ia32_pcmpgtd512_mask: 7469 case X86::BI__builtin_ia32_pcmpgtq128_mask: 7470 case X86::BI__builtin_ia32_pcmpgtq256_mask: 7471 case X86::BI__builtin_ia32_pcmpgtq512_mask: 7472 return EmitX86MaskedCompare(*this, 6, true, Ops); 7473 case X86::BI__builtin_ia32_cmpb128_mask: 7474 case X86::BI__builtin_ia32_cmpb256_mask: 7475 case X86::BI__builtin_ia32_cmpb512_mask: 7476 case X86::BI__builtin_ia32_cmpw128_mask: 7477 case X86::BI__builtin_ia32_cmpw256_mask: 7478 case X86::BI__builtin_ia32_cmpw512_mask: 7479 case X86::BI__builtin_ia32_cmpd128_mask: 7480 case X86::BI__builtin_ia32_cmpd256_mask: 7481 case X86::BI__builtin_ia32_cmpd512_mask: 7482 case X86::BI__builtin_ia32_cmpq128_mask: 7483 case X86::BI__builtin_ia32_cmpq256_mask: 7484 case X86::BI__builtin_ia32_cmpq512_mask: { 7485 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7486 return EmitX86MaskedCompare(*this, CC, true, Ops); 7487 } 7488 case X86::BI__builtin_ia32_ucmpb128_mask: 7489 case X86::BI__builtin_ia32_ucmpb256_mask: 7490 case X86::BI__builtin_ia32_ucmpb512_mask: 7491 case X86::BI__builtin_ia32_ucmpw128_mask: 7492 case X86::BI__builtin_ia32_ucmpw256_mask: 7493 case X86::BI__builtin_ia32_ucmpw512_mask: 7494 case X86::BI__builtin_ia32_ucmpd128_mask: 7495 case X86::BI__builtin_ia32_ucmpd256_mask: 7496 case X86::BI__builtin_ia32_ucmpd512_mask: 7497 case X86::BI__builtin_ia32_ucmpq128_mask: 7498 case X86::BI__builtin_ia32_ucmpq256_mask: 7499 case X86::BI__builtin_ia32_ucmpq512_mask: { 7500 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7501 return EmitX86MaskedCompare(*this, CC, false, Ops); 7502 } 7503 7504 case X86::BI__builtin_ia32_vplzcntd_128_mask: 7505 case 
X86::BI__builtin_ia32_vplzcntd_256_mask: 7506 case X86::BI__builtin_ia32_vplzcntd_512_mask: 7507 case X86::BI__builtin_ia32_vplzcntq_128_mask: 7508 case X86::BI__builtin_ia32_vplzcntq_256_mask: 7509 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 7510 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 7511 return EmitX86Select(*this, Ops[2], 7512 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 7513 Ops[1]); 7514 } 7515 7516 // TODO: Handle 64/512-bit vector widths of min/max. 7517 case X86::BI__builtin_ia32_pmaxsb128: 7518 case X86::BI__builtin_ia32_pmaxsw128: 7519 case X86::BI__builtin_ia32_pmaxsd128: 7520 case X86::BI__builtin_ia32_pmaxsb256: 7521 case X86::BI__builtin_ia32_pmaxsw256: 7522 case X86::BI__builtin_ia32_pmaxsd256: { 7523 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); 7524 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7525 } 7526 case X86::BI__builtin_ia32_pmaxub128: 7527 case X86::BI__builtin_ia32_pmaxuw128: 7528 case X86::BI__builtin_ia32_pmaxud128: 7529 case X86::BI__builtin_ia32_pmaxub256: 7530 case X86::BI__builtin_ia32_pmaxuw256: 7531 case X86::BI__builtin_ia32_pmaxud256: { 7532 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); 7533 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7534 } 7535 case X86::BI__builtin_ia32_pminsb128: 7536 case X86::BI__builtin_ia32_pminsw128: 7537 case X86::BI__builtin_ia32_pminsd128: 7538 case X86::BI__builtin_ia32_pminsb256: 7539 case X86::BI__builtin_ia32_pminsw256: 7540 case X86::BI__builtin_ia32_pminsd256: { 7541 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); 7542 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7543 } 7544 case X86::BI__builtin_ia32_pminub128: 7545 case X86::BI__builtin_ia32_pminuw128: 7546 case X86::BI__builtin_ia32_pminud128: 7547 case X86::BI__builtin_ia32_pminub256: 7548 case X86::BI__builtin_ia32_pminuw256: 7549 case X86::BI__builtin_ia32_pminud256: { 7550 Value *Cmp = 
Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); 7551 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7552 } 7553 7554 // 3DNow! 7555 case X86::BI__builtin_ia32_pswapdsf: 7556 case X86::BI__builtin_ia32_pswapdsi: { 7557 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 7558 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 7559 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 7560 return Builder.CreateCall(F, Ops, "pswapd"); 7561 } 7562 case X86::BI__builtin_ia32_rdrand16_step: 7563 case X86::BI__builtin_ia32_rdrand32_step: 7564 case X86::BI__builtin_ia32_rdrand64_step: 7565 case X86::BI__builtin_ia32_rdseed16_step: 7566 case X86::BI__builtin_ia32_rdseed32_step: 7567 case X86::BI__builtin_ia32_rdseed64_step: { 7568 Intrinsic::ID ID; 7569 switch (BuiltinID) { 7570 default: llvm_unreachable("Unsupported intrinsic!"); 7571 case X86::BI__builtin_ia32_rdrand16_step: 7572 ID = Intrinsic::x86_rdrand_16; 7573 break; 7574 case X86::BI__builtin_ia32_rdrand32_step: 7575 ID = Intrinsic::x86_rdrand_32; 7576 break; 7577 case X86::BI__builtin_ia32_rdrand64_step: 7578 ID = Intrinsic::x86_rdrand_64; 7579 break; 7580 case X86::BI__builtin_ia32_rdseed16_step: 7581 ID = Intrinsic::x86_rdseed_16; 7582 break; 7583 case X86::BI__builtin_ia32_rdseed32_step: 7584 ID = Intrinsic::x86_rdseed_32; 7585 break; 7586 case X86::BI__builtin_ia32_rdseed64_step: 7587 ID = Intrinsic::x86_rdseed_64; 7588 break; 7589 } 7590 7591 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 7592 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 7593 Ops[0]); 7594 return Builder.CreateExtractValue(Call, 1); 7595 } 7596 7597 // SSE packed comparison intrinsics 7598 case X86::BI__builtin_ia32_cmpeqps: 7599 case X86::BI__builtin_ia32_cmpeqpd: 7600 return getVectorFCmpIR(CmpInst::FCMP_OEQ); 7601 case X86::BI__builtin_ia32_cmpltps: 7602 case X86::BI__builtin_ia32_cmpltpd: 7603 return getVectorFCmpIR(CmpInst::FCMP_OLT); 7604 case 
X86::BI__builtin_ia32_cmpleps: 7605 case X86::BI__builtin_ia32_cmplepd: 7606 return getVectorFCmpIR(CmpInst::FCMP_OLE); 7607 case X86::BI__builtin_ia32_cmpunordps: 7608 case X86::BI__builtin_ia32_cmpunordpd: 7609 return getVectorFCmpIR(CmpInst::FCMP_UNO); 7610 case X86::BI__builtin_ia32_cmpneqps: 7611 case X86::BI__builtin_ia32_cmpneqpd: 7612 return getVectorFCmpIR(CmpInst::FCMP_UNE); 7613 case X86::BI__builtin_ia32_cmpnltps: 7614 case X86::BI__builtin_ia32_cmpnltpd: 7615 return getVectorFCmpIR(CmpInst::FCMP_UGE); 7616 case X86::BI__builtin_ia32_cmpnleps: 7617 case X86::BI__builtin_ia32_cmpnlepd: 7618 return getVectorFCmpIR(CmpInst::FCMP_UGT); 7619 case X86::BI__builtin_ia32_cmpordps: 7620 case X86::BI__builtin_ia32_cmpordpd: 7621 return getVectorFCmpIR(CmpInst::FCMP_ORD); 7622 case X86::BI__builtin_ia32_cmpps: 7623 case X86::BI__builtin_ia32_cmpps256: 7624 case X86::BI__builtin_ia32_cmppd: 7625 case X86::BI__builtin_ia32_cmppd256: { 7626 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7627 // If this one of the SSE immediates, we can use native IR. 7628 if (CC < 8) { 7629 FCmpInst::Predicate Pred; 7630 switch (CC) { 7631 case 0: Pred = FCmpInst::FCMP_OEQ; break; 7632 case 1: Pred = FCmpInst::FCMP_OLT; break; 7633 case 2: Pred = FCmpInst::FCMP_OLE; break; 7634 case 3: Pred = FCmpInst::FCMP_UNO; break; 7635 case 4: Pred = FCmpInst::FCMP_UNE; break; 7636 case 5: Pred = FCmpInst::FCMP_UGE; break; 7637 case 6: Pred = FCmpInst::FCMP_UGT; break; 7638 case 7: Pred = FCmpInst::FCMP_ORD; break; 7639 } 7640 return getVectorFCmpIR(Pred); 7641 } 7642 7643 // We can't handle 8-31 immediates with native IR, use the intrinsic. 
7644 Intrinsic::ID ID; 7645 switch (BuiltinID) { 7646 default: llvm_unreachable("Unsupported intrinsic!"); 7647 case X86::BI__builtin_ia32_cmpps: 7648 ID = Intrinsic::x86_sse_cmp_ps; 7649 break; 7650 case X86::BI__builtin_ia32_cmpps256: 7651 ID = Intrinsic::x86_avx_cmp_ps_256; 7652 break; 7653 case X86::BI__builtin_ia32_cmppd: 7654 ID = Intrinsic::x86_sse2_cmp_pd; 7655 break; 7656 case X86::BI__builtin_ia32_cmppd256: 7657 ID = Intrinsic::x86_avx_cmp_pd_256; 7658 break; 7659 } 7660 7661 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7662 } 7663 7664 // SSE scalar comparison intrinsics 7665 case X86::BI__builtin_ia32_cmpeqss: 7666 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 7667 case X86::BI__builtin_ia32_cmpltss: 7668 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 7669 case X86::BI__builtin_ia32_cmpless: 7670 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 7671 case X86::BI__builtin_ia32_cmpunordss: 7672 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 7673 case X86::BI__builtin_ia32_cmpneqss: 7674 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 7675 case X86::BI__builtin_ia32_cmpnltss: 7676 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 7677 case X86::BI__builtin_ia32_cmpnless: 7678 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 7679 case X86::BI__builtin_ia32_cmpordss: 7680 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 7681 case X86::BI__builtin_ia32_cmpeqsd: 7682 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 7683 case X86::BI__builtin_ia32_cmpltsd: 7684 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 7685 case X86::BI__builtin_ia32_cmplesd: 7686 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 7687 case X86::BI__builtin_ia32_cmpunordsd: 7688 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 7689 case X86::BI__builtin_ia32_cmpneqsd: 7690 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 7691 case 
X86::BI__builtin_ia32_cmpnltsd: 7692 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 7693 case X86::BI__builtin_ia32_cmpnlesd: 7694 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 7695 case X86::BI__builtin_ia32_cmpordsd: 7696 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 7697 7698 case X86::BI__emul: 7699 case X86::BI__emulu: { 7700 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 7701 bool isSigned = (BuiltinID == X86::BI__emul); 7702 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 7703 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 7704 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 7705 } 7706 case X86::BI__mulh: 7707 case X86::BI__umulh: 7708 case X86::BI_mul128: 7709 case X86::BI_umul128: { 7710 llvm::Type *ResType = ConvertType(E->getType()); 7711 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 7712 7713 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 7714 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 7715 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 7716 7717 Value *MulResult, *HigherBits; 7718 if (IsSigned) { 7719 MulResult = Builder.CreateNSWMul(LHS, RHS); 7720 HigherBits = Builder.CreateAShr(MulResult, 64); 7721 } else { 7722 MulResult = Builder.CreateNUWMul(LHS, RHS); 7723 HigherBits = Builder.CreateLShr(MulResult, 64); 7724 } 7725 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 7726 7727 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 7728 return HigherBits; 7729 7730 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 7731 Builder.CreateStore(HigherBits, HighBitsAddress); 7732 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 7733 } 7734 7735 case X86::BI__faststorefence: { 7736 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 7737 llvm::CrossThread); 7738 } 7739 case 
X86::BI_ReadWriteBarrier: 7740 case X86::BI_ReadBarrier: 7741 case X86::BI_WriteBarrier: { 7742 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 7743 llvm::SingleThread); 7744 } 7745 case X86::BI_BitScanForward: 7746 case X86::BI_BitScanForward64: 7747 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 7748 case X86::BI_BitScanReverse: 7749 case X86::BI_BitScanReverse64: 7750 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 7751 7752 case X86::BI_InterlockedAnd64: 7753 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 7754 case X86::BI_InterlockedExchange64: 7755 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 7756 case X86::BI_InterlockedExchangeAdd64: 7757 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 7758 case X86::BI_InterlockedExchangeSub64: 7759 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 7760 case X86::BI_InterlockedOr64: 7761 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 7762 case X86::BI_InterlockedXor64: 7763 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 7764 case X86::BI_InterlockedDecrement64: 7765 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 7766 case X86::BI_InterlockedIncrement64: 7767 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 7768 7769 case X86::BI_AddressOfReturnAddress: { 7770 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); 7771 return Builder.CreateCall(F); 7772 } 7773 case X86::BI__stosb: { 7774 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 7775 // instruction, but it will create a memset that won't be optimized away. 
7776 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); 7777 } 7778 } 7779 } 7780 7781 7782 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 7783 const CallExpr *E) { 7784 SmallVector<Value*, 4> Ops; 7785 7786 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 7787 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7788 7789 Intrinsic::ID ID = Intrinsic::not_intrinsic; 7790 7791 switch (BuiltinID) { 7792 default: return nullptr; 7793 7794 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 7795 // call __builtin_readcyclecounter. 7796 case PPC::BI__builtin_ppc_get_timebase: 7797 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 7798 7799 // vec_ld, vec_lvsl, vec_lvsr 7800 case PPC::BI__builtin_altivec_lvx: 7801 case PPC::BI__builtin_altivec_lvxl: 7802 case PPC::BI__builtin_altivec_lvebx: 7803 case PPC::BI__builtin_altivec_lvehx: 7804 case PPC::BI__builtin_altivec_lvewx: 7805 case PPC::BI__builtin_altivec_lvsl: 7806 case PPC::BI__builtin_altivec_lvsr: 7807 case PPC::BI__builtin_vsx_lxvd2x: 7808 case PPC::BI__builtin_vsx_lxvw4x: 7809 { 7810 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 7811 7812 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 7813 Ops.pop_back(); 7814 7815 switch (BuiltinID) { 7816 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 7817 case PPC::BI__builtin_altivec_lvx: 7818 ID = Intrinsic::ppc_altivec_lvx; 7819 break; 7820 case PPC::BI__builtin_altivec_lvxl: 7821 ID = Intrinsic::ppc_altivec_lvxl; 7822 break; 7823 case PPC::BI__builtin_altivec_lvebx: 7824 ID = Intrinsic::ppc_altivec_lvebx; 7825 break; 7826 case PPC::BI__builtin_altivec_lvehx: 7827 ID = Intrinsic::ppc_altivec_lvehx; 7828 break; 7829 case PPC::BI__builtin_altivec_lvewx: 7830 ID = Intrinsic::ppc_altivec_lvewx; 7831 break; 7832 case PPC::BI__builtin_altivec_lvsl: 7833 ID = Intrinsic::ppc_altivec_lvsl; 7834 break; 7835 case PPC::BI__builtin_altivec_lvsr: 7836 ID = Intrinsic::ppc_altivec_lvsr; 7837 
break; 7838 case PPC::BI__builtin_vsx_lxvd2x: 7839 ID = Intrinsic::ppc_vsx_lxvd2x; 7840 break; 7841 case PPC::BI__builtin_vsx_lxvw4x: 7842 ID = Intrinsic::ppc_vsx_lxvw4x; 7843 break; 7844 } 7845 llvm::Function *F = CGM.getIntrinsic(ID); 7846 return Builder.CreateCall(F, Ops, ""); 7847 } 7848 7849 // vec_st 7850 case PPC::BI__builtin_altivec_stvx: 7851 case PPC::BI__builtin_altivec_stvxl: 7852 case PPC::BI__builtin_altivec_stvebx: 7853 case PPC::BI__builtin_altivec_stvehx: 7854 case PPC::BI__builtin_altivec_stvewx: 7855 case PPC::BI__builtin_vsx_stxvd2x: 7856 case PPC::BI__builtin_vsx_stxvw4x: 7857 { 7858 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 7859 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 7860 Ops.pop_back(); 7861 7862 switch (BuiltinID) { 7863 default: llvm_unreachable("Unsupported st intrinsic!"); 7864 case PPC::BI__builtin_altivec_stvx: 7865 ID = Intrinsic::ppc_altivec_stvx; 7866 break; 7867 case PPC::BI__builtin_altivec_stvxl: 7868 ID = Intrinsic::ppc_altivec_stvxl; 7869 break; 7870 case PPC::BI__builtin_altivec_stvebx: 7871 ID = Intrinsic::ppc_altivec_stvebx; 7872 break; 7873 case PPC::BI__builtin_altivec_stvehx: 7874 ID = Intrinsic::ppc_altivec_stvehx; 7875 break; 7876 case PPC::BI__builtin_altivec_stvewx: 7877 ID = Intrinsic::ppc_altivec_stvewx; 7878 break; 7879 case PPC::BI__builtin_vsx_stxvd2x: 7880 ID = Intrinsic::ppc_vsx_stxvd2x; 7881 break; 7882 case PPC::BI__builtin_vsx_stxvw4x: 7883 ID = Intrinsic::ppc_vsx_stxvw4x; 7884 break; 7885 } 7886 llvm::Function *F = CGM.getIntrinsic(ID); 7887 return Builder.CreateCall(F, Ops, ""); 7888 } 7889 // Square root 7890 case PPC::BI__builtin_vsx_xvsqrtsp: 7891 case PPC::BI__builtin_vsx_xvsqrtdp: { 7892 llvm::Type *ResultType = ConvertType(E->getType()); 7893 Value *X = EmitScalarExpr(E->getArg(0)); 7894 ID = Intrinsic::sqrt; 7895 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7896 return Builder.CreateCall(F, X); 7897 } 7898 // Count leading zeros 7899 case PPC::BI__builtin_altivec_vclzb: 7900 
case PPC::BI__builtin_altivec_vclzh: 7901 case PPC::BI__builtin_altivec_vclzw: 7902 case PPC::BI__builtin_altivec_vclzd: { 7903 llvm::Type *ResultType = ConvertType(E->getType()); 7904 Value *X = EmitScalarExpr(E->getArg(0)); 7905 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7906 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 7907 return Builder.CreateCall(F, {X, Undef}); 7908 } 7909 case PPC::BI__builtin_altivec_vctzb: 7910 case PPC::BI__builtin_altivec_vctzh: 7911 case PPC::BI__builtin_altivec_vctzw: 7912 case PPC::BI__builtin_altivec_vctzd: { 7913 llvm::Type *ResultType = ConvertType(E->getType()); 7914 Value *X = EmitScalarExpr(E->getArg(0)); 7915 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7916 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 7917 return Builder.CreateCall(F, {X, Undef}); 7918 } 7919 case PPC::BI__builtin_altivec_vpopcntb: 7920 case PPC::BI__builtin_altivec_vpopcnth: 7921 case PPC::BI__builtin_altivec_vpopcntw: 7922 case PPC::BI__builtin_altivec_vpopcntd: { 7923 llvm::Type *ResultType = ConvertType(E->getType()); 7924 Value *X = EmitScalarExpr(E->getArg(0)); 7925 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 7926 return Builder.CreateCall(F, X); 7927 } 7928 // Copy sign 7929 case PPC::BI__builtin_vsx_xvcpsgnsp: 7930 case PPC::BI__builtin_vsx_xvcpsgndp: { 7931 llvm::Type *ResultType = ConvertType(E->getType()); 7932 Value *X = EmitScalarExpr(E->getArg(0)); 7933 Value *Y = EmitScalarExpr(E->getArg(1)); 7934 ID = Intrinsic::copysign; 7935 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7936 return Builder.CreateCall(F, {X, Y}); 7937 } 7938 // Rounding/truncation 7939 case PPC::BI__builtin_vsx_xvrspip: 7940 case PPC::BI__builtin_vsx_xvrdpip: 7941 case PPC::BI__builtin_vsx_xvrdpim: 7942 case PPC::BI__builtin_vsx_xvrspim: 7943 case PPC::BI__builtin_vsx_xvrdpi: 7944 case PPC::BI__builtin_vsx_xvrspi: 7945 case PPC::BI__builtin_vsx_xvrdpic: 7946 case 
PPC::BI__builtin_vsx_xvrspic: 7947 case PPC::BI__builtin_vsx_xvrdpiz: 7948 case PPC::BI__builtin_vsx_xvrspiz: { 7949 llvm::Type *ResultType = ConvertType(E->getType()); 7950 Value *X = EmitScalarExpr(E->getArg(0)); 7951 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 7952 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 7953 ID = Intrinsic::floor; 7954 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 7955 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 7956 ID = Intrinsic::round; 7957 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 7958 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 7959 ID = Intrinsic::nearbyint; 7960 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 7961 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 7962 ID = Intrinsic::ceil; 7963 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 7964 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 7965 ID = Intrinsic::trunc; 7966 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7967 return Builder.CreateCall(F, X); 7968 } 7969 7970 // Absolute value 7971 case PPC::BI__builtin_vsx_xvabsdp: 7972 case PPC::BI__builtin_vsx_xvabssp: { 7973 llvm::Type *ResultType = ConvertType(E->getType()); 7974 Value *X = EmitScalarExpr(E->getArg(0)); 7975 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 7976 return Builder.CreateCall(F, X); 7977 } 7978 7979 // FMA variations 7980 case PPC::BI__builtin_vsx_xvmaddadp: 7981 case PPC::BI__builtin_vsx_xvmaddasp: 7982 case PPC::BI__builtin_vsx_xvnmaddadp: 7983 case PPC::BI__builtin_vsx_xvnmaddasp: 7984 case PPC::BI__builtin_vsx_xvmsubadp: 7985 case PPC::BI__builtin_vsx_xvmsubasp: 7986 case PPC::BI__builtin_vsx_xvnmsubadp: 7987 case PPC::BI__builtin_vsx_xvnmsubasp: { 7988 llvm::Type *ResultType = ConvertType(E->getType()); 7989 Value *X = EmitScalarExpr(E->getArg(0)); 7990 Value *Y = EmitScalarExpr(E->getArg(1)); 7991 Value *Z = EmitScalarExpr(E->getArg(2)); 7992 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 7993 llvm::Function *F = 
CGM.getIntrinsic(Intrinsic::fma, ResultType); 7994 switch (BuiltinID) { 7995 case PPC::BI__builtin_vsx_xvmaddadp: 7996 case PPC::BI__builtin_vsx_xvmaddasp: 7997 return Builder.CreateCall(F, {X, Y, Z}); 7998 case PPC::BI__builtin_vsx_xvnmaddadp: 7999 case PPC::BI__builtin_vsx_xvnmaddasp: 8000 return Builder.CreateFSub(Zero, 8001 Builder.CreateCall(F, {X, Y, Z}), "sub"); 8002 case PPC::BI__builtin_vsx_xvmsubadp: 8003 case PPC::BI__builtin_vsx_xvmsubasp: 8004 return Builder.CreateCall(F, 8005 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8006 case PPC::BI__builtin_vsx_xvnmsubadp: 8007 case PPC::BI__builtin_vsx_xvnmsubasp: 8008 Value *FsubRes = 8009 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8010 return Builder.CreateFSub(Zero, FsubRes, "sub"); 8011 } 8012 llvm_unreachable("Unknown FMA operation"); 8013 return nullptr; // Suppress no-return warning 8014 } 8015 } 8016 } 8017 8018 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 8019 const CallExpr *E) { 8020 switch (BuiltinID) { 8021 case AMDGPU::BI__builtin_amdgcn_div_scale: 8022 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 8023 // Translate from the intrinsics's struct return to the builtin's out 8024 // argument. 
8025 8026 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 8027 8028 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 8029 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 8030 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 8031 8032 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 8033 X->getType()); 8034 8035 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 8036 8037 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 8038 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 8039 8040 llvm::Type *RealFlagType 8041 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 8042 8043 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 8044 Builder.CreateStore(FlagExt, FlagOutPtr); 8045 return Result; 8046 } 8047 case AMDGPU::BI__builtin_amdgcn_div_fmas: 8048 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 8049 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 8050 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 8051 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 8052 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 8053 8054 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 8055 Src0->getType()); 8056 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 8057 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 8058 } 8059 8060 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 8061 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 8062 case AMDGPU::BI__builtin_amdgcn_div_fixup: 8063 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 8064 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 8065 case AMDGPU::BI__builtin_amdgcn_trig_preop: 8066 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 8067 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 8068 case AMDGPU::BI__builtin_amdgcn_rcp: 8069 case AMDGPU::BI__builtin_amdgcn_rcpf: 8070 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 8071 case AMDGPU::BI__builtin_amdgcn_rsq: 8072 case 
AMDGPU::BI__builtin_amdgcn_rsqf: 8073 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 8074 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 8075 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 8076 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 8077 case AMDGPU::BI__builtin_amdgcn_sinf: 8078 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 8079 case AMDGPU::BI__builtin_amdgcn_cosf: 8080 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 8081 case AMDGPU::BI__builtin_amdgcn_log_clampf: 8082 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 8083 case AMDGPU::BI__builtin_amdgcn_ldexp: 8084 case AMDGPU::BI__builtin_amdgcn_ldexpf: 8085 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 8086 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 8087 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: { 8088 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 8089 } 8090 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 8091 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 8092 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp); 8093 } 8094 case AMDGPU::BI__builtin_amdgcn_fract: 8095 case AMDGPU::BI__builtin_amdgcn_fractf: 8096 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 8097 case AMDGPU::BI__builtin_amdgcn_lerp: 8098 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 8099 case AMDGPU::BI__builtin_amdgcn_uicmp: 8100 case AMDGPU::BI__builtin_amdgcn_uicmpl: 8101 case AMDGPU::BI__builtin_amdgcn_sicmp: 8102 case AMDGPU::BI__builtin_amdgcn_sicmpl: 8103 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 8104 case AMDGPU::BI__builtin_amdgcn_fcmp: 8105 case AMDGPU::BI__builtin_amdgcn_fcmpf: 8106 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 8107 case AMDGPU::BI__builtin_amdgcn_class: 8108 case AMDGPU::BI__builtin_amdgcn_classf: 8109 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 8110 8111 case AMDGPU::BI__builtin_amdgcn_read_exec: { 8112 CallInst *CI = 
cast<CallInst>( 8113 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 8114 CI->setConvergent(); 8115 return CI; 8116 } 8117 8118 // amdgcn workitem 8119 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 8120 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 8121 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 8122 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 8123 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 8124 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 8125 8126 // r600 intrinsics 8127 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 8128 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 8129 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 8130 case AMDGPU::BI__builtin_r600_read_tidig_x: 8131 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 8132 case AMDGPU::BI__builtin_r600_read_tidig_y: 8133 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 8134 case AMDGPU::BI__builtin_r600_read_tidig_z: 8135 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 8136 default: 8137 return nullptr; 8138 } 8139 } 8140 8141 /// Handle a SystemZ function in which the final argument is a pointer 8142 /// to an int that receives the post-instruction CC value. At the LLVM level 8143 /// this is represented as a function that returns a {result, cc} pair. 
8144 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 8145 unsigned IntrinsicID, 8146 const CallExpr *E) { 8147 unsigned NumArgs = E->getNumArgs() - 1; 8148 SmallVector<Value *, 8> Args(NumArgs); 8149 for (unsigned I = 0; I < NumArgs; ++I) 8150 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 8151 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 8152 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 8153 Value *Call = CGF.Builder.CreateCall(F, Args); 8154 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 8155 CGF.Builder.CreateStore(CC, CCPtr); 8156 return CGF.Builder.CreateExtractValue(Call, 0); 8157 } 8158 8159 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 8160 const CallExpr *E) { 8161 switch (BuiltinID) { 8162 case SystemZ::BI__builtin_tbegin: { 8163 Value *TDB = EmitScalarExpr(E->getArg(0)); 8164 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 8165 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 8166 return Builder.CreateCall(F, {TDB, Control}); 8167 } 8168 case SystemZ::BI__builtin_tbegin_nofloat: { 8169 Value *TDB = EmitScalarExpr(E->getArg(0)); 8170 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 8171 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 8172 return Builder.CreateCall(F, {TDB, Control}); 8173 } 8174 case SystemZ::BI__builtin_tbeginc: { 8175 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 8176 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 8177 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 8178 return Builder.CreateCall(F, {TDB, Control}); 8179 } 8180 case SystemZ::BI__builtin_tabort: { 8181 Value *Data = EmitScalarExpr(E->getArg(0)); 8182 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 8183 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 8184 } 8185 case SystemZ::BI__builtin_non_tx_store: { 8186 Value *Address = EmitScalarExpr(E->getArg(0)); 8187 Value *Data = EmitScalarExpr(E->getArg(1)); 8188 Value *F = 
CGM.getIntrinsic(Intrinsic::s390_ntstg); 8189 return Builder.CreateCall(F, {Data, Address}); 8190 } 8191 8192 // Vector builtins. Note that most vector builtins are mapped automatically 8193 // to target-specific LLVM intrinsics. The ones handled specially here can 8194 // be represented via standard LLVM IR, which is preferable to enable common 8195 // LLVM optimizations. 8196 8197 case SystemZ::BI__builtin_s390_vpopctb: 8198 case SystemZ::BI__builtin_s390_vpopcth: 8199 case SystemZ::BI__builtin_s390_vpopctf: 8200 case SystemZ::BI__builtin_s390_vpopctg: { 8201 llvm::Type *ResultType = ConvertType(E->getType()); 8202 Value *X = EmitScalarExpr(E->getArg(0)); 8203 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8204 return Builder.CreateCall(F, X); 8205 } 8206 8207 case SystemZ::BI__builtin_s390_vclzb: 8208 case SystemZ::BI__builtin_s390_vclzh: 8209 case SystemZ::BI__builtin_s390_vclzf: 8210 case SystemZ::BI__builtin_s390_vclzg: { 8211 llvm::Type *ResultType = ConvertType(E->getType()); 8212 Value *X = EmitScalarExpr(E->getArg(0)); 8213 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8214 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8215 return Builder.CreateCall(F, {X, Undef}); 8216 } 8217 8218 case SystemZ::BI__builtin_s390_vctzb: 8219 case SystemZ::BI__builtin_s390_vctzh: 8220 case SystemZ::BI__builtin_s390_vctzf: 8221 case SystemZ::BI__builtin_s390_vctzg: { 8222 llvm::Type *ResultType = ConvertType(E->getType()); 8223 Value *X = EmitScalarExpr(E->getArg(0)); 8224 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8225 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8226 return Builder.CreateCall(F, {X, Undef}); 8227 } 8228 8229 case SystemZ::BI__builtin_s390_vfsqdb: { 8230 llvm::Type *ResultType = ConvertType(E->getType()); 8231 Value *X = EmitScalarExpr(E->getArg(0)); 8232 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 8233 return Builder.CreateCall(F, X); 8234 } 8235 case 
SystemZ::BI__builtin_s390_vfmadb: { 8236 llvm::Type *ResultType = ConvertType(E->getType()); 8237 Value *X = EmitScalarExpr(E->getArg(0)); 8238 Value *Y = EmitScalarExpr(E->getArg(1)); 8239 Value *Z = EmitScalarExpr(E->getArg(2)); 8240 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8241 return Builder.CreateCall(F, {X, Y, Z}); 8242 } 8243 case SystemZ::BI__builtin_s390_vfmsdb: { 8244 llvm::Type *ResultType = ConvertType(E->getType()); 8245 Value *X = EmitScalarExpr(E->getArg(0)); 8246 Value *Y = EmitScalarExpr(E->getArg(1)); 8247 Value *Z = EmitScalarExpr(E->getArg(2)); 8248 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8249 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8250 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8251 } 8252 case SystemZ::BI__builtin_s390_vflpdb: { 8253 llvm::Type *ResultType = ConvertType(E->getType()); 8254 Value *X = EmitScalarExpr(E->getArg(0)); 8255 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8256 return Builder.CreateCall(F, X); 8257 } 8258 case SystemZ::BI__builtin_s390_vflndb: { 8259 llvm::Type *ResultType = ConvertType(E->getType()); 8260 Value *X = EmitScalarExpr(E->getArg(0)); 8261 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8262 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8263 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 8264 } 8265 case SystemZ::BI__builtin_s390_vfidb: { 8266 llvm::Type *ResultType = ConvertType(E->getType()); 8267 Value *X = EmitScalarExpr(E->getArg(0)); 8268 // Constant-fold the M4 and M5 mask arguments. 
8269 llvm::APSInt M4, M5; 8270 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 8271 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 8272 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 8273 (void)IsConstM4; (void)IsConstM5; 8274 // Check whether this instance of vfidb can be represented via a LLVM 8275 // standard intrinsic. We only support some combinations of M4 and M5. 8276 Intrinsic::ID ID = Intrinsic::not_intrinsic; 8277 switch (M4.getZExtValue()) { 8278 default: break; 8279 case 0: // IEEE-inexact exception allowed 8280 switch (M5.getZExtValue()) { 8281 default: break; 8282 case 0: ID = Intrinsic::rint; break; 8283 } 8284 break; 8285 case 4: // IEEE-inexact exception suppressed 8286 switch (M5.getZExtValue()) { 8287 default: break; 8288 case 0: ID = Intrinsic::nearbyint; break; 8289 case 1: ID = Intrinsic::round; break; 8290 case 5: ID = Intrinsic::trunc; break; 8291 case 6: ID = Intrinsic::ceil; break; 8292 case 7: ID = Intrinsic::floor; break; 8293 } 8294 break; 8295 } 8296 if (ID != Intrinsic::not_intrinsic) { 8297 Function *F = CGM.getIntrinsic(ID, ResultType); 8298 return Builder.CreateCall(F, X); 8299 } 8300 Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); 8301 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 8302 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 8303 return Builder.CreateCall(F, {X, M4Value, M5Value}); 8304 } 8305 8306 // Vector intrisincs that output the post-instruction CC value. 
8307 8308 #define INTRINSIC_WITH_CC(NAME) \ 8309 case SystemZ::BI__builtin_##NAME: \ 8310 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 8311 8312 INTRINSIC_WITH_CC(s390_vpkshs); 8313 INTRINSIC_WITH_CC(s390_vpksfs); 8314 INTRINSIC_WITH_CC(s390_vpksgs); 8315 8316 INTRINSIC_WITH_CC(s390_vpklshs); 8317 INTRINSIC_WITH_CC(s390_vpklsfs); 8318 INTRINSIC_WITH_CC(s390_vpklsgs); 8319 8320 INTRINSIC_WITH_CC(s390_vceqbs); 8321 INTRINSIC_WITH_CC(s390_vceqhs); 8322 INTRINSIC_WITH_CC(s390_vceqfs); 8323 INTRINSIC_WITH_CC(s390_vceqgs); 8324 8325 INTRINSIC_WITH_CC(s390_vchbs); 8326 INTRINSIC_WITH_CC(s390_vchhs); 8327 INTRINSIC_WITH_CC(s390_vchfs); 8328 INTRINSIC_WITH_CC(s390_vchgs); 8329 8330 INTRINSIC_WITH_CC(s390_vchlbs); 8331 INTRINSIC_WITH_CC(s390_vchlhs); 8332 INTRINSIC_WITH_CC(s390_vchlfs); 8333 INTRINSIC_WITH_CC(s390_vchlgs); 8334 8335 INTRINSIC_WITH_CC(s390_vfaebs); 8336 INTRINSIC_WITH_CC(s390_vfaehs); 8337 INTRINSIC_WITH_CC(s390_vfaefs); 8338 8339 INTRINSIC_WITH_CC(s390_vfaezbs); 8340 INTRINSIC_WITH_CC(s390_vfaezhs); 8341 INTRINSIC_WITH_CC(s390_vfaezfs); 8342 8343 INTRINSIC_WITH_CC(s390_vfeebs); 8344 INTRINSIC_WITH_CC(s390_vfeehs); 8345 INTRINSIC_WITH_CC(s390_vfeefs); 8346 8347 INTRINSIC_WITH_CC(s390_vfeezbs); 8348 INTRINSIC_WITH_CC(s390_vfeezhs); 8349 INTRINSIC_WITH_CC(s390_vfeezfs); 8350 8351 INTRINSIC_WITH_CC(s390_vfenebs); 8352 INTRINSIC_WITH_CC(s390_vfenehs); 8353 INTRINSIC_WITH_CC(s390_vfenefs); 8354 8355 INTRINSIC_WITH_CC(s390_vfenezbs); 8356 INTRINSIC_WITH_CC(s390_vfenezhs); 8357 INTRINSIC_WITH_CC(s390_vfenezfs); 8358 8359 INTRINSIC_WITH_CC(s390_vistrbs); 8360 INTRINSIC_WITH_CC(s390_vistrhs); 8361 INTRINSIC_WITH_CC(s390_vistrfs); 8362 8363 INTRINSIC_WITH_CC(s390_vstrcbs); 8364 INTRINSIC_WITH_CC(s390_vstrchs); 8365 INTRINSIC_WITH_CC(s390_vstrcfs); 8366 8367 INTRINSIC_WITH_CC(s390_vstrczbs); 8368 INTRINSIC_WITH_CC(s390_vstrczhs); 8369 INTRINSIC_WITH_CC(s390_vstrczfs); 8370 8371 INTRINSIC_WITH_CC(s390_vfcedbs); 8372 INTRINSIC_WITH_CC(s390_vfchdbs); 8373 
INTRINSIC_WITH_CC(s390_vfchedbs); 8374 8375 INTRINSIC_WITH_CC(s390_vftcidb); 8376 8377 #undef INTRINSIC_WITH_CC 8378 8379 default: 8380 return nullptr; 8381 } 8382 } 8383 8384 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 8385 const CallExpr *E) { 8386 auto MakeLdg = [&](unsigned IntrinsicID) { 8387 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8388 AlignmentSource AlignSource; 8389 clang::CharUnits Align = 8390 getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); 8391 return Builder.CreateCall( 8392 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 8393 Ptr->getType()}), 8394 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 8395 }; 8396 auto MakeScopedAtomic = [&](unsigned IntrinsicID) { 8397 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8398 return Builder.CreateCall( 8399 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 8400 Ptr->getType()}), 8401 {Ptr, EmitScalarExpr(E->getArg(1))}); 8402 }; 8403 switch (BuiltinID) { 8404 case NVPTX::BI__nvvm_atom_add_gen_i: 8405 case NVPTX::BI__nvvm_atom_add_gen_l: 8406 case NVPTX::BI__nvvm_atom_add_gen_ll: 8407 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 8408 8409 case NVPTX::BI__nvvm_atom_sub_gen_i: 8410 case NVPTX::BI__nvvm_atom_sub_gen_l: 8411 case NVPTX::BI__nvvm_atom_sub_gen_ll: 8412 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 8413 8414 case NVPTX::BI__nvvm_atom_and_gen_i: 8415 case NVPTX::BI__nvvm_atom_and_gen_l: 8416 case NVPTX::BI__nvvm_atom_and_gen_ll: 8417 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 8418 8419 case NVPTX::BI__nvvm_atom_or_gen_i: 8420 case NVPTX::BI__nvvm_atom_or_gen_l: 8421 case NVPTX::BI__nvvm_atom_or_gen_ll: 8422 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 8423 8424 case NVPTX::BI__nvvm_atom_xor_gen_i: 8425 case NVPTX::BI__nvvm_atom_xor_gen_l: 8426 case NVPTX::BI__nvvm_atom_xor_gen_ll: 8427 return 
MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 8428 8429 case NVPTX::BI__nvvm_atom_xchg_gen_i: 8430 case NVPTX::BI__nvvm_atom_xchg_gen_l: 8431 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 8432 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 8433 8434 case NVPTX::BI__nvvm_atom_max_gen_i: 8435 case NVPTX::BI__nvvm_atom_max_gen_l: 8436 case NVPTX::BI__nvvm_atom_max_gen_ll: 8437 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 8438 8439 case NVPTX::BI__nvvm_atom_max_gen_ui: 8440 case NVPTX::BI__nvvm_atom_max_gen_ul: 8441 case NVPTX::BI__nvvm_atom_max_gen_ull: 8442 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 8443 8444 case NVPTX::BI__nvvm_atom_min_gen_i: 8445 case NVPTX::BI__nvvm_atom_min_gen_l: 8446 case NVPTX::BI__nvvm_atom_min_gen_ll: 8447 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 8448 8449 case NVPTX::BI__nvvm_atom_min_gen_ui: 8450 case NVPTX::BI__nvvm_atom_min_gen_ul: 8451 case NVPTX::BI__nvvm_atom_min_gen_ull: 8452 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 8453 8454 case NVPTX::BI__nvvm_atom_cas_gen_i: 8455 case NVPTX::BI__nvvm_atom_cas_gen_l: 8456 case NVPTX::BI__nvvm_atom_cas_gen_ll: 8457 // __nvvm_atom_cas_gen_* should return the old value rather than the 8458 // success flag. 8459 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 8460 8461 case NVPTX::BI__nvvm_atom_add_gen_f: { 8462 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8463 Value *Val = EmitScalarExpr(E->getArg(1)); 8464 // atomicrmw only deals with integer arguments so we need to use 8465 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 
8466 Value *FnALAF32 = 8467 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 8468 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 8469 } 8470 8471 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 8472 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8473 Value *Val = EmitScalarExpr(E->getArg(1)); 8474 Value *FnALI32 = 8475 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 8476 return Builder.CreateCall(FnALI32, {Ptr, Val}); 8477 } 8478 8479 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 8480 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8481 Value *Val = EmitScalarExpr(E->getArg(1)); 8482 Value *FnALD32 = 8483 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 8484 return Builder.CreateCall(FnALD32, {Ptr, Val}); 8485 } 8486 8487 case NVPTX::BI__nvvm_ldg_c: 8488 case NVPTX::BI__nvvm_ldg_c2: 8489 case NVPTX::BI__nvvm_ldg_c4: 8490 case NVPTX::BI__nvvm_ldg_s: 8491 case NVPTX::BI__nvvm_ldg_s2: 8492 case NVPTX::BI__nvvm_ldg_s4: 8493 case NVPTX::BI__nvvm_ldg_i: 8494 case NVPTX::BI__nvvm_ldg_i2: 8495 case NVPTX::BI__nvvm_ldg_i4: 8496 case NVPTX::BI__nvvm_ldg_l: 8497 case NVPTX::BI__nvvm_ldg_ll: 8498 case NVPTX::BI__nvvm_ldg_ll2: 8499 case NVPTX::BI__nvvm_ldg_uc: 8500 case NVPTX::BI__nvvm_ldg_uc2: 8501 case NVPTX::BI__nvvm_ldg_uc4: 8502 case NVPTX::BI__nvvm_ldg_us: 8503 case NVPTX::BI__nvvm_ldg_us2: 8504 case NVPTX::BI__nvvm_ldg_us4: 8505 case NVPTX::BI__nvvm_ldg_ui: 8506 case NVPTX::BI__nvvm_ldg_ui2: 8507 case NVPTX::BI__nvvm_ldg_ui4: 8508 case NVPTX::BI__nvvm_ldg_ul: 8509 case NVPTX::BI__nvvm_ldg_ull: 8510 case NVPTX::BI__nvvm_ldg_ull2: 8511 // PTX Interoperability section 2.2: "For a vector with an even number of 8512 // elements, its alignment is set to number of elements times the alignment 8513 // of its member: n*alignof(t)." 
8514 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 8515 case NVPTX::BI__nvvm_ldg_f: 8516 case NVPTX::BI__nvvm_ldg_f2: 8517 case NVPTX::BI__nvvm_ldg_f4: 8518 case NVPTX::BI__nvvm_ldg_d: 8519 case NVPTX::BI__nvvm_ldg_d2: 8520 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 8521 8522 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 8523 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 8524 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 8525 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 8526 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 8527 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 8528 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 8529 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 8530 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 8531 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 8532 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 8533 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 8534 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 8535 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 8536 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 8537 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 8538 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 8539 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 8540 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 8541 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 8542 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 8543 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 8544 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 8545 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 8546 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 8547 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 8548 case NVPTX::BI__nvvm_atom_cta_max_gen_ll: 8549 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 8550 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 8551 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 8552 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 8553 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 8554 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 8555 case 
NVPTX::BI__nvvm_atom_sys_max_gen_ll: 8556 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 8557 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 8558 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 8559 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 8560 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 8561 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 8562 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 8563 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 8564 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 8565 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 8566 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 8567 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 8568 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 8569 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 8570 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 8571 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 8572 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 8573 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 8574 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 8575 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 8576 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 8577 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 8578 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 8579 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 8580 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 8581 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 8582 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 8583 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 8584 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 8585 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 8586 case NVPTX::BI__nvvm_atom_sys_and_gen_ll: 8587 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 8588 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 8589 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 8590 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 8591 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 8592 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 8593 case 
NVPTX::BI__nvvm_atom_sys_or_gen_l: 8594 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 8595 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 8596 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 8597 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 8598 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 8599 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 8600 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 8601 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 8602 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 8603 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 8604 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 8605 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 8606 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 8607 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8608 return Builder.CreateCall( 8609 CGM.getIntrinsic( 8610 Intrinsic::nvvm_atomic_cas_gen_i_cta, 8611 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 8612 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 8613 } 8614 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 8615 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 8616 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 8617 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8618 return Builder.CreateCall( 8619 CGM.getIntrinsic( 8620 Intrinsic::nvvm_atomic_cas_gen_i_sys, 8621 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 8622 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 8623 } 8624 default: 8625 return nullptr; 8626 } 8627 } 8628 8629 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 8630 const CallExpr *E) { 8631 switch (BuiltinID) { 8632 case WebAssembly::BI__builtin_wasm_current_memory: { 8633 llvm::Type *ResultType = ConvertType(E->getType()); 8634 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 8635 return Builder.CreateCall(Callee); 8636 } 8637 case WebAssembly::BI__builtin_wasm_grow_memory: { 8638 Value *X = EmitScalarExpr(E->getArg(0)); 8639 Value *Callee = 
CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 8640 return Builder.CreateCall(Callee, X); 8641 } 8642 8643 default: 8644 return nullptr; 8645 } 8646 } 8647