1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGObjCRuntime.h" 16 #include "CGOpenCLRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "CodeGenModule.h" 19 #include "ConstantEmitter.h" 20 #include "TargetInfo.h" 21 #include "clang/AST/ASTContext.h" 22 #include "clang/AST/Decl.h" 23 #include "clang/Analysis/Analyses/OSLog.h" 24 #include "clang/Basic/TargetBuiltins.h" 25 #include "clang/Basic/TargetInfo.h" 26 #include "clang/CodeGen/CGFunctionInfo.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/IR/CallSite.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/InlineAsm.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/MDBuilder.h" 33 #include "llvm/Support/ConvertUTF.h" 34 #include "llvm/Support/ScopedPrinter.h" 35 #include "llvm/Support/TargetParser.h" 36 #include <sstream> 37 38 using namespace clang; 39 using namespace CodeGen; 40 using namespace llvm; 41 42 static 43 int64_t clamp(int64_t Value, int64_t Low, int64_t High) { 44 return std::min(High, std::max(Low, Value)); 45 } 46 47 /// getBuiltinLibFunction - Given a builtin id for a function like 48 /// "__builtin_fabsf", return a Function* for "fabsf". 49 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 50 unsigned BuiltinID) { 51 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 52 53 // Get the name, skip over the __builtin_ prefix (if necessary). 
54 StringRef Name; 55 GlobalDecl D(FD); 56 57 // If the builtin has been declared explicitly with an assembler label, 58 // use the mangled name. This differs from the plain label on platforms 59 // that prefix labels. 60 if (FD->hasAttr<AsmLabelAttr>()) 61 Name = getMangledName(D); 62 else 63 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 64 65 llvm::FunctionType *Ty = 66 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 67 68 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 69 } 70 71 /// Emit the conversions required to turn the given value into an 72 /// integer of the given size. 73 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 74 QualType T, llvm::IntegerType *IntType) { 75 V = CGF.EmitToMemory(V, T); 76 77 if (V->getType()->isPointerTy()) 78 return CGF.Builder.CreatePtrToInt(V, IntType); 79 80 assert(V->getType() == IntType); 81 return V; 82 } 83 84 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 85 QualType T, llvm::Type *ResultType) { 86 V = CGF.EmitFromMemory(V, T); 87 88 if (ResultType->isPointerTy()) 89 return CGF.Builder.CreateIntToPtr(V, ResultType); 90 91 assert(V->getType() == ResultType); 92 return V; 93 } 94 95 /// Utility to insert an atomic instruction based on Instrinsic::ID 96 /// and the expression node. 
97 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 98 llvm::AtomicRMWInst::BinOp Kind, 99 const CallExpr *E) { 100 QualType T = E->getType(); 101 assert(E->getArg(0)->getType()->isPointerType()); 102 assert(CGF.getContext().hasSameUnqualifiedType(T, 103 E->getArg(0)->getType()->getPointeeType())); 104 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 105 106 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 107 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 108 109 llvm::IntegerType *IntType = 110 llvm::IntegerType::get(CGF.getLLVMContext(), 111 CGF.getContext().getTypeSize(T)); 112 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 113 114 llvm::Value *Args[2]; 115 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 116 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 117 llvm::Type *ValueType = Args[1]->getType(); 118 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 119 120 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 121 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 122 return EmitFromInt(CGF, Result, T, ValueType); 123 } 124 125 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 126 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 127 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 128 129 // Convert the type of the pointer to a pointer to the stored type. 
130 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 131 Value *BC = CGF.Builder.CreateBitCast( 132 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 133 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 134 LV.setNontemporal(true); 135 CGF.EmitStoreOfScalar(Val, LV, false); 136 return nullptr; 137 } 138 139 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 140 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 141 142 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 143 LV.setNontemporal(true); 144 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 145 } 146 147 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 148 llvm::AtomicRMWInst::BinOp Kind, 149 const CallExpr *E) { 150 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 151 } 152 153 /// Utility to insert an atomic instruction based Instrinsic::ID and 154 /// the expression node, where the return value is the result of the 155 /// operation. 
156 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 157 llvm::AtomicRMWInst::BinOp Kind, 158 const CallExpr *E, 159 Instruction::BinaryOps Op, 160 bool Invert = false) { 161 QualType T = E->getType(); 162 assert(E->getArg(0)->getType()->isPointerType()); 163 assert(CGF.getContext().hasSameUnqualifiedType(T, 164 E->getArg(0)->getType()->getPointeeType())); 165 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 166 167 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 168 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 169 170 llvm::IntegerType *IntType = 171 llvm::IntegerType::get(CGF.getLLVMContext(), 172 CGF.getContext().getTypeSize(T)); 173 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 174 175 llvm::Value *Args[2]; 176 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 177 llvm::Type *ValueType = Args[1]->getType(); 178 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 179 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 180 181 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 182 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 183 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 184 if (Invert) 185 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 186 llvm::ConstantInt::get(IntType, -1)); 187 Result = EmitFromInt(CGF, Result, T, ValueType); 188 return RValue::get(Result); 189 } 190 191 /// @brief Utility to insert an atomic cmpxchg instruction. 192 /// 193 /// @param CGF The current codegen function. 194 /// @param E Builtin call expression to convert to cmpxchg. 195 /// arg0 - address to operate on 196 /// arg1 - value to compare with 197 /// arg2 - new value 198 /// @param ReturnBool Specifies whether to return success flag of 199 /// cmpxchg result or the old value. 
200 /// 201 /// @returns result of cmpxchg, according to ReturnBool 202 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 203 bool ReturnBool) { 204 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 205 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 206 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 207 208 llvm::IntegerType *IntType = llvm::IntegerType::get( 209 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 210 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 211 212 Value *Args[3]; 213 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 214 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 215 llvm::Type *ValueType = Args[1]->getType(); 216 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 217 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 218 219 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 220 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 221 llvm::AtomicOrdering::SequentiallyConsistent); 222 if (ReturnBool) 223 // Extract boolean success flag and zext it to int. 224 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 225 CGF.ConvertType(E->getType())); 226 else 227 // Extract old value and emit it using the same type as compare value. 228 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 229 ValueType); 230 } 231 232 // Emit a simple mangled intrinsic that has 1 argument and a return type 233 // matching the argument type. 234 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 235 const CallExpr *E, 236 unsigned IntrinsicID) { 237 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 238 239 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 240 return CGF.Builder.CreateCall(F, Src0); 241 } 242 243 // Emit an intrinsic that has 2 operands of the same type as its result. 
244 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 245 const CallExpr *E, 246 unsigned IntrinsicID) { 247 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 248 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 249 250 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 251 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 252 } 253 254 // Emit an intrinsic that has 3 operands of the same type as its result. 255 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 256 const CallExpr *E, 257 unsigned IntrinsicID) { 258 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 259 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 260 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 261 262 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 263 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 264 } 265 266 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 267 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 268 const CallExpr *E, 269 unsigned IntrinsicID) { 270 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 271 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 272 273 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 274 return CGF.Builder.CreateCall(F, {Src0, Src1}); 275 } 276 277 /// EmitFAbs - Emit a call to @llvm.fabs(). 278 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 279 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 280 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 281 Call->setDoesNotAccessMemory(); 282 return Call; 283 } 284 285 /// Emit the computation of the sign bit for a floating point value. Returns 286 /// the i1 sign bit value. 
287 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 288 LLVMContext &C = CGF.CGM.getLLVMContext(); 289 290 llvm::Type *Ty = V->getType(); 291 int Width = Ty->getPrimitiveSizeInBits(); 292 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 293 V = CGF.Builder.CreateBitCast(V, IntTy); 294 if (Ty->isPPC_FP128Ty()) { 295 // We want the sign bit of the higher-order double. The bitcast we just 296 // did works as if the double-double was stored to memory and then 297 // read as an i128. The "store" will put the higher-order double in the 298 // lower address in both little- and big-Endian modes, but the "load" 299 // will treat those bits as a different part of the i128: the low bits in 300 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 301 // we need to shift the high bits down to the low before truncating. 302 Width >>= 1; 303 if (CGF.getTarget().isBigEndian()) { 304 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 305 V = CGF.Builder.CreateLShr(V, ShiftCst); 306 } 307 // We are truncating value in order to extract the higher-order 308 // double, which we will be using to extract the sign from. 309 IntTy = llvm::IntegerType::get(C, Width); 310 V = CGF.Builder.CreateTrunc(V, IntTy); 311 } 312 Value *Zero = llvm::Constant::getNullValue(IntTy); 313 return CGF.Builder.CreateICmpSLT(V, Zero); 314 } 315 316 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 317 const CallExpr *E, llvm::Constant *calleeValue) { 318 CGCallee callee = CGCallee::forDirect(calleeValue, FD); 319 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 320 } 321 322 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 323 /// depending on IntrinsicID. 324 /// 325 /// \arg CGF The current codegen function. 326 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 327 /// \arg X The first argument to the llvm.*.with.overflow.*. 
328 /// \arg Y The second argument to the llvm.*.with.overflow.*. 329 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 330 /// \returns The result (i.e. sum/product) returned by the intrinsic. 331 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 332 const llvm::Intrinsic::ID IntrinsicID, 333 llvm::Value *X, llvm::Value *Y, 334 llvm::Value *&Carry) { 335 // Make sure we have integers of the same width. 336 assert(X->getType() == Y->getType() && 337 "Arguments must be the same type. (Did you forget to make sure both " 338 "arguments have the same integer width?)"); 339 340 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 341 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 342 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 343 return CGF.Builder.CreateExtractValue(Tmp, 0); 344 } 345 346 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 347 unsigned IntrinsicID, 348 int low, int high) { 349 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 350 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 351 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 352 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 353 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 354 return Call; 355 } 356 357 namespace { 358 struct WidthAndSignedness { 359 unsigned Width; 360 bool Signed; 361 }; 362 } 363 364 static WidthAndSignedness 365 getIntegerWidthAndSignedness(const clang::ASTContext &context, 366 const clang::QualType Type) { 367 assert(Type->isIntegerType() && "Given type is not an integer."); 368 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 369 bool Signed = Type->isSignedIntegerType(); 370 return {Width, Signed}; 371 } 372 373 // Given one or more integer types, this function produces an integer type that 374 // encompasses them: any value in one of the given types could be expressed in 375 // the encompassing type. 
376 static struct WidthAndSignedness 377 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 378 assert(Types.size() > 0 && "Empty list of types."); 379 380 // If any of the given types is signed, we must return a signed type. 381 bool Signed = false; 382 for (const auto &Type : Types) { 383 Signed |= Type.Signed; 384 } 385 386 // The encompassing type must have a width greater than or equal to the width 387 // of the specified types. Aditionally, if the encompassing type is signed, 388 // its width must be strictly greater than the width of any unsigned types 389 // given. 390 unsigned Width = 0; 391 for (const auto &Type : Types) { 392 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 393 if (Width < MinWidth) { 394 Width = MinWidth; 395 } 396 } 397 398 return {Width, Signed}; 399 } 400 401 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 402 llvm::Type *DestType = Int8PtrTy; 403 if (ArgValue->getType() != DestType) 404 ArgValue = 405 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 406 407 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 408 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 409 } 410 411 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 412 /// __builtin_object_size(p, @p To) is correct 413 static bool areBOSTypesCompatible(int From, int To) { 414 // Note: Our __builtin_object_size implementation currently treats Type=0 and 415 // Type=2 identically. Encoding this implementation detail here may make 416 // improving __builtin_object_size difficult in the future, so it's omitted. 417 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 418 } 419 420 static llvm::Value * 421 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 422 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 423 } 424 425 llvm::Value * 426 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 427 llvm::IntegerType *ResType, 428 llvm::Value *EmittedE) { 429 uint64_t ObjectSize; 430 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 431 return emitBuiltinObjectSize(E, Type, ResType, EmittedE); 432 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 433 } 434 435 /// Returns a Value corresponding to the size of the given expression. 436 /// This Value may be either of the following: 437 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 438 /// it) 439 /// - A call to the @llvm.objectsize intrinsic 440 /// 441 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null 442 /// and we wouldn't otherwise try to reference a pass_object_size parameter, 443 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. 444 llvm::Value * 445 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 446 llvm::IntegerType *ResType, 447 llvm::Value *EmittedE) { 448 // We need to reference an argument if the pointer is a parameter with the 449 // pass_object_size attribute. 450 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 451 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 452 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 453 if (Param != nullptr && PS != nullptr && 454 areBOSTypesCompatible(PS->getType(), Type)) { 455 auto Iter = SizeArguments.find(Param); 456 assert(Iter != SizeArguments.end()); 457 458 const ImplicitParamDecl *D = Iter->second; 459 auto DIter = LocalDeclMap.find(D); 460 assert(DIter != LocalDeclMap.end()); 461 462 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 463 getContext().getSizeType(), E->getLocStart()); 464 } 465 } 466 467 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 468 // evaluate E for side-effects. 
In either case, we shouldn't lower to 469 // @llvm.objectsize. 470 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) 471 return getDefaultBuiltinObjectSizeResult(Type, ResType); 472 473 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); 474 assert(Ptr->getType()->isPointerTy() && 475 "Non-pointer passed to __builtin_object_size?"); 476 477 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); 478 479 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. 480 Value *Min = Builder.getInt1((Type & 2) != 0); 481 // For GCC compatability, __builtin_object_size treat NULL as unknown size. 482 Value *NullIsUnknown = Builder.getTrue(); 483 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); 484 } 485 486 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we 487 // handle them here. 488 enum class CodeGenFunction::MSVCIntrin { 489 _BitScanForward, 490 _BitScanReverse, 491 _InterlockedAnd, 492 _InterlockedDecrement, 493 _InterlockedExchange, 494 _InterlockedExchangeAdd, 495 _InterlockedExchangeSub, 496 _InterlockedIncrement, 497 _InterlockedOr, 498 _InterlockedXor, 499 _interlockedbittestandset, 500 __fastfail, 501 }; 502 503 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 504 const CallExpr *E) { 505 switch (BuiltinID) { 506 case MSVCIntrin::_BitScanForward: 507 case MSVCIntrin::_BitScanReverse: { 508 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 509 510 llvm::Type *ArgType = ArgValue->getType(); 511 llvm::Type *IndexType = 512 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 513 llvm::Type *ResultType = ConvertType(E->getType()); 514 515 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 516 Value *ResZero = llvm::Constant::getNullValue(ResultType); 517 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 518 519 BasicBlock *Begin = Builder.GetInsertBlock(); 520 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 521 
Builder.SetInsertPoint(End); 522 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 523 524 Builder.SetInsertPoint(Begin); 525 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 526 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 527 Builder.CreateCondBr(IsZero, End, NotZero); 528 Result->addIncoming(ResZero, Begin); 529 530 Builder.SetInsertPoint(NotZero); 531 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 532 533 if (BuiltinID == MSVCIntrin::_BitScanForward) { 534 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 535 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 536 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 537 Builder.CreateStore(ZeroCount, IndexAddress, false); 538 } else { 539 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 540 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 541 542 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 543 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 544 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 545 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 546 Builder.CreateStore(Index, IndexAddress, false); 547 } 548 Builder.CreateBr(End); 549 Result->addIncoming(ResOne, NotZero); 550 551 Builder.SetInsertPoint(End); 552 return Result; 553 } 554 case MSVCIntrin::_InterlockedAnd: 555 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 556 case MSVCIntrin::_InterlockedExchange: 557 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 558 case MSVCIntrin::_InterlockedExchangeAdd: 559 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 560 case MSVCIntrin::_InterlockedExchangeSub: 561 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 562 case MSVCIntrin::_InterlockedOr: 563 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 564 case MSVCIntrin::_InterlockedXor: 565 
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 566 567 case MSVCIntrin::_interlockedbittestandset: { 568 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 569 llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); 570 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 571 AtomicRMWInst::Or, Addr, 572 Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), 573 llvm::AtomicOrdering::SequentiallyConsistent); 574 // Shift the relevant bit to the least significant position, truncate to 575 // the result type, and test the low bit. 576 llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); 577 llvm::Value *Truncated = 578 Builder.CreateTrunc(Shifted, ConvertType(E->getType())); 579 return Builder.CreateAnd(Truncated, 580 ConstantInt::get(Truncated->getType(), 1)); 581 } 582 583 case MSVCIntrin::_InterlockedDecrement: { 584 llvm::Type *IntTy = ConvertType(E->getType()); 585 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 586 AtomicRMWInst::Sub, 587 EmitScalarExpr(E->getArg(0)), 588 ConstantInt::get(IntTy, 1), 589 llvm::AtomicOrdering::SequentiallyConsistent); 590 return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); 591 } 592 case MSVCIntrin::_InterlockedIncrement: { 593 llvm::Type *IntTy = ConvertType(E->getType()); 594 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 595 AtomicRMWInst::Add, 596 EmitScalarExpr(E->getArg(0)), 597 ConstantInt::get(IntTy, 1), 598 llvm::AtomicOrdering::SequentiallyConsistent); 599 return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); 600 } 601 602 case MSVCIntrin::__fastfail: { 603 // Request immediate process termination from the kernel. 
The instruction 604 // sequences to do this are documented on MSDN: 605 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 606 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 607 StringRef Asm, Constraints; 608 switch (ISA) { 609 default: 610 ErrorUnsupported(E, "__fastfail call for this architecture"); 611 break; 612 case llvm::Triple::x86: 613 case llvm::Triple::x86_64: 614 Asm = "int $$0x29"; 615 Constraints = "{cx}"; 616 break; 617 case llvm::Triple::thumb: 618 Asm = "udf #251"; 619 Constraints = "{r0}"; 620 break; 621 } 622 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 623 llvm::InlineAsm *IA = 624 llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); 625 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 626 getLLVMContext(), llvm::AttributeList::FunctionIndex, 627 llvm::Attribute::NoReturn); 628 CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 629 CS.setAttributes(NoReturnAttr); 630 return CS.getInstruction(); 631 } 632 } 633 llvm_unreachable("Incorrect MSVC intrinsic!"); 634 } 635 636 namespace { 637 // ARC cleanup for __builtin_os_log_format 638 struct CallObjCArcUse final : EHScopeStack::Cleanup { 639 CallObjCArcUse(llvm::Value *object) : object(object) {} 640 llvm::Value *object; 641 642 void Emit(CodeGenFunction &CGF, Flags flags) override { 643 CGF.EmitARCIntrinsicUse(object); 644 } 645 }; 646 } 647 648 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, 649 BuiltinCheckKind Kind) { 650 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) 651 && "Unsupported builtin check kind"); 652 653 Value *ArgValue = EmitScalarExpr(E); 654 if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) 655 return ArgValue; 656 657 SanitizerScope SanScope(this); 658 Value *Cond = Builder.CreateICmpNE( 659 ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); 660 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), 661 
SanitizerHandler::InvalidBuiltin, 662 {EmitCheckSourceLocation(E->getExprLoc()), 663 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, 664 None); 665 return ArgValue; 666 } 667 668 /// Get the argument type for arguments to os_log_helper. 669 static CanQualType getOSLogArgType(ASTContext &C, int Size) { 670 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); 671 return C.getCanonicalType(UnsignedTy); 672 } 673 674 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( 675 const analyze_os_log::OSLogBufferLayout &Layout, 676 CharUnits BufferAlignment) { 677 ASTContext &Ctx = getContext(); 678 679 llvm::SmallString<64> Name; 680 { 681 raw_svector_ostream OS(Name); 682 OS << "__os_log_helper"; 683 OS << "_" << BufferAlignment.getQuantity(); 684 OS << "_" << int(Layout.getSummaryByte()); 685 OS << "_" << int(Layout.getNumArgsByte()); 686 for (const auto &Item : Layout.Items) 687 OS << "_" << int(Item.getSizeByte()) << "_" 688 << int(Item.getDescriptorByte()); 689 } 690 691 if (llvm::Function *F = CGM.getModule().getFunction(Name)) 692 return F; 693 694 llvm::SmallVector<ImplicitParamDecl, 4> Params; 695 Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), 696 Ctx.VoidPtrTy, ImplicitParamDecl::Other); 697 698 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { 699 char Size = Layout.Items[I].getSizeByte(); 700 if (!Size) 701 continue; 702 703 Params.emplace_back( 704 Ctx, nullptr, SourceLocation(), 705 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), 706 getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other); 707 } 708 709 FunctionArgList Args; 710 for (auto &P : Params) 711 Args.push_back(&P); 712 713 // The helper function has linkonce_odr linkage to enable the linker to merge 714 // identical functions. To ensure the merging always happens, 'noinline' is 715 // attached to the function when compiling with -Oz. 
716 const CGFunctionInfo &FI = 717 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); 718 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); 719 llvm::Function *Fn = llvm::Function::Create( 720 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); 721 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); 722 CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); 723 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); 724 725 // Attach 'noinline' at -Oz. 726 if (CGM.getCodeGenOpts().OptimizeSize == 2) 727 Fn->addFnAttr(llvm::Attribute::NoInline); 728 729 auto NL = ApplyDebugLocation::CreateEmpty(*this); 730 IdentifierInfo *II = &Ctx.Idents.get(Name); 731 FunctionDecl *FD = FunctionDecl::Create( 732 Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, 733 Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); 734 735 StartFunction(FD, Ctx.VoidTy, Fn, FI, Args); 736 737 // Create a scope with an artificial location for the body of this function. 
738 auto AL = ApplyDebugLocation::CreateArtificial(*this); 739 740 CharUnits Offset; 741 Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"), 742 BufferAlignment); 743 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), 744 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 745 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), 746 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 747 748 unsigned I = 1; 749 for (const auto &Item : Layout.Items) { 750 Builder.CreateStore( 751 Builder.getInt8(Item.getDescriptorByte()), 752 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 753 Builder.CreateStore( 754 Builder.getInt8(Item.getSizeByte()), 755 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 756 757 CharUnits Size = Item.size(); 758 if (!Size.getQuantity()) 759 continue; 760 761 Address Arg = GetAddrOfLocalVar(&Params[I]); 762 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); 763 Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), 764 "argDataCast"); 765 Builder.CreateStore(Builder.CreateLoad(Arg), Addr); 766 Offset += Size; 767 ++I; 768 } 769 770 FinishFunction(); 771 772 return Fn; 773 } 774 775 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { 776 assert(E.getNumArgs() >= 2 && 777 "__builtin_os_log_format takes at least 2 arguments"); 778 ASTContext &Ctx = getContext(); 779 analyze_os_log::OSLogBufferLayout Layout; 780 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); 781 Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); 782 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 783 784 // Ignore argument 1, the format string. It is not currently used. 
785 CallArgList Args; 786 Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy); 787 788 for (const auto &Item : Layout.Items) { 789 int Size = Item.getSizeByte(); 790 if (!Size) 791 continue; 792 793 llvm::Value *ArgVal; 794 795 if (const Expr *TheExpr = Item.getExpr()) { 796 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); 797 798 // Check if this is a retainable type. 799 if (TheExpr->getType()->isObjCRetainableType()) { 800 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 801 "Only scalar can be a ObjC retainable type"); 802 // Check if the object is constant, if not, save it in 803 // RetainableOperands. 804 if (!isa<Constant>(ArgVal)) 805 RetainableOperands.push_back(ArgVal); 806 } 807 } else { 808 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); 809 } 810 811 unsigned ArgValSize = 812 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); 813 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), 814 ArgValSize); 815 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); 816 CanQualType ArgTy = getOSLogArgType(Ctx, Size); 817 // If ArgVal has type x86_fp80, zero-extend ArgVal. 818 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); 819 Args.add(RValue::get(ArgVal), ArgTy); 820 } 821 822 const CGFunctionInfo &FI = 823 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); 824 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( 825 Layout, BufAddr.getAlignment()); 826 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); 827 828 // Push a clang.arc.use cleanup for each object in RetainableOperands. The 829 // cleanup will cause the use to appear after the final log call, keeping 830 // the object valid while it’s held in the log buffer. Note that if there’s 831 // a release cleanup on the object, it will already be active; since 832 // cleanups are emitted in reverse order, the use will occur before the 833 // object is released. 
834 if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && 835 CGM.getCodeGenOpts().OptimizationLevel != 0) 836 for (llvm::Value *Object : RetainableOperands) 837 pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object); 838 839 return RValue::get(BufAddr.getPointer()); 840 } 841 842 /// Determine if a binop is a checked mixed-sign multiply we can specialize. 843 static bool isSpecialMixedSignMultiply(unsigned BuiltinID, 844 WidthAndSignedness Op1Info, 845 WidthAndSignedness Op2Info, 846 WidthAndSignedness ResultInfo) { 847 return BuiltinID == Builtin::BI__builtin_mul_overflow && 848 Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width && 849 Op1Info.Signed != Op2Info.Signed; 850 } 851 852 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of 853 /// the generic checked-binop irgen. 854 static RValue 855 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, 856 WidthAndSignedness Op1Info, const clang::Expr *Op2, 857 WidthAndSignedness Op2Info, 858 const clang::Expr *ResultArg, QualType ResultQTy, 859 WidthAndSignedness ResultInfo) { 860 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info, 861 Op2Info, ResultInfo) && 862 "Not a mixed-sign multipliction we can specialize"); 863 864 // Emit the signed and unsigned operands. 865 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2; 866 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1; 867 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); 868 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); 869 870 llvm::Type *OpTy = Signed->getType(); 871 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); 872 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); 873 llvm::Type *ResTy = ResultPtr.getElementType(); 874 875 // Take the absolute value of the signed operand. 
876 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); 877 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed); 878 llvm::Value *AbsSigned = 879 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed); 880 881 // Perform a checked unsigned multiplication. 882 llvm::Value *UnsignedOverflow; 883 llvm::Value *UnsignedResult = 884 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned, 885 Unsigned, UnsignedOverflow); 886 887 llvm::Value *Overflow, *Result; 888 if (ResultInfo.Signed) { 889 // Signed overflow occurs if the result is greater than INT_MAX or lesser 890 // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). 891 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width) 892 .zextOrSelf(Op1Info.Width); 893 llvm::Value *MaxResult = 894 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), 895 CGF.Builder.CreateZExt(IsNegative, OpTy)); 896 llvm::Value *SignedOverflow = 897 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult); 898 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow); 899 900 // Prepare the signed result (possibly by negating it). 901 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult); 902 llvm::Value *SignedResult = 903 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult); 904 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy); 905 } else { 906 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX. 
907 llvm::Value *Underflow = CGF.Builder.CreateAnd( 908 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); 909 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); 910 if (ResultInfo.Width < Op1Info.Width) { 911 auto IntMax = 912 llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width); 913 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( 914 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); 915 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); 916 } 917 918 // Negate the product if it would be negative in infinite precision. 919 Result = CGF.Builder.CreateSelect( 920 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult); 921 922 Result = CGF.Builder.CreateTrunc(Result, ResTy); 923 } 924 assert(Overflow && Result && "Missing overflow or result"); 925 926 bool isVolatile = 927 ResultArg->getType()->getPointeeType().isVolatileQualified(); 928 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, 929 isVolatile); 930 return RValue::get(Overflow); 931 } 932 933 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 934 unsigned BuiltinID, const CallExpr *E, 935 ReturnValueSlot ReturnValue) { 936 // See if we can constant fold this builtin. If so, don't emit it at all. 937 Expr::EvalResult Result; 938 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 939 !Result.hasSideEffects()) { 940 if (Result.Val.isInt()) 941 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 942 Result.Val.getInt())); 943 if (Result.Val.isFloat()) 944 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 945 Result.Val.getFloat())); 946 } 947 948 // There are LLVM math intrinsics/instructions corresponding to math library 949 // functions except the LLVM op will never set errno while the math library 950 // might. Also, math builtins have the same semantics as their math library 951 // twins. 
Thus, we can transform math library and builtin calls to their 952 // LLVM counterparts if the call is marked 'const' (known to never set errno). 953 if (FD->hasAttr<ConstAttr>()) { 954 switch (BuiltinID) { 955 case Builtin::BIceil: 956 case Builtin::BIceilf: 957 case Builtin::BIceill: 958 case Builtin::BI__builtin_ceil: 959 case Builtin::BI__builtin_ceilf: 960 case Builtin::BI__builtin_ceill: 961 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 962 963 case Builtin::BIcopysign: 964 case Builtin::BIcopysignf: 965 case Builtin::BIcopysignl: 966 case Builtin::BI__builtin_copysign: 967 case Builtin::BI__builtin_copysignf: 968 case Builtin::BI__builtin_copysignl: 969 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 970 971 case Builtin::BIcos: 972 case Builtin::BIcosf: 973 case Builtin::BIcosl: 974 case Builtin::BI__builtin_cos: 975 case Builtin::BI__builtin_cosf: 976 case Builtin::BI__builtin_cosl: 977 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos)); 978 979 case Builtin::BIexp: 980 case Builtin::BIexpf: 981 case Builtin::BIexpl: 982 case Builtin::BI__builtin_exp: 983 case Builtin::BI__builtin_expf: 984 case Builtin::BI__builtin_expl: 985 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp)); 986 987 case Builtin::BIexp2: 988 case Builtin::BIexp2f: 989 case Builtin::BIexp2l: 990 case Builtin::BI__builtin_exp2: 991 case Builtin::BI__builtin_exp2f: 992 case Builtin::BI__builtin_exp2l: 993 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2)); 994 995 case Builtin::BIfabs: 996 case Builtin::BIfabsf: 997 case Builtin::BIfabsl: 998 case Builtin::BI__builtin_fabs: 999 case Builtin::BI__builtin_fabsf: 1000 case Builtin::BI__builtin_fabsl: 1001 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 1002 1003 case Builtin::BIfloor: 1004 case Builtin::BIfloorf: 1005 case Builtin::BIfloorl: 1006 case Builtin::BI__builtin_floor: 1007 case Builtin::BI__builtin_floorf: 1008 case 
Builtin::BI__builtin_floorl: 1009 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 1010 1011 case Builtin::BIfma: 1012 case Builtin::BIfmaf: 1013 case Builtin::BIfmal: 1014 case Builtin::BI__builtin_fma: 1015 case Builtin::BI__builtin_fmaf: 1016 case Builtin::BI__builtin_fmal: 1017 return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma)); 1018 1019 case Builtin::BIfmax: 1020 case Builtin::BIfmaxf: 1021 case Builtin::BIfmaxl: 1022 case Builtin::BI__builtin_fmax: 1023 case Builtin::BI__builtin_fmaxf: 1024 case Builtin::BI__builtin_fmaxl: 1025 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 1026 1027 case Builtin::BIfmin: 1028 case Builtin::BIfminf: 1029 case Builtin::BIfminl: 1030 case Builtin::BI__builtin_fmin: 1031 case Builtin::BI__builtin_fminf: 1032 case Builtin::BI__builtin_fminl: 1033 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 1034 1035 // fmod() is a special-case. It maps to the frem instruction rather than an 1036 // LLVM intrinsic. 
1037 case Builtin::BIfmod: 1038 case Builtin::BIfmodf: 1039 case Builtin::BIfmodl: 1040 case Builtin::BI__builtin_fmod: 1041 case Builtin::BI__builtin_fmodf: 1042 case Builtin::BI__builtin_fmodl: { 1043 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 1044 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 1045 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); 1046 } 1047 1048 case Builtin::BIlog: 1049 case Builtin::BIlogf: 1050 case Builtin::BIlogl: 1051 case Builtin::BI__builtin_log: 1052 case Builtin::BI__builtin_logf: 1053 case Builtin::BI__builtin_logl: 1054 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log)); 1055 1056 case Builtin::BIlog10: 1057 case Builtin::BIlog10f: 1058 case Builtin::BIlog10l: 1059 case Builtin::BI__builtin_log10: 1060 case Builtin::BI__builtin_log10f: 1061 case Builtin::BI__builtin_log10l: 1062 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10)); 1063 1064 case Builtin::BIlog2: 1065 case Builtin::BIlog2f: 1066 case Builtin::BIlog2l: 1067 case Builtin::BI__builtin_log2: 1068 case Builtin::BI__builtin_log2f: 1069 case Builtin::BI__builtin_log2l: 1070 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2)); 1071 1072 case Builtin::BInearbyint: 1073 case Builtin::BInearbyintf: 1074 case Builtin::BInearbyintl: 1075 case Builtin::BI__builtin_nearbyint: 1076 case Builtin::BI__builtin_nearbyintf: 1077 case Builtin::BI__builtin_nearbyintl: 1078 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 1079 1080 case Builtin::BIpow: 1081 case Builtin::BIpowf: 1082 case Builtin::BIpowl: 1083 case Builtin::BI__builtin_pow: 1084 case Builtin::BI__builtin_powf: 1085 case Builtin::BI__builtin_powl: 1086 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow)); 1087 1088 case Builtin::BIrint: 1089 case Builtin::BIrintf: 1090 case Builtin::BIrintl: 1091 case Builtin::BI__builtin_rint: 1092 case Builtin::BI__builtin_rintf: 1093 case Builtin::BI__builtin_rintl: 1094 return 
RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 1095 1096 case Builtin::BIround: 1097 case Builtin::BIroundf: 1098 case Builtin::BIroundl: 1099 case Builtin::BI__builtin_round: 1100 case Builtin::BI__builtin_roundf: 1101 case Builtin::BI__builtin_roundl: 1102 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 1103 1104 case Builtin::BIsin: 1105 case Builtin::BIsinf: 1106 case Builtin::BIsinl: 1107 case Builtin::BI__builtin_sin: 1108 case Builtin::BI__builtin_sinf: 1109 case Builtin::BI__builtin_sinl: 1110 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin)); 1111 1112 case Builtin::BIsqrt: 1113 case Builtin::BIsqrtf: 1114 case Builtin::BIsqrtl: 1115 case Builtin::BI__builtin_sqrt: 1116 case Builtin::BI__builtin_sqrtf: 1117 case Builtin::BI__builtin_sqrtl: 1118 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt)); 1119 1120 case Builtin::BItrunc: 1121 case Builtin::BItruncf: 1122 case Builtin::BItruncl: 1123 case Builtin::BI__builtin_trunc: 1124 case Builtin::BI__builtin_truncf: 1125 case Builtin::BI__builtin_truncl: 1126 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 1127 1128 default: 1129 break; 1130 } 1131 } 1132 1133 switch (BuiltinID) { 1134 default: break; 1135 case Builtin::BI__builtin___CFStringMakeConstantString: 1136 case Builtin::BI__builtin___NSStringMakeConstantString: 1137 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); 1138 case Builtin::BI__builtin_stdarg_start: 1139 case Builtin::BI__builtin_va_start: 1140 case Builtin::BI__va_start: 1141 case Builtin::BI__builtin_va_end: 1142 return RValue::get( 1143 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 1144 ? 
EmitScalarExpr(E->getArg(0)) 1145 : EmitVAListRef(E->getArg(0)).getPointer(), 1146 BuiltinID != Builtin::BI__builtin_va_end)); 1147 case Builtin::BI__builtin_va_copy: { 1148 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 1149 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 1150 1151 llvm::Type *Type = Int8PtrTy; 1152 1153 DstPtr = Builder.CreateBitCast(DstPtr, Type); 1154 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 1155 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 1156 {DstPtr, SrcPtr})); 1157 } 1158 case Builtin::BI__builtin_abs: 1159 case Builtin::BI__builtin_labs: 1160 case Builtin::BI__builtin_llabs: { 1161 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1162 1163 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 1164 Value *CmpResult = 1165 Builder.CreateICmpSGE(ArgValue, 1166 llvm::Constant::getNullValue(ArgValue->getType()), 1167 "abscond"); 1168 Value *Result = 1169 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 1170 1171 return RValue::get(Result); 1172 } 1173 case Builtin::BI__builtin_conj: 1174 case Builtin::BI__builtin_conjf: 1175 case Builtin::BI__builtin_conjl: { 1176 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1177 Value *Real = ComplexVal.first; 1178 Value *Imag = ComplexVal.second; 1179 Value *Zero = 1180 Imag->getType()->isFPOrFPVectorTy() 1181 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 1182 : llvm::Constant::getNullValue(Imag->getType()); 1183 1184 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 1185 return RValue::getComplex(std::make_pair(Real, Imag)); 1186 } 1187 case Builtin::BI__builtin_creal: 1188 case Builtin::BI__builtin_crealf: 1189 case Builtin::BI__builtin_creall: 1190 case Builtin::BIcreal: 1191 case Builtin::BIcrealf: 1192 case Builtin::BIcreall: { 1193 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1194 return RValue::get(ComplexVal.first); 1195 } 1196 1197 case Builtin::BI__builtin_cimag: 1198 case Builtin::BI__builtin_cimagf: 1199 case Builtin::BI__builtin_cimagl: 1200 case Builtin::BIcimag: 1201 case Builtin::BIcimagf: 1202 case Builtin::BIcimagl: { 1203 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1204 return RValue::get(ComplexVal.second); 1205 } 1206 1207 case Builtin::BI__builtin_ctzs: 1208 case Builtin::BI__builtin_ctz: 1209 case Builtin::BI__builtin_ctzl: 1210 case Builtin::BI__builtin_ctzll: { 1211 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); 1212 1213 llvm::Type *ArgType = ArgValue->getType(); 1214 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 1215 1216 llvm::Type *ResultType = ConvertType(E->getType()); 1217 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 1218 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 1219 if (Result->getType() != ResultType) 1220 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1221 "cast"); 1222 return RValue::get(Result); 1223 } 1224 case Builtin::BI__builtin_clzs: 1225 case Builtin::BI__builtin_clz: 1226 case Builtin::BI__builtin_clzl: 1227 case Builtin::BI__builtin_clzll: { 1228 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); 1229 1230 llvm::Type *ArgType = ArgValue->getType(); 1231 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 1232 1233 llvm::Type *ResultType = 
ConvertType(E->getType()); 1234 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 1235 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 1236 if (Result->getType() != ResultType) 1237 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1238 "cast"); 1239 return RValue::get(Result); 1240 } 1241 case Builtin::BI__builtin_ffs: 1242 case Builtin::BI__builtin_ffsl: 1243 case Builtin::BI__builtin_ffsll: { 1244 // ffs(x) -> x ? cttz(x) + 1 : 0 1245 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1246 1247 llvm::Type *ArgType = ArgValue->getType(); 1248 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 1249 1250 llvm::Type *ResultType = ConvertType(E->getType()); 1251 Value *Tmp = 1252 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 1253 llvm::ConstantInt::get(ArgType, 1)); 1254 Value *Zero = llvm::Constant::getNullValue(ArgType); 1255 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 1256 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 1257 if (Result->getType() != ResultType) 1258 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1259 "cast"); 1260 return RValue::get(Result); 1261 } 1262 case Builtin::BI__builtin_parity: 1263 case Builtin::BI__builtin_parityl: 1264 case Builtin::BI__builtin_parityll: { 1265 // parity(x) -> ctpop(x) & 1 1266 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1267 1268 llvm::Type *ArgType = ArgValue->getType(); 1269 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 1270 1271 llvm::Type *ResultType = ConvertType(E->getType()); 1272 Value *Tmp = Builder.CreateCall(F, ArgValue); 1273 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 1274 if (Result->getType() != ResultType) 1275 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1276 "cast"); 1277 return RValue::get(Result); 1278 } 1279 case Builtin::BI__popcnt16: 1280 case Builtin::BI__popcnt: 1281 case 
Builtin::BI__popcnt64: 1282 case Builtin::BI__builtin_popcount: 1283 case Builtin::BI__builtin_popcountl: 1284 case Builtin::BI__builtin_popcountll: { 1285 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1286 1287 llvm::Type *ArgType = ArgValue->getType(); 1288 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 1289 1290 llvm::Type *ResultType = ConvertType(E->getType()); 1291 Value *Result = Builder.CreateCall(F, ArgValue); 1292 if (Result->getType() != ResultType) 1293 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1294 "cast"); 1295 return RValue::get(Result); 1296 } 1297 case Builtin::BI_rotr8: 1298 case Builtin::BI_rotr16: 1299 case Builtin::BI_rotr: 1300 case Builtin::BI_lrotr: 1301 case Builtin::BI_rotr64: { 1302 Value *Val = EmitScalarExpr(E->getArg(0)); 1303 Value *Shift = EmitScalarExpr(E->getArg(1)); 1304 1305 llvm::Type *ArgType = Val->getType(); 1306 Shift = Builder.CreateIntCast(Shift, ArgType, false); 1307 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1308 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 1309 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 1310 1311 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 1312 Shift = Builder.CreateAnd(Shift, Mask); 1313 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 1314 1315 Value *RightShifted = Builder.CreateLShr(Val, Shift); 1316 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 1317 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 1318 1319 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 1320 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 1321 return RValue::get(Result); 1322 } 1323 case Builtin::BI_rotl8: 1324 case Builtin::BI_rotl16: 1325 case Builtin::BI_rotl: 1326 case Builtin::BI_lrotl: 1327 case Builtin::BI_rotl64: { 1328 Value *Val = EmitScalarExpr(E->getArg(0)); 1329 Value *Shift = EmitScalarExpr(E->getArg(1)); 1330 1331 llvm::Type *ArgType = 
Val->getType(); 1332 Shift = Builder.CreateIntCast(Shift, ArgType, false); 1333 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1334 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 1335 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 1336 1337 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 1338 Shift = Builder.CreateAnd(Shift, Mask); 1339 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 1340 1341 Value *LeftShifted = Builder.CreateShl(Val, Shift); 1342 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 1343 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 1344 1345 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 1346 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 1347 return RValue::get(Result); 1348 } 1349 case Builtin::BI__builtin_unpredictable: { 1350 // Always return the argument of __builtin_unpredictable. LLVM does not 1351 // handle this builtin. Metadata for this builtin should be added directly 1352 // to instructions such as branches or switches that use it. 1353 return RValue::get(EmitScalarExpr(E->getArg(0))); 1354 } 1355 case Builtin::BI__builtin_expect: { 1356 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1357 llvm::Type *ArgType = ArgValue->getType(); 1358 1359 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 1360 // Don't generate llvm.expect on -O0 as the backend won't use it for 1361 // anything. 1362 // Note, we still IRGen ExpectedValue because it could have side-effects. 1363 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 1364 return RValue::get(ArgValue); 1365 1366 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 1367 Value *Result = 1368 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 1369 return RValue::get(Result); 1370 } 1371 case Builtin::BI__builtin_assume_aligned: { 1372 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 1373 Value *OffsetValue = 1374 (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; 1375 1376 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 1377 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 1378 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 1379 1380 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 1381 return RValue::get(PtrValue); 1382 } 1383 case Builtin::BI__assume: 1384 case Builtin::BI__builtin_assume: { 1385 if (E->getArg(0)->HasSideEffects(getContext())) 1386 return RValue::get(nullptr); 1387 1388 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1389 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 1390 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 1391 } 1392 case Builtin::BI__builtin_bswap16: 1393 case Builtin::BI__builtin_bswap32: 1394 case Builtin::BI__builtin_bswap64: { 1395 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 1396 } 1397 case Builtin::BI__builtin_bitreverse8: 1398 case Builtin::BI__builtin_bitreverse16: 1399 case Builtin::BI__builtin_bitreverse32: 1400 case Builtin::BI__builtin_bitreverse64: { 1401 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 1402 } 1403 case Builtin::BI__builtin_object_size: { 1404 unsigned Type = 1405 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 1406 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 1407 1408 // We pass this builtin onto the optimizer so that it can figure out the 1409 // object size in more complex cases. 1410 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 1411 /*EmittedE=*/nullptr)); 1412 } 1413 case Builtin::BI__builtin_prefetch: { 1414 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 1415 // FIXME: Technically these constants should of type 'int', yes? 1416 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 1417 llvm::ConstantInt::get(Int32Ty, 0); 1418 Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : 1419 llvm::ConstantInt::get(Int32Ty, 3); 1420 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 1421 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 1422 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 1423 } 1424 case Builtin::BI__builtin_readcyclecounter: { 1425 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 1426 return RValue::get(Builder.CreateCall(F)); 1427 } 1428 case Builtin::BI__builtin___clear_cache: { 1429 Value *Begin = EmitScalarExpr(E->getArg(0)); 1430 Value *End = EmitScalarExpr(E->getArg(1)); 1431 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 1432 return RValue::get(Builder.CreateCall(F, {Begin, End})); 1433 } 1434 case Builtin::BI__builtin_trap: 1435 return RValue::get(EmitTrapCall(Intrinsic::trap)); 1436 case Builtin::BI__debugbreak: 1437 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 1438 case Builtin::BI__builtin_unreachable: { 1439 EmitUnreachable(E->getExprLoc()); 1440 1441 // We do need to preserve an insertion point. 1442 EmitBlock(createBasicBlock("unreachable.cont")); 1443 1444 return RValue::get(nullptr); 1445 } 1446 1447 case Builtin::BI__builtin_powi: 1448 case Builtin::BI__builtin_powif: 1449 case Builtin::BI__builtin_powil: { 1450 Value *Base = EmitScalarExpr(E->getArg(0)); 1451 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1452 llvm::Type *ArgType = Base->getType(); 1453 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 1454 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1455 } 1456 1457 case Builtin::BI__builtin_isgreater: 1458 case Builtin::BI__builtin_isgreaterequal: 1459 case Builtin::BI__builtin_isless: 1460 case Builtin::BI__builtin_islessequal: 1461 case Builtin::BI__builtin_islessgreater: 1462 case Builtin::BI__builtin_isunordered: { 1463 // Ordered comparisons: we know the arguments to these are matching scalar 1464 // floating point values. 
1465 Value *LHS = EmitScalarExpr(E->getArg(0)); 1466 Value *RHS = EmitScalarExpr(E->getArg(1)); 1467 1468 switch (BuiltinID) { 1469 default: llvm_unreachable("Unknown ordered comparison"); 1470 case Builtin::BI__builtin_isgreater: 1471 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 1472 break; 1473 case Builtin::BI__builtin_isgreaterequal: 1474 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 1475 break; 1476 case Builtin::BI__builtin_isless: 1477 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 1478 break; 1479 case Builtin::BI__builtin_islessequal: 1480 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 1481 break; 1482 case Builtin::BI__builtin_islessgreater: 1483 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 1484 break; 1485 case Builtin::BI__builtin_isunordered: 1486 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1487 break; 1488 } 1489 // ZExt bool to int type. 1490 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1491 } 1492 case Builtin::BI__builtin_isnan: { 1493 Value *V = EmitScalarExpr(E->getArg(0)); 1494 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1495 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1496 } 1497 1498 case Builtin::BIfinite: 1499 case Builtin::BI__finite: 1500 case Builtin::BIfinitef: 1501 case Builtin::BI__finitef: 1502 case Builtin::BIfinitel: 1503 case Builtin::BI__finitel: 1504 case Builtin::BI__builtin_isinf: 1505 case Builtin::BI__builtin_isfinite: { 1506 // isinf(x) --> fabs(x) == infinity 1507 // isfinite(x) --> fabs(x) != infinity 1508 // x != NaN via the ordered compare in either case. 1509 Value *V = EmitScalarExpr(E->getArg(0)); 1510 Value *Fabs = EmitFAbs(*this, V); 1511 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1512 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1513 ? 
CmpInst::FCMP_OEQ 1514 : CmpInst::FCMP_ONE; 1515 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1516 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1517 } 1518 1519 case Builtin::BI__builtin_isinf_sign: { 1520 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0 1521 Value *Arg = EmitScalarExpr(E->getArg(0)); 1522 Value *AbsArg = EmitFAbs(*this, Arg); 1523 Value *IsInf = Builder.CreateFCmpOEQ( 1524 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1525 Value *IsNeg = EmitSignBit(*this, Arg); 1526 1527 llvm::Type *IntTy = ConvertType(E->getType()); 1528 Value *Zero = Constant::getNullValue(IntTy); 1529 Value *One = ConstantInt::get(IntTy, 1); 1530 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1531 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1532 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1533 return RValue::get(Result); 1534 } 1535 1536 case Builtin::BI__builtin_isnormal: { 1537 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1538 Value *V = EmitScalarExpr(E->getArg(0)); 1539 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1540 1541 Value *Abs = EmitFAbs(*this, V); 1542 Value *IsLessThanInf = 1543 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1544 APFloat Smallest = APFloat::getSmallestNormalized( 1545 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1546 Value *IsNormal = 1547 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1548 "isnormal"); 1549 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1550 V = Builder.CreateAnd(V, IsNormal, "and"); 1551 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1552 } 1553 1554 case Builtin::BI__builtin_fpclassify: { 1555 Value *V = EmitScalarExpr(E->getArg(5)); 1556 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1557 1558 // Create Result 1559 BasicBlock *Begin = Builder.GetInsertBlock(); 1560 
BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1561 Builder.SetInsertPoint(End); 1562 PHINode *Result = 1563 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1564 "fpclassify_result"); 1565 1566 // if (V==0) return FP_ZERO 1567 Builder.SetInsertPoint(Begin); 1568 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1569 "iszero"); 1570 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1571 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1572 Builder.CreateCondBr(IsZero, End, NotZero); 1573 Result->addIncoming(ZeroLiteral, Begin); 1574 1575 // if (V != V) return FP_NAN 1576 Builder.SetInsertPoint(NotZero); 1577 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1578 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1579 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1580 Builder.CreateCondBr(IsNan, End, NotNan); 1581 Result->addIncoming(NanLiteral, NotZero); 1582 1583 // if (fabs(V) == infinity) return FP_INFINITY 1584 Builder.SetInsertPoint(NotNan); 1585 Value *VAbs = EmitFAbs(*this, V); 1586 Value *IsInf = 1587 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1588 "isinf"); 1589 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1590 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1591 Builder.CreateCondBr(IsInf, End, NotInf); 1592 Result->addIncoming(InfLiteral, NotNan); 1593 1594 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1595 Builder.SetInsertPoint(NotInf); 1596 APFloat Smallest = APFloat::getSmallestNormalized( 1597 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1598 Value *IsNormal = 1599 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1600 "isnormal"); 1601 Value *NormalResult = 1602 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1603 EmitScalarExpr(E->getArg(3))); 1604 Builder.CreateBr(End); 1605 Result->addIncoming(NormalResult, NotInf); 
1606 1607 // return Result 1608 Builder.SetInsertPoint(End); 1609 return RValue::get(Result); 1610 } 1611 1612 case Builtin::BIalloca: 1613 case Builtin::BI_alloca: 1614 case Builtin::BI__builtin_alloca: { 1615 Value *Size = EmitScalarExpr(E->getArg(0)); 1616 const TargetInfo &TI = getContext().getTargetInfo(); 1617 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 1618 unsigned SuitableAlignmentInBytes = 1619 CGM.getContext() 1620 .toCharUnitsFromBits(TI.getSuitableAlign()) 1621 .getQuantity(); 1622 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1623 AI->setAlignment(SuitableAlignmentInBytes); 1624 return RValue::get(AI); 1625 } 1626 1627 case Builtin::BI__builtin_alloca_with_align: { 1628 Value *Size = EmitScalarExpr(E->getArg(0)); 1629 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 1630 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 1631 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 1632 unsigned AlignmentInBytes = 1633 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); 1634 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1635 AI->setAlignment(AlignmentInBytes); 1636 return RValue::get(AI); 1637 } 1638 1639 case Builtin::BIbzero: 1640 case Builtin::BI__builtin_bzero: { 1641 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1642 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1643 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1644 E->getArg(0)->getExprLoc(), FD, 0); 1645 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1646 return RValue::get(nullptr); 1647 } 1648 case Builtin::BImemcpy: 1649 case Builtin::BI__builtin_memcpy: { 1650 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1651 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1652 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1653 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1654 
E->getArg(0)->getExprLoc(), FD, 0); 1655 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1656 E->getArg(1)->getExprLoc(), FD, 1); 1657 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1658 return RValue::get(Dest.getPointer()); 1659 } 1660 1661 case Builtin::BI__builtin_char_memchr: 1662 BuiltinID = Builtin::BI__builtin_memchr; 1663 break; 1664 1665 case Builtin::BI__builtin___memcpy_chk: { 1666 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 1667 llvm::APSInt Size, DstSize; 1668 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1669 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1670 break; 1671 if (Size.ugt(DstSize)) 1672 break; 1673 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1674 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1675 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1676 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1677 return RValue::get(Dest.getPointer()); 1678 } 1679 1680 case Builtin::BI__builtin_objc_memmove_collectable: { 1681 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1682 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1683 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1684 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1685 DestAddr, SrcAddr, SizeVal); 1686 return RValue::get(DestAddr.getPointer()); 1687 } 1688 1689 case Builtin::BI__builtin___memmove_chk: { 1690 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1691 llvm::APSInt Size, DstSize; 1692 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1693 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1694 break; 1695 if (Size.ugt(DstSize)) 1696 break; 1697 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1698 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1699 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1700 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1701 return RValue::get(Dest.getPointer()); 1702 } 1703 1704 case Builtin::BImemmove: 1705 case Builtin::BI__builtin_memmove: { 1706 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1707 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1708 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1709 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1710 E->getArg(0)->getExprLoc(), FD, 0); 1711 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1712 E->getArg(1)->getExprLoc(), FD, 1); 1713 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1714 return RValue::get(Dest.getPointer()); 1715 } 1716 case Builtin::BImemset: 1717 case Builtin::BI__builtin_memset: { 1718 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1719 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1720 Builder.getInt8Ty()); 1721 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1722 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1723 E->getArg(0)->getExprLoc(), FD, 0); 1724 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1725 return RValue::get(Dest.getPointer()); 1726 } 1727 case Builtin::BI__builtin___memset_chk: { 1728 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1729 llvm::APSInt Size, DstSize; 1730 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1731 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1732 break; 1733 if (Size.ugt(DstSize)) 1734 break; 1735 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1736 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1737 Builder.getInt8Ty()); 1738 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1739 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1740 return RValue::get(Dest.getPointer()); 1741 } 1742 case Builtin::BI__builtin_dwarf_cfa: { 1743 // The offset in bytes from the first argument to the CFA. 1744 // 1745 // Why on earth is this in the frontend? Is there any reason at 1746 // all that the backend can't reasonably determine this while 1747 // lowering llvm.eh.dwarf.cfa()? 1748 // 1749 // TODO: If there's a satisfactory reason, add a target hook for 1750 // this instead of hard-coding 0, which is correct for most targets. 1751 int32_t Offset = 0; 1752 1753 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1754 return RValue::get(Builder.CreateCall(F, 1755 llvm::ConstantInt::get(Int32Ty, Offset))); 1756 } 1757 case Builtin::BI__builtin_return_address: { 1758 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1759 getContext().UnsignedIntTy); 1760 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1761 return RValue::get(Builder.CreateCall(F, Depth)); 1762 } 1763 case Builtin::BI_ReturnAddress: { 1764 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1765 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1766 } 1767 case Builtin::BI__builtin_frame_address: { 1768 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1769 getContext().UnsignedIntTy); 1770 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1771 return RValue::get(Builder.CreateCall(F, Depth)); 1772 } 1773 case Builtin::BI__builtin_extract_return_addr: { 1774 Value *Address = 
EmitScalarExpr(E->getArg(0)); 1775 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1776 return RValue::get(Result); 1777 } 1778 case Builtin::BI__builtin_frob_return_addr: { 1779 Value *Address = EmitScalarExpr(E->getArg(0)); 1780 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1781 return RValue::get(Result); 1782 } 1783 case Builtin::BI__builtin_dwarf_sp_column: { 1784 llvm::IntegerType *Ty 1785 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1786 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1787 if (Column == -1) { 1788 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1789 return RValue::get(llvm::UndefValue::get(Ty)); 1790 } 1791 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1792 } 1793 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1794 Value *Address = EmitScalarExpr(E->getArg(0)); 1795 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1796 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1797 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1798 } 1799 case Builtin::BI__builtin_eh_return: { 1800 Value *Int = EmitScalarExpr(E->getArg(0)); 1801 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1802 1803 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1804 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1805 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1806 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1807 ? Intrinsic::eh_return_i32 1808 : Intrinsic::eh_return_i64); 1809 Builder.CreateCall(F, {Int, Ptr}); 1810 Builder.CreateUnreachable(); 1811 1812 // We do need to preserve an insertion point. 
1813 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1814 1815 return RValue::get(nullptr); 1816 } 1817 case Builtin::BI__builtin_unwind_init: { 1818 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1819 return RValue::get(Builder.CreateCall(F)); 1820 } 1821 case Builtin::BI__builtin_extend_pointer: { 1822 // Extends a pointer to the size of an _Unwind_Word, which is 1823 // uint64_t on all platforms. Generally this gets poked into a 1824 // register and eventually used as an address, so if the 1825 // addressing registers are wider than pointers and the platform 1826 // doesn't implicitly ignore high-order bits when doing 1827 // addressing, we need to make sure we zext / sext based on 1828 // the platform's expectations. 1829 // 1830 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1831 1832 // Cast the pointer to intptr_t. 1833 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1834 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1835 1836 // If that's 64 bits, we're done. 1837 if (IntPtrTy->getBitWidth() == 64) 1838 return RValue::get(Result); 1839 1840 // Otherwise, ask the codegen data what to do. 1841 if (getTargetHooks().extendPointerWithSExt()) 1842 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1843 else 1844 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1845 } 1846 case Builtin::BI__builtin_setjmp: { 1847 // Buffer is a void**. 1848 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1849 1850 // Store the frame pointer to the setjmp buffer. 1851 Value *FrameAddr = 1852 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1853 ConstantInt::get(Int32Ty, 0)); 1854 Builder.CreateStore(FrameAddr, Buf); 1855 1856 // Store the stack pointer to the setjmp buffer. 
1857 Value *StackAddr = 1858 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1859 Address StackSaveSlot = 1860 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1861 Builder.CreateStore(StackAddr, StackSaveSlot); 1862 1863 // Call LLVM's EH setjmp, which is lightweight. 1864 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1865 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1866 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1867 } 1868 case Builtin::BI__builtin_longjmp: { 1869 Value *Buf = EmitScalarExpr(E->getArg(0)); 1870 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1871 1872 // Call LLVM's EH longjmp, which is lightweight. 1873 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1874 1875 // longjmp doesn't return; mark this as unreachable. 1876 Builder.CreateUnreachable(); 1877 1878 // We do need to preserve an insertion point. 1879 EmitBlock(createBasicBlock("longjmp.cont")); 1880 1881 return RValue::get(nullptr); 1882 } 1883 case Builtin::BI__sync_fetch_and_add: 1884 case Builtin::BI__sync_fetch_and_sub: 1885 case Builtin::BI__sync_fetch_and_or: 1886 case Builtin::BI__sync_fetch_and_and: 1887 case Builtin::BI__sync_fetch_and_xor: 1888 case Builtin::BI__sync_fetch_and_nand: 1889 case Builtin::BI__sync_add_and_fetch: 1890 case Builtin::BI__sync_sub_and_fetch: 1891 case Builtin::BI__sync_and_and_fetch: 1892 case Builtin::BI__sync_or_and_fetch: 1893 case Builtin::BI__sync_xor_and_fetch: 1894 case Builtin::BI__sync_nand_and_fetch: 1895 case Builtin::BI__sync_val_compare_and_swap: 1896 case Builtin::BI__sync_bool_compare_and_swap: 1897 case Builtin::BI__sync_lock_test_and_set: 1898 case Builtin::BI__sync_lock_release: 1899 case Builtin::BI__sync_swap: 1900 llvm_unreachable("Shouldn't make it through sema"); 1901 case Builtin::BI__sync_fetch_and_add_1: 1902 case Builtin::BI__sync_fetch_and_add_2: 1903 case Builtin::BI__sync_fetch_and_add_4: 1904 case Builtin::BI__sync_fetch_and_add_8: 1905 case 
Builtin::BI__sync_fetch_and_add_16: 1906 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1907 case Builtin::BI__sync_fetch_and_sub_1: 1908 case Builtin::BI__sync_fetch_and_sub_2: 1909 case Builtin::BI__sync_fetch_and_sub_4: 1910 case Builtin::BI__sync_fetch_and_sub_8: 1911 case Builtin::BI__sync_fetch_and_sub_16: 1912 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1913 case Builtin::BI__sync_fetch_and_or_1: 1914 case Builtin::BI__sync_fetch_and_or_2: 1915 case Builtin::BI__sync_fetch_and_or_4: 1916 case Builtin::BI__sync_fetch_and_or_8: 1917 case Builtin::BI__sync_fetch_and_or_16: 1918 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1919 case Builtin::BI__sync_fetch_and_and_1: 1920 case Builtin::BI__sync_fetch_and_and_2: 1921 case Builtin::BI__sync_fetch_and_and_4: 1922 case Builtin::BI__sync_fetch_and_and_8: 1923 case Builtin::BI__sync_fetch_and_and_16: 1924 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1925 case Builtin::BI__sync_fetch_and_xor_1: 1926 case Builtin::BI__sync_fetch_and_xor_2: 1927 case Builtin::BI__sync_fetch_and_xor_4: 1928 case Builtin::BI__sync_fetch_and_xor_8: 1929 case Builtin::BI__sync_fetch_and_xor_16: 1930 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1931 case Builtin::BI__sync_fetch_and_nand_1: 1932 case Builtin::BI__sync_fetch_and_nand_2: 1933 case Builtin::BI__sync_fetch_and_nand_4: 1934 case Builtin::BI__sync_fetch_and_nand_8: 1935 case Builtin::BI__sync_fetch_and_nand_16: 1936 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1937 1938 // Clang extensions: not overloaded yet. 
1939 case Builtin::BI__sync_fetch_and_min: 1940 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1941 case Builtin::BI__sync_fetch_and_max: 1942 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1943 case Builtin::BI__sync_fetch_and_umin: 1944 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1945 case Builtin::BI__sync_fetch_and_umax: 1946 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1947 1948 case Builtin::BI__sync_add_and_fetch_1: 1949 case Builtin::BI__sync_add_and_fetch_2: 1950 case Builtin::BI__sync_add_and_fetch_4: 1951 case Builtin::BI__sync_add_and_fetch_8: 1952 case Builtin::BI__sync_add_and_fetch_16: 1953 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1954 llvm::Instruction::Add); 1955 case Builtin::BI__sync_sub_and_fetch_1: 1956 case Builtin::BI__sync_sub_and_fetch_2: 1957 case Builtin::BI__sync_sub_and_fetch_4: 1958 case Builtin::BI__sync_sub_and_fetch_8: 1959 case Builtin::BI__sync_sub_and_fetch_16: 1960 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1961 llvm::Instruction::Sub); 1962 case Builtin::BI__sync_and_and_fetch_1: 1963 case Builtin::BI__sync_and_and_fetch_2: 1964 case Builtin::BI__sync_and_and_fetch_4: 1965 case Builtin::BI__sync_and_and_fetch_8: 1966 case Builtin::BI__sync_and_and_fetch_16: 1967 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1968 llvm::Instruction::And); 1969 case Builtin::BI__sync_or_and_fetch_1: 1970 case Builtin::BI__sync_or_and_fetch_2: 1971 case Builtin::BI__sync_or_and_fetch_4: 1972 case Builtin::BI__sync_or_and_fetch_8: 1973 case Builtin::BI__sync_or_and_fetch_16: 1974 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1975 llvm::Instruction::Or); 1976 case Builtin::BI__sync_xor_and_fetch_1: 1977 case Builtin::BI__sync_xor_and_fetch_2: 1978 case Builtin::BI__sync_xor_and_fetch_4: 1979 case Builtin::BI__sync_xor_and_fetch_8: 1980 case Builtin::BI__sync_xor_and_fetch_16: 1981 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1982 llvm::Instruction::Xor); 1983 case Builtin::BI__sync_nand_and_fetch_1: 1984 case Builtin::BI__sync_nand_and_fetch_2: 1985 case Builtin::BI__sync_nand_and_fetch_4: 1986 case Builtin::BI__sync_nand_and_fetch_8: 1987 case Builtin::BI__sync_nand_and_fetch_16: 1988 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1989 llvm::Instruction::And, true); 1990 1991 case Builtin::BI__sync_val_compare_and_swap_1: 1992 case Builtin::BI__sync_val_compare_and_swap_2: 1993 case Builtin::BI__sync_val_compare_and_swap_4: 1994 case Builtin::BI__sync_val_compare_and_swap_8: 1995 case Builtin::BI__sync_val_compare_and_swap_16: 1996 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1997 1998 case Builtin::BI__sync_bool_compare_and_swap_1: 1999 case Builtin::BI__sync_bool_compare_and_swap_2: 2000 case Builtin::BI__sync_bool_compare_and_swap_4: 2001 case Builtin::BI__sync_bool_compare_and_swap_8: 2002 case Builtin::BI__sync_bool_compare_and_swap_16: 2003 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 2004 2005 case Builtin::BI__sync_swap_1: 2006 case Builtin::BI__sync_swap_2: 2007 case Builtin::BI__sync_swap_4: 2008 case Builtin::BI__sync_swap_8: 2009 case Builtin::BI__sync_swap_16: 2010 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 2011 2012 case Builtin::BI__sync_lock_test_and_set_1: 2013 case Builtin::BI__sync_lock_test_and_set_2: 2014 case Builtin::BI__sync_lock_test_and_set_4: 2015 case Builtin::BI__sync_lock_test_and_set_8: 2016 case Builtin::BI__sync_lock_test_and_set_16: 2017 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 2018 2019 case Builtin::BI__sync_lock_release_1: 2020 case Builtin::BI__sync_lock_release_2: 2021 case Builtin::BI__sync_lock_release_4: 2022 case Builtin::BI__sync_lock_release_8: 2023 case Builtin::BI__sync_lock_release_16: { 2024 Value *Ptr = EmitScalarExpr(E->getArg(0)); 2025 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 2026 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 2027 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 2028 StoreSize.getQuantity() * 8); 2029 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 2030 llvm::StoreInst *Store = 2031 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 2032 StoreSize); 2033 Store->setAtomic(llvm::AtomicOrdering::Release); 2034 return RValue::get(nullptr); 2035 } 2036 2037 case Builtin::BI__sync_synchronize: { 2038 // We assume this is supposed to correspond to a C++0x-style 2039 // sequentially-consistent fence (i.e. this is only usable for 2040 // synchonization, not device I/O or anything like that). This intrinsic 2041 // is really badly designed in the sense that in theory, there isn't 2042 // any way to safely use it... but in practice, it mostly works 2043 // to use it with non-atomic loads and stores to get acquire/release 2044 // semantics. 2045 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 2046 return RValue::get(nullptr); 2047 } 2048 2049 case Builtin::BI__builtin_nontemporal_load: 2050 return RValue::get(EmitNontemporalLoad(*this, E)); 2051 case Builtin::BI__builtin_nontemporal_store: 2052 return RValue::get(EmitNontemporalStore(*this, E)); 2053 case Builtin::BI__c11_atomic_is_lock_free: 2054 case Builtin::BI__atomic_is_lock_free: { 2055 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 2056 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 2057 // _Atomic(T) is always properly-aligned. 
2058 const char *LibCallName = "__atomic_is_lock_free"; 2059 CallArgList Args; 2060 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 2061 getContext().getSizeType()); 2062 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 2063 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 2064 getContext().VoidPtrTy); 2065 else 2066 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 2067 getContext().VoidPtrTy); 2068 const CGFunctionInfo &FuncInfo = 2069 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 2070 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 2071 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 2072 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 2073 ReturnValueSlot(), Args); 2074 } 2075 2076 case Builtin::BI__atomic_test_and_set: { 2077 // Look at the argument type to determine whether this is a volatile 2078 // operation. The parameter type is always volatile. 2079 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 2080 bool Volatile = 2081 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 2082 2083 Value *Ptr = EmitScalarExpr(E->getArg(0)); 2084 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 2085 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 2086 Value *NewVal = Builder.getInt8(1); 2087 Value *Order = EmitScalarExpr(E->getArg(1)); 2088 if (isa<llvm::ConstantInt>(Order)) { 2089 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 2090 AtomicRMWInst *Result = nullptr; 2091 switch (ord) { 2092 case 0: // memory_order_relaxed 2093 default: // invalid order 2094 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2095 llvm::AtomicOrdering::Monotonic); 2096 break; 2097 case 1: // memory_order_consume 2098 case 2: // memory_order_acquire 2099 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2100 llvm::AtomicOrdering::Acquire); 2101 break; 2102 case 3: // memory_order_release 2103 
Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2104 llvm::AtomicOrdering::Release); 2105 break; 2106 case 4: // memory_order_acq_rel 2107 2108 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2109 llvm::AtomicOrdering::AcquireRelease); 2110 break; 2111 case 5: // memory_order_seq_cst 2112 Result = Builder.CreateAtomicRMW( 2113 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2114 llvm::AtomicOrdering::SequentiallyConsistent); 2115 break; 2116 } 2117 Result->setVolatile(Volatile); 2118 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 2119 } 2120 2121 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 2122 2123 llvm::BasicBlock *BBs[5] = { 2124 createBasicBlock("monotonic", CurFn), 2125 createBasicBlock("acquire", CurFn), 2126 createBasicBlock("release", CurFn), 2127 createBasicBlock("acqrel", CurFn), 2128 createBasicBlock("seqcst", CurFn) 2129 }; 2130 llvm::AtomicOrdering Orders[5] = { 2131 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 2132 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 2133 llvm::AtomicOrdering::SequentiallyConsistent}; 2134 2135 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 2136 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 2137 2138 Builder.SetInsertPoint(ContBB); 2139 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 2140 2141 for (unsigned i = 0; i < 5; ++i) { 2142 Builder.SetInsertPoint(BBs[i]); 2143 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 2144 Ptr, NewVal, Orders[i]); 2145 RMW->setVolatile(Volatile); 2146 Result->addIncoming(RMW, BBs[i]); 2147 Builder.CreateBr(ContBB); 2148 } 2149 2150 SI->addCase(Builder.getInt32(0), BBs[0]); 2151 SI->addCase(Builder.getInt32(1), BBs[1]); 2152 SI->addCase(Builder.getInt32(2), BBs[1]); 2153 SI->addCase(Builder.getInt32(3), BBs[2]); 2154 SI->addCase(Builder.getInt32(4), BBs[3]); 2155 SI->addCase(Builder.getInt32(5), 
BBs[4]); 2156 2157 Builder.SetInsertPoint(ContBB); 2158 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 2159 } 2160 2161 case Builtin::BI__atomic_clear: { 2162 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 2163 bool Volatile = 2164 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 2165 2166 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 2167 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 2168 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 2169 Value *NewVal = Builder.getInt8(0); 2170 Value *Order = EmitScalarExpr(E->getArg(1)); 2171 if (isa<llvm::ConstantInt>(Order)) { 2172 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 2173 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 2174 switch (ord) { 2175 case 0: // memory_order_relaxed 2176 default: // invalid order 2177 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 2178 break; 2179 case 3: // memory_order_release 2180 Store->setOrdering(llvm::AtomicOrdering::Release); 2181 break; 2182 case 5: // memory_order_seq_cst 2183 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 2184 break; 2185 } 2186 return RValue::get(nullptr); 2187 } 2188 2189 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 2190 2191 llvm::BasicBlock *BBs[3] = { 2192 createBasicBlock("monotonic", CurFn), 2193 createBasicBlock("release", CurFn), 2194 createBasicBlock("seqcst", CurFn) 2195 }; 2196 llvm::AtomicOrdering Orders[3] = { 2197 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 2198 llvm::AtomicOrdering::SequentiallyConsistent}; 2199 2200 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 2201 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 2202 2203 for (unsigned i = 0; i < 3; ++i) { 2204 Builder.SetInsertPoint(BBs[i]); 2205 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 2206 Store->setOrdering(Orders[i]); 2207 
Builder.CreateBr(ContBB); 2208 } 2209 2210 SI->addCase(Builder.getInt32(0), BBs[0]); 2211 SI->addCase(Builder.getInt32(3), BBs[1]); 2212 SI->addCase(Builder.getInt32(5), BBs[2]); 2213 2214 Builder.SetInsertPoint(ContBB); 2215 return RValue::get(nullptr); 2216 } 2217 2218 case Builtin::BI__atomic_thread_fence: 2219 case Builtin::BI__atomic_signal_fence: 2220 case Builtin::BI__c11_atomic_thread_fence: 2221 case Builtin::BI__c11_atomic_signal_fence: { 2222 llvm::SyncScope::ID SSID; 2223 if (BuiltinID == Builtin::BI__atomic_signal_fence || 2224 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 2225 SSID = llvm::SyncScope::SingleThread; 2226 else 2227 SSID = llvm::SyncScope::System; 2228 Value *Order = EmitScalarExpr(E->getArg(0)); 2229 if (isa<llvm::ConstantInt>(Order)) { 2230 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 2231 switch (ord) { 2232 case 0: // memory_order_relaxed 2233 default: // invalid order 2234 break; 2235 case 1: // memory_order_consume 2236 case 2: // memory_order_acquire 2237 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 2238 break; 2239 case 3: // memory_order_release 2240 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 2241 break; 2242 case 4: // memory_order_acq_rel 2243 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 2244 break; 2245 case 5: // memory_order_seq_cst 2246 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 2247 break; 2248 } 2249 return RValue::get(nullptr); 2250 } 2251 2252 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 2253 AcquireBB = createBasicBlock("acquire", CurFn); 2254 ReleaseBB = createBasicBlock("release", CurFn); 2255 AcqRelBB = createBasicBlock("acqrel", CurFn); 2256 SeqCstBB = createBasicBlock("seqcst", CurFn); 2257 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 2258 2259 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 2260 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 
2261 2262 Builder.SetInsertPoint(AcquireBB); 2263 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 2264 Builder.CreateBr(ContBB); 2265 SI->addCase(Builder.getInt32(1), AcquireBB); 2266 SI->addCase(Builder.getInt32(2), AcquireBB); 2267 2268 Builder.SetInsertPoint(ReleaseBB); 2269 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 2270 Builder.CreateBr(ContBB); 2271 SI->addCase(Builder.getInt32(3), ReleaseBB); 2272 2273 Builder.SetInsertPoint(AcqRelBB); 2274 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 2275 Builder.CreateBr(ContBB); 2276 SI->addCase(Builder.getInt32(4), AcqRelBB); 2277 2278 Builder.SetInsertPoint(SeqCstBB); 2279 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 2280 Builder.CreateBr(ContBB); 2281 SI->addCase(Builder.getInt32(5), SeqCstBB); 2282 2283 Builder.SetInsertPoint(ContBB); 2284 return RValue::get(nullptr); 2285 } 2286 2287 case Builtin::BI__builtin_signbit: 2288 case Builtin::BI__builtin_signbitf: 2289 case Builtin::BI__builtin_signbitl: { 2290 return RValue::get( 2291 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 2292 ConvertType(E->getType()))); 2293 } 2294 case Builtin::BI__annotation: { 2295 // Re-encode each wide string to UTF8 and make an MDString. 2296 SmallVector<Metadata *, 1> Strings; 2297 for (const Expr *Arg : E->arguments()) { 2298 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts()); 2299 assert(Str->getCharByteWidth() == 2); 2300 StringRef WideBytes = Str->getBytes(); 2301 std::string StrUtf8; 2302 if (!convertUTF16ToUTF8String( 2303 makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { 2304 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); 2305 continue; 2306 } 2307 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); 2308 } 2309 2310 // Build and MDTuple of MDStrings and emit the intrinsic call. 
2311 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); 2312 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); 2313 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); 2314 return RValue::getIgnored(); 2315 } 2316 case Builtin::BI__builtin_annotation: { 2317 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 2318 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 2319 AnnVal->getType()); 2320 2321 // Get the annotation string, go through casts. Sema requires this to be a 2322 // non-wide string literal, potentially casted, so the cast<> is safe. 2323 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 2324 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 2325 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 2326 } 2327 case Builtin::BI__builtin_addcb: 2328 case Builtin::BI__builtin_addcs: 2329 case Builtin::BI__builtin_addc: 2330 case Builtin::BI__builtin_addcl: 2331 case Builtin::BI__builtin_addcll: 2332 case Builtin::BI__builtin_subcb: 2333 case Builtin::BI__builtin_subcs: 2334 case Builtin::BI__builtin_subc: 2335 case Builtin::BI__builtin_subcl: 2336 case Builtin::BI__builtin_subcll: { 2337 2338 // We translate all of these builtins from expressions of the form: 2339 // int x = ..., y = ..., carryin = ..., carryout, result; 2340 // result = __builtin_addc(x, y, carryin, &carryout); 2341 // 2342 // to LLVM IR of the form: 2343 // 2344 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 2345 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 2346 // %carry1 = extractvalue {i32, i1} %tmp1, 1 2347 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 2348 // i32 %carryin) 2349 // %result = extractvalue {i32, i1} %tmp2, 0 2350 // %carry2 = extractvalue {i32, i1} %tmp2, 1 2351 // %tmp3 = or i1 %carry1, %carry2 2352 // %tmp4 = zext i1 %tmp3 to i32 2353 // store i32 %tmp4, i32* %carryout 2354 2355 // Scalarize our 
inputs. 2356 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2357 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2358 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 2359 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 2360 2361 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 2362 llvm::Intrinsic::ID IntrinsicId; 2363 switch (BuiltinID) { 2364 default: llvm_unreachable("Unknown multiprecision builtin id."); 2365 case Builtin::BI__builtin_addcb: 2366 case Builtin::BI__builtin_addcs: 2367 case Builtin::BI__builtin_addc: 2368 case Builtin::BI__builtin_addcl: 2369 case Builtin::BI__builtin_addcll: 2370 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2371 break; 2372 case Builtin::BI__builtin_subcb: 2373 case Builtin::BI__builtin_subcs: 2374 case Builtin::BI__builtin_subc: 2375 case Builtin::BI__builtin_subcl: 2376 case Builtin::BI__builtin_subcll: 2377 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2378 break; 2379 } 2380 2381 // Construct our resulting LLVM IR expression. 
2382 llvm::Value *Carry1; 2383 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 2384 X, Y, Carry1); 2385 llvm::Value *Carry2; 2386 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 2387 Sum1, Carryin, Carry2); 2388 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 2389 X->getType()); 2390 Builder.CreateStore(CarryOut, CarryOutPtr); 2391 return RValue::get(Sum2); 2392 } 2393 2394 case Builtin::BI__builtin_add_overflow: 2395 case Builtin::BI__builtin_sub_overflow: 2396 case Builtin::BI__builtin_mul_overflow: { 2397 const clang::Expr *LeftArg = E->getArg(0); 2398 const clang::Expr *RightArg = E->getArg(1); 2399 const clang::Expr *ResultArg = E->getArg(2); 2400 2401 clang::QualType ResultQTy = 2402 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 2403 2404 WidthAndSignedness LeftInfo = 2405 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 2406 WidthAndSignedness RightInfo = 2407 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 2408 WidthAndSignedness ResultInfo = 2409 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 2410 2411 // Handle mixed-sign multiplication as a special case, because adding 2412 // runtime or backend support for our generic irgen would be too expensive. 
2413 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo)) 2414 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg, 2415 RightInfo, ResultArg, ResultQTy, 2416 ResultInfo); 2417 2418 WidthAndSignedness EncompassingInfo = 2419 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 2420 2421 llvm::Type *EncompassingLLVMTy = 2422 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 2423 2424 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 2425 2426 llvm::Intrinsic::ID IntrinsicId; 2427 switch (BuiltinID) { 2428 default: 2429 llvm_unreachable("Unknown overflow builtin id."); 2430 case Builtin::BI__builtin_add_overflow: 2431 IntrinsicId = EncompassingInfo.Signed 2432 ? llvm::Intrinsic::sadd_with_overflow 2433 : llvm::Intrinsic::uadd_with_overflow; 2434 break; 2435 case Builtin::BI__builtin_sub_overflow: 2436 IntrinsicId = EncompassingInfo.Signed 2437 ? llvm::Intrinsic::ssub_with_overflow 2438 : llvm::Intrinsic::usub_with_overflow; 2439 break; 2440 case Builtin::BI__builtin_mul_overflow: 2441 IntrinsicId = EncompassingInfo.Signed 2442 ? llvm::Intrinsic::smul_with_overflow 2443 : llvm::Intrinsic::umul_with_overflow; 2444 break; 2445 } 2446 2447 llvm::Value *Left = EmitScalarExpr(LeftArg); 2448 llvm::Value *Right = EmitScalarExpr(RightArg); 2449 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 2450 2451 // Extend each operand to the encompassing type. 2452 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 2453 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 2454 2455 // Perform the operation on the extended values. 2456 llvm::Value *Overflow, *Result; 2457 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 2458 2459 if (EncompassingInfo.Width > ResultInfo.Width) { 2460 // The encompassing type is wider than the result type, so we need to 2461 // truncate it. 
2462 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 2463 2464 // To see if the truncation caused an overflow, we will extend 2465 // the result and then compare it to the original result. 2466 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 2467 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 2468 llvm::Value *TruncationOverflow = 2469 Builder.CreateICmpNE(Result, ResultTruncExt); 2470 2471 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 2472 Result = ResultTrunc; 2473 } 2474 2475 // Finally, store the result using the pointer. 2476 bool isVolatile = 2477 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2478 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 2479 2480 return RValue::get(Overflow); 2481 } 2482 2483 case Builtin::BI__builtin_uadd_overflow: 2484 case Builtin::BI__builtin_uaddl_overflow: 2485 case Builtin::BI__builtin_uaddll_overflow: 2486 case Builtin::BI__builtin_usub_overflow: 2487 case Builtin::BI__builtin_usubl_overflow: 2488 case Builtin::BI__builtin_usubll_overflow: 2489 case Builtin::BI__builtin_umul_overflow: 2490 case Builtin::BI__builtin_umull_overflow: 2491 case Builtin::BI__builtin_umulll_overflow: 2492 case Builtin::BI__builtin_sadd_overflow: 2493 case Builtin::BI__builtin_saddl_overflow: 2494 case Builtin::BI__builtin_saddll_overflow: 2495 case Builtin::BI__builtin_ssub_overflow: 2496 case Builtin::BI__builtin_ssubl_overflow: 2497 case Builtin::BI__builtin_ssubll_overflow: 2498 case Builtin::BI__builtin_smul_overflow: 2499 case Builtin::BI__builtin_smull_overflow: 2500 case Builtin::BI__builtin_smulll_overflow: { 2501 2502 // We translate all of these builtins directly to the relevant llvm IR node. 2503 2504 // Scalarize our inputs. 
2505 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2506 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2507 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2508 2509 // Decide which of the overflow intrinsics we are lowering to: 2510 llvm::Intrinsic::ID IntrinsicId; 2511 switch (BuiltinID) { 2512 default: llvm_unreachable("Unknown overflow builtin id."); 2513 case Builtin::BI__builtin_uadd_overflow: 2514 case Builtin::BI__builtin_uaddl_overflow: 2515 case Builtin::BI__builtin_uaddll_overflow: 2516 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2517 break; 2518 case Builtin::BI__builtin_usub_overflow: 2519 case Builtin::BI__builtin_usubl_overflow: 2520 case Builtin::BI__builtin_usubll_overflow: 2521 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2522 break; 2523 case Builtin::BI__builtin_umul_overflow: 2524 case Builtin::BI__builtin_umull_overflow: 2525 case Builtin::BI__builtin_umulll_overflow: 2526 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2527 break; 2528 case Builtin::BI__builtin_sadd_overflow: 2529 case Builtin::BI__builtin_saddl_overflow: 2530 case Builtin::BI__builtin_saddll_overflow: 2531 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2532 break; 2533 case Builtin::BI__builtin_ssub_overflow: 2534 case Builtin::BI__builtin_ssubl_overflow: 2535 case Builtin::BI__builtin_ssubll_overflow: 2536 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2537 break; 2538 case Builtin::BI__builtin_smul_overflow: 2539 case Builtin::BI__builtin_smull_overflow: 2540 case Builtin::BI__builtin_smulll_overflow: 2541 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2542 break; 2543 } 2544 2545 2546 llvm::Value *Carry; 2547 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2548 Builder.CreateStore(Sum, SumOutPtr); 2549 2550 return RValue::get(Carry); 2551 } 2552 case Builtin::BI__builtin_addressof: 2553 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2554 case Builtin::BI__builtin_operator_new: 2555 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2556 E->getArg(0), false); 2557 case Builtin::BI__builtin_operator_delete: 2558 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2559 E->getArg(0), true); 2560 case Builtin::BI__noop: 2561 // __noop always evaluates to an integer literal zero. 2562 return RValue::get(ConstantInt::get(IntTy, 0)); 2563 case Builtin::BI__builtin_call_with_static_chain: { 2564 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2565 const Expr *Chain = E->getArg(1); 2566 return EmitCall(Call->getCallee()->getType(), 2567 EmitCallee(Call->getCallee()), Call, ReturnValue, 2568 EmitScalarExpr(Chain)); 2569 } 2570 case Builtin::BI_InterlockedExchange8: 2571 case Builtin::BI_InterlockedExchange16: 2572 case Builtin::BI_InterlockedExchange: 2573 case Builtin::BI_InterlockedExchangePointer: 2574 return RValue::get( 2575 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2576 case Builtin::BI_InterlockedCompareExchangePointer: { 2577 llvm::Type *RTy; 2578 llvm::IntegerType *IntType = 2579 IntegerType::get(getLLVMContext(), 2580 getContext().getTypeSize(E->getType())); 2581 llvm::Type *IntPtrType = IntType->getPointerTo(); 2582 2583 llvm::Value *Destination = 2584 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2585 2586 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2587 RTy = Exchange->getType(); 2588 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2589 2590 llvm::Value *Comparand = 2591 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2592 2593 auto Result = 2594 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2595 AtomicOrdering::SequentiallyConsistent, 2596 AtomicOrdering::SequentiallyConsistent); 2597 Result->setVolatile(true); 2598 2599 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2600 0), 2601 RTy)); 2602 } 2603 case Builtin::BI_InterlockedCompareExchange8: 2604 case 
Builtin::BI_InterlockedCompareExchange16: 2605 case Builtin::BI_InterlockedCompareExchange: 2606 case Builtin::BI_InterlockedCompareExchange64: { 2607 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2608 EmitScalarExpr(E->getArg(0)), 2609 EmitScalarExpr(E->getArg(2)), 2610 EmitScalarExpr(E->getArg(1)), 2611 AtomicOrdering::SequentiallyConsistent, 2612 AtomicOrdering::SequentiallyConsistent); 2613 CXI->setVolatile(true); 2614 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2615 } 2616 case Builtin::BI_InterlockedIncrement16: 2617 case Builtin::BI_InterlockedIncrement: 2618 return RValue::get( 2619 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2620 case Builtin::BI_InterlockedDecrement16: 2621 case Builtin::BI_InterlockedDecrement: 2622 return RValue::get( 2623 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2624 case Builtin::BI_InterlockedAnd8: 2625 case Builtin::BI_InterlockedAnd16: 2626 case Builtin::BI_InterlockedAnd: 2627 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2628 case Builtin::BI_InterlockedExchangeAdd8: 2629 case Builtin::BI_InterlockedExchangeAdd16: 2630 case Builtin::BI_InterlockedExchangeAdd: 2631 return RValue::get( 2632 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2633 case Builtin::BI_InterlockedExchangeSub8: 2634 case Builtin::BI_InterlockedExchangeSub16: 2635 case Builtin::BI_InterlockedExchangeSub: 2636 return RValue::get( 2637 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2638 case Builtin::BI_InterlockedOr8: 2639 case Builtin::BI_InterlockedOr16: 2640 case Builtin::BI_InterlockedOr: 2641 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2642 case Builtin::BI_InterlockedXor8: 2643 case Builtin::BI_InterlockedXor16: 2644 case Builtin::BI_InterlockedXor: 2645 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2646 case Builtin::BI_interlockedbittestandset: 2647 return RValue::get( 2648 
EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); 2649 2650 case Builtin::BI__exception_code: 2651 case Builtin::BI_exception_code: 2652 return RValue::get(EmitSEHExceptionCode()); 2653 case Builtin::BI__exception_info: 2654 case Builtin::BI_exception_info: 2655 return RValue::get(EmitSEHExceptionInfo()); 2656 case Builtin::BI__abnormal_termination: 2657 case Builtin::BI_abnormal_termination: 2658 return RValue::get(EmitSEHAbnormalTermination()); 2659 case Builtin::BI_setjmpex: { 2660 if (getTarget().getTriple().isOSMSVCRT()) { 2661 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2662 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2663 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2664 llvm::Attribute::ReturnsTwice); 2665 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2666 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2667 "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); 2668 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2669 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2670 llvm::Value *FrameAddr = 2671 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2672 ConstantInt::get(Int32Ty, 0)); 2673 llvm::Value *Args[] = {Buf, FrameAddr}; 2674 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2675 CS.setAttributes(ReturnsTwiceAttr); 2676 return RValue::get(CS.getInstruction()); 2677 } 2678 break; 2679 } 2680 case Builtin::BI_setjmp: { 2681 if (getTarget().getTriple().isOSMSVCRT()) { 2682 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2683 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2684 llvm::Attribute::ReturnsTwice); 2685 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2686 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2687 llvm::CallSite CS; 2688 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2689 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2690 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2691 llvm::FunctionType::get(IntTy, 
ArgTypes, /*isVarArg=*/true), 2692 "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); 2693 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2694 llvm::Value *Args[] = {Buf, Count}; 2695 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2696 } else { 2697 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2698 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2699 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2700 "_setjmp", ReturnsTwiceAttr, /*Local=*/true); 2701 llvm::Value *FrameAddr = 2702 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2703 ConstantInt::get(Int32Ty, 0)); 2704 llvm::Value *Args[] = {Buf, FrameAddr}; 2705 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2706 } 2707 CS.setAttributes(ReturnsTwiceAttr); 2708 return RValue::get(CS.getInstruction()); 2709 } 2710 break; 2711 } 2712 2713 case Builtin::BI__GetExceptionInfo: { 2714 if (llvm::GlobalVariable *GV = 2715 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2716 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2717 break; 2718 } 2719 2720 case Builtin::BI__fastfail: 2721 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 2722 2723 case Builtin::BI__builtin_coro_size: { 2724 auto & Context = getContext(); 2725 auto SizeTy = Context.getSizeType(); 2726 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 2727 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2728 return RValue::get(Builder.CreateCall(F)); 2729 } 2730 2731 case Builtin::BI__builtin_coro_id: 2732 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2733 case Builtin::BI__builtin_coro_promise: 2734 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2735 case Builtin::BI__builtin_coro_resume: 2736 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2737 case Builtin::BI__builtin_coro_frame: 2738 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2739 case Builtin::BI__builtin_coro_free: 2740 return EmitCoroutineIntrinsic(E, 
Intrinsic::coro_free); 2741 case Builtin::BI__builtin_coro_destroy: 2742 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2743 case Builtin::BI__builtin_coro_done: 2744 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2745 case Builtin::BI__builtin_coro_alloc: 2746 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2747 case Builtin::BI__builtin_coro_begin: 2748 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2749 case Builtin::BI__builtin_coro_end: 2750 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2751 case Builtin::BI__builtin_coro_suspend: 2752 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2753 case Builtin::BI__builtin_coro_param: 2754 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2755 2756 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2757 case Builtin::BIread_pipe: 2758 case Builtin::BIwrite_pipe: { 2759 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2760 *Arg1 = EmitScalarExpr(E->getArg(1)); 2761 CGOpenCLRuntime OpenCLRT(CGM); 2762 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2763 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2764 2765 // Type of the generic packet parameter. 2766 unsigned GenericAS = 2767 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2768 llvm::Type *I8PTy = llvm::PointerType::get( 2769 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2770 2771 // Testing which overloaded version we should generate the call for. 2772 if (2U == E->getNumArgs()) { 2773 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2774 : "__write_pipe_2"; 2775 // Creating a generic function type to be able to call with any builtin or 2776 // user defined type. 
2777 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2778 llvm::FunctionType *FTy = llvm::FunctionType::get( 2779 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2780 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2781 return RValue::get( 2782 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2783 {Arg0, BCast, PacketSize, PacketAlign})); 2784 } else { 2785 assert(4 == E->getNumArgs() && 2786 "Illegal number of parameters to pipe function"); 2787 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2788 : "__write_pipe_4"; 2789 2790 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2791 Int32Ty, Int32Ty}; 2792 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2793 *Arg3 = EmitScalarExpr(E->getArg(3)); 2794 llvm::FunctionType *FTy = llvm::FunctionType::get( 2795 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2796 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2797 // We know the third argument is an integer type, but we may need to cast 2798 // it to i32. 2799 if (Arg2->getType() != Int32Ty) 2800 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2801 return RValue::get(Builder.CreateCall( 2802 CGM.CreateRuntimeFunction(FTy, Name), 2803 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2804 } 2805 } 2806 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2807 // functions 2808 case Builtin::BIreserve_read_pipe: 2809 case Builtin::BIreserve_write_pipe: 2810 case Builtin::BIwork_group_reserve_read_pipe: 2811 case Builtin::BIwork_group_reserve_write_pipe: 2812 case Builtin::BIsub_group_reserve_read_pipe: 2813 case Builtin::BIsub_group_reserve_write_pipe: { 2814 // Composing the mangled name for the function. 
2815 const char *Name; 2816 if (BuiltinID == Builtin::BIreserve_read_pipe) 2817 Name = "__reserve_read_pipe"; 2818 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2819 Name = "__reserve_write_pipe"; 2820 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2821 Name = "__work_group_reserve_read_pipe"; 2822 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2823 Name = "__work_group_reserve_write_pipe"; 2824 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2825 Name = "__sub_group_reserve_read_pipe"; 2826 else 2827 Name = "__sub_group_reserve_write_pipe"; 2828 2829 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2830 *Arg1 = EmitScalarExpr(E->getArg(1)); 2831 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2832 CGOpenCLRuntime OpenCLRT(CGM); 2833 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2834 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2835 2836 // Building the generic function prototype. 2837 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2838 llvm::FunctionType *FTy = llvm::FunctionType::get( 2839 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2840 // We know the second argument is an integer type, but we may need to cast 2841 // it to i32. 
2842 if (Arg1->getType() != Int32Ty) 2843 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2844 return RValue::get( 2845 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2846 {Arg0, Arg1, PacketSize, PacketAlign})); 2847 } 2848 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2849 // functions 2850 case Builtin::BIcommit_read_pipe: 2851 case Builtin::BIcommit_write_pipe: 2852 case Builtin::BIwork_group_commit_read_pipe: 2853 case Builtin::BIwork_group_commit_write_pipe: 2854 case Builtin::BIsub_group_commit_read_pipe: 2855 case Builtin::BIsub_group_commit_write_pipe: { 2856 const char *Name; 2857 if (BuiltinID == Builtin::BIcommit_read_pipe) 2858 Name = "__commit_read_pipe"; 2859 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2860 Name = "__commit_write_pipe"; 2861 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2862 Name = "__work_group_commit_read_pipe"; 2863 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2864 Name = "__work_group_commit_write_pipe"; 2865 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2866 Name = "__sub_group_commit_read_pipe"; 2867 else 2868 Name = "__sub_group_commit_write_pipe"; 2869 2870 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2871 *Arg1 = EmitScalarExpr(E->getArg(1)); 2872 CGOpenCLRuntime OpenCLRT(CGM); 2873 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2874 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2875 2876 // Building the generic function prototype. 
2877 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2878 llvm::FunctionType *FTy = 2879 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2880 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2881 2882 return RValue::get( 2883 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2884 {Arg0, Arg1, PacketSize, PacketAlign})); 2885 } 2886 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2887 case Builtin::BIget_pipe_num_packets: 2888 case Builtin::BIget_pipe_max_packets: { 2889 const char *Name; 2890 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2891 Name = "__get_pipe_num_packets"; 2892 else 2893 Name = "__get_pipe_max_packets"; 2894 2895 // Building the generic function prototype. 2896 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2897 CGOpenCLRuntime OpenCLRT(CGM); 2898 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2899 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2900 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2901 llvm::FunctionType *FTy = llvm::FunctionType::get( 2902 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2903 2904 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2905 {Arg0, PacketSize, PacketAlign})); 2906 } 2907 2908 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2909 case Builtin::BIto_global: 2910 case Builtin::BIto_local: 2911 case Builtin::BIto_private: { 2912 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2913 auto NewArgT = llvm::PointerType::get(Int8Ty, 2914 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2915 auto NewRetT = llvm::PointerType::get(Int8Ty, 2916 CGM.getContext().getTargetAddressSpace( 2917 E->getType()->getPointeeType().getAddressSpace())); 2918 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2919 llvm::Value *NewArg; 2920 if (Arg0->getType()->getPointerAddressSpace() != 2921 NewArgT->getPointerAddressSpace()) 2922 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2923 else 2924 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2925 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2926 auto NewCall = 2927 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2928 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2929 ConvertType(E->getType()))); 2930 } 2931 2932 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2933 // It contains four different overload formats specified in Table 6.13.17.1. 
2934 case Builtin::BIenqueue_kernel: { 2935 StringRef Name; // Generated function call name 2936 unsigned NumArgs = E->getNumArgs(); 2937 2938 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2939 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2940 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2941 2942 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2943 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2944 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 2945 llvm::Value *Range = NDRangeL.getAddress().getPointer(); 2946 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 2947 2948 if (NumArgs == 4) { 2949 // The most basic form of the call with parameters: 2950 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2951 Name = "__enqueue_kernel_basic"; 2952 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, 2953 GenericVoidPtrTy}; 2954 llvm::FunctionType *FTy = llvm::FunctionType::get( 2955 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2956 2957 auto Info = 2958 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 2959 llvm::Value *Kernel = 2960 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 2961 llvm::Value *Block = 2962 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 2963 2964 AttrBuilder B; 2965 B.addAttribute(Attribute::ByVal); 2966 llvm::AttributeList ByValAttrSet = 2967 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 2968 2969 auto RTCall = 2970 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 2971 {Queue, Flags, Range, Kernel, Block}); 2972 RTCall->setAttributes(ByValAttrSet); 2973 return RValue::get(RTCall); 2974 } 2975 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2976 2977 // Create a temporary array to hold the sizes of local pointer arguments 2978 // for the block. \p First is the position of the first size argument. 
2979 auto CreateArrayForSizeVar = [=](unsigned First) { 2980 auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); 2981 auto *Arr = Builder.CreateAlloca(AT); 2982 llvm::Value *Ptr; 2983 // Each of the following arguments specifies the size of the corresponding 2984 // argument passed to the enqueued block. 2985 auto *Zero = llvm::ConstantInt::get(IntTy, 0); 2986 for (unsigned I = First; I < NumArgs; ++I) { 2987 auto *Index = llvm::ConstantInt::get(IntTy, I - First); 2988 auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); 2989 if (I == First) 2990 Ptr = GEP; 2991 auto *V = 2992 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); 2993 Builder.CreateAlignedStore( 2994 V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); 2995 } 2996 return Ptr; 2997 }; 2998 2999 // Could have events and/or vaargs. 3000 if (E->getArg(3)->getType()->isBlockPointerType()) { 3001 // No events passed, but has variadic arguments. 3002 Name = "__enqueue_kernel_vaargs"; 3003 auto Info = 3004 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 3005 llvm::Value *Kernel = 3006 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3007 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3008 auto *PtrToSizeArray = CreateArrayForSizeVar(4); 3009 3010 // Create a vector of the arguments, as well as a constant value to 3011 // express to the runtime the number of variadic arguments. 
3012 std::vector<llvm::Value *> Args = { 3013 Queue, Flags, Range, 3014 Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), 3015 PtrToSizeArray}; 3016 std::vector<llvm::Type *> ArgTys = { 3017 QueueTy, IntTy, RangeTy, 3018 GenericVoidPtrTy, GenericVoidPtrTy, IntTy, 3019 PtrToSizeArray->getType()}; 3020 3021 llvm::FunctionType *FTy = llvm::FunctionType::get( 3022 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3023 return RValue::get( 3024 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3025 llvm::ArrayRef<llvm::Value *>(Args))); 3026 } 3027 // Any calls now have event arguments passed. 3028 if (NumArgs >= 7) { 3029 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 3030 llvm::Type *EventPtrTy = EventTy->getPointerTo( 3031 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3032 3033 llvm::Value *NumEvents = 3034 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 3035 llvm::Value *EventList = 3036 E->getArg(4)->getType()->isArrayType() 3037 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 3038 : EmitScalarExpr(E->getArg(4)); 3039 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 3040 // Convert to generic address space. 3041 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 3042 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 3043 auto Info = 3044 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); 3045 llvm::Value *Kernel = 3046 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3047 llvm::Value *Block = 3048 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3049 3050 std::vector<llvm::Type *> ArgTys = { 3051 QueueTy, Int32Ty, RangeTy, Int32Ty, 3052 EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; 3053 3054 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 3055 EventList, ClkEvent, Kernel, Block}; 3056 3057 if (NumArgs == 7) { 3058 // Has events but no variadics. 
3059 Name = "__enqueue_kernel_basic_events"; 3060 llvm::FunctionType *FTy = llvm::FunctionType::get( 3061 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3062 return RValue::get( 3063 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3064 llvm::ArrayRef<llvm::Value *>(Args))); 3065 } 3066 // Has event info and variadics 3067 // Pass the number of variadics to the runtime function too. 3068 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 3069 ArgTys.push_back(Int32Ty); 3070 Name = "__enqueue_kernel_events_vaargs"; 3071 3072 auto *PtrToSizeArray = CreateArrayForSizeVar(7); 3073 Args.push_back(PtrToSizeArray); 3074 ArgTys.push_back(PtrToSizeArray->getType()); 3075 3076 llvm::FunctionType *FTy = llvm::FunctionType::get( 3077 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3078 return RValue::get( 3079 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3080 llvm::ArrayRef<llvm::Value *>(Args))); 3081 } 3082 LLVM_FALLTHROUGH; 3083 } 3084 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 3085 // parameter. 
3086 case Builtin::BIget_kernel_work_group_size: { 3087 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3088 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3089 auto Info = 3090 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 3091 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3092 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3093 return RValue::get(Builder.CreateCall( 3094 CGM.CreateRuntimeFunction( 3095 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 3096 false), 3097 "__get_kernel_work_group_size_impl"), 3098 {Kernel, Arg})); 3099 } 3100 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 3101 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3102 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3103 auto Info = 3104 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 3105 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3106 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3107 return RValue::get(Builder.CreateCall( 3108 CGM.CreateRuntimeFunction( 3109 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 3110 false), 3111 "__get_kernel_preferred_work_group_multiple_impl"), 3112 {Kernel, Arg})); 3113 } 3114 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: 3115 case Builtin::BIget_kernel_sub_group_count_for_ndrange: { 3116 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3117 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3118 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); 3119 llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); 3120 auto Info = 3121 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); 3122 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3123 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3124 const char *Name = 
3125 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange 3126 ? "__get_kernel_max_sub_group_size_for_ndrange_impl" 3127 : "__get_kernel_sub_group_count_for_ndrange_impl"; 3128 return RValue::get(Builder.CreateCall( 3129 CGM.CreateRuntimeFunction( 3130 llvm::FunctionType::get( 3131 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, 3132 false), 3133 Name), 3134 {NDRange, Kernel, Block})); 3135 } 3136 3137 case Builtin::BI__builtin_store_half: 3138 case Builtin::BI__builtin_store_halff: { 3139 Value *Val = EmitScalarExpr(E->getArg(0)); 3140 Address Address = EmitPointerWithAlignment(E->getArg(1)); 3141 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); 3142 return RValue::get(Builder.CreateStore(HalfVal, Address)); 3143 } 3144 case Builtin::BI__builtin_load_half: { 3145 Address Address = EmitPointerWithAlignment(E->getArg(0)); 3146 Value *HalfVal = Builder.CreateLoad(Address); 3147 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); 3148 } 3149 case Builtin::BI__builtin_load_halff: { 3150 Address Address = EmitPointerWithAlignment(E->getArg(0)); 3151 Value *HalfVal = Builder.CreateLoad(Address); 3152 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); 3153 } 3154 case Builtin::BIprintf: 3155 if (getTarget().getTriple().isNVPTX()) 3156 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 3157 break; 3158 case Builtin::BI__builtin_canonicalize: 3159 case Builtin::BI__builtin_canonicalizef: 3160 case Builtin::BI__builtin_canonicalizel: 3161 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 3162 3163 case Builtin::BI__builtin_thread_pointer: { 3164 if (!getContext().getTargetInfo().isTLSSupported()) 3165 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 3166 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 
3167 break; 3168 } 3169 case Builtin::BI__builtin_os_log_format: 3170 return emitBuiltinOSLogFormat(*E); 3171 3172 case Builtin::BI__builtin_os_log_format_buffer_size: { 3173 analyze_os_log::OSLogBufferLayout Layout; 3174 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 3175 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 3176 Layout.size().getQuantity())); 3177 } 3178 3179 case Builtin::BI__xray_customevent: { 3180 if (!ShouldXRayInstrumentFunction()) 3181 return RValue::getIgnored(); 3182 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) 3183 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents()) 3184 return RValue::getIgnored(); 3185 3186 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 3187 auto FTy = F->getFunctionType(); 3188 auto Arg0 = E->getArg(0); 3189 auto Arg0Val = EmitScalarExpr(Arg0); 3190 auto Arg0Ty = Arg0->getType(); 3191 auto PTy0 = FTy->getParamType(0); 3192 if (PTy0 != Arg0Val->getType()) { 3193 if (Arg0Ty->isArrayType()) 3194 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 3195 else 3196 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 3197 } 3198 auto Arg1 = EmitScalarExpr(E->getArg(1)); 3199 auto PTy1 = FTy->getParamType(1); 3200 if (PTy1 != Arg1->getType()) 3201 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 3202 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 3203 } 3204 3205 case Builtin::BI__builtin_ms_va_start: 3206 case Builtin::BI__builtin_ms_va_end: 3207 return RValue::get( 3208 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 3209 BuiltinID == Builtin::BI__builtin_ms_va_start)); 3210 3211 case Builtin::BI__builtin_ms_va_copy: { 3212 // Lower this manually. We can't reliably determine whether or not any 3213 // given va_copy() is for a Win64 va_list from the calling convention 3214 // alone, because it's legal to do this from a System V ABI function. 
3215 // With opaque pointer types, we won't have enough information in LLVM 3216 // IR to determine this from the argument types, either. Best to do it 3217 // now, while we have enough information. 3218 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 3219 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 3220 3221 llvm::Type *BPP = Int8PtrPtrTy; 3222 3223 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 3224 DestAddr.getAlignment()); 3225 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 3226 SrcAddr.getAlignment()); 3227 3228 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 3229 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); 3230 } 3231 } 3232 3233 // If this is an alias for a lib function (e.g. __builtin_sin), emit 3234 // the call using the normal call path, but using the unmangled 3235 // version of the function name. 3236 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 3237 return emitLibraryCall(*this, FD, E, 3238 CGM.getBuiltinLibFunction(FD, BuiltinID)); 3239 3240 // If this is a predefined lib function (e.g. malloc), emit the call 3241 // using exactly the normal call path. 3242 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 3243 return emitLibraryCall(*this, FD, E, 3244 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 3245 3246 // Check that a call to a target specific builtin has the correct target 3247 // features. 3248 // This is down here to avoid non-target specific builtins, however, if 3249 // generic builtins start to require generic target features then we 3250 // can move this up to the beginning of the function. 3251 checkTargetFeatures(E, FD); 3252 3253 // See if we have a target specific intrinsic. 
3254 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 3255 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 3256 StringRef Prefix = 3257 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 3258 if (!Prefix.empty()) { 3259 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 3260 // NOTE we dont need to perform a compatibility flag check here since the 3261 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 3262 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 3263 if (IntrinsicID == Intrinsic::not_intrinsic) 3264 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 3265 } 3266 3267 if (IntrinsicID != Intrinsic::not_intrinsic) { 3268 SmallVector<Value*, 16> Args; 3269 3270 // Find out if any arguments are required to be integer constant 3271 // expressions. 3272 unsigned ICEArguments = 0; 3273 ASTContext::GetBuiltinTypeError Error; 3274 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 3275 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 3276 3277 Function *F = CGM.getIntrinsic(IntrinsicID); 3278 llvm::FunctionType *FTy = F->getFunctionType(); 3279 3280 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 3281 Value *ArgValue; 3282 // If this is a normal argument, just emit it as a scalar. 3283 if ((ICEArguments & (1 << i)) == 0) { 3284 ArgValue = EmitScalarExpr(E->getArg(i)); 3285 } else { 3286 // If this is required to be a constant, constant fold it so that we 3287 // know that the generated intrinsic gets a ConstantInt. 3288 llvm::APSInt Result; 3289 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 3290 assert(IsConst && "Constant arg isn't actually constant?"); 3291 (void)IsConst; 3292 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 3293 } 3294 3295 // If the intrinsic arg type is different from the builtin arg type 3296 // we need to do a bit cast. 
3297 llvm::Type *PTy = FTy->getParamType(i); 3298 if (PTy != ArgValue->getType()) { 3299 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 3300 "Must be able to losslessly bit cast to param"); 3301 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 3302 } 3303 3304 Args.push_back(ArgValue); 3305 } 3306 3307 Value *V = Builder.CreateCall(F, Args); 3308 QualType BuiltinRetType = E->getType(); 3309 3310 llvm::Type *RetTy = VoidTy; 3311 if (!BuiltinRetType->isVoidType()) 3312 RetTy = ConvertType(BuiltinRetType); 3313 3314 if (RetTy != V->getType()) { 3315 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 3316 "Must be able to losslessly bit cast result type"); 3317 V = Builder.CreateBitCast(V, RetTy); 3318 } 3319 3320 return RValue::get(V); 3321 } 3322 3323 // See if we have a target specific builtin that needs to be lowered. 3324 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 3325 return RValue::get(V); 3326 3327 ErrorUnsupported(E, "builtin function"); 3328 3329 // Unknown builtin, for now just dump it out and return undef. 
3330 return GetUndefRValue(E->getType()); 3331 } 3332 3333 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 3334 unsigned BuiltinID, const CallExpr *E, 3335 llvm::Triple::ArchType Arch) { 3336 switch (Arch) { 3337 case llvm::Triple::arm: 3338 case llvm::Triple::armeb: 3339 case llvm::Triple::thumb: 3340 case llvm::Triple::thumbeb: 3341 return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch); 3342 case llvm::Triple::aarch64: 3343 case llvm::Triple::aarch64_be: 3344 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); 3345 case llvm::Triple::x86: 3346 case llvm::Triple::x86_64: 3347 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 3348 case llvm::Triple::ppc: 3349 case llvm::Triple::ppc64: 3350 case llvm::Triple::ppc64le: 3351 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 3352 case llvm::Triple::r600: 3353 case llvm::Triple::amdgcn: 3354 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 3355 case llvm::Triple::systemz: 3356 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 3357 case llvm::Triple::nvptx: 3358 case llvm::Triple::nvptx64: 3359 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 3360 case llvm::Triple::wasm32: 3361 case llvm::Triple::wasm64: 3362 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 3363 case llvm::Triple::hexagon: 3364 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E); 3365 default: 3366 return nullptr; 3367 } 3368 } 3369 3370 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 3371 const CallExpr *E) { 3372 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 3373 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 3374 return EmitTargetArchBuiltinExpr( 3375 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 3376 getContext().getAuxTargetInfo()->getTriple().getArch()); 3377 } 3378 3379 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 3380 getTarget().getTriple().getArch()); 3381 } 3382 3383 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 3384 NeonTypeFlags TypeFlags, 3385 
llvm::Triple::ArchType Arch, 3386 bool V1Ty=false) { 3387 int IsQuad = TypeFlags.isQuad(); 3388 switch (TypeFlags.getEltType()) { 3389 case NeonTypeFlags::Int8: 3390 case NeonTypeFlags::Poly8: 3391 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 3392 case NeonTypeFlags::Int16: 3393 case NeonTypeFlags::Poly16: 3394 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 3395 case NeonTypeFlags::Float16: 3396 // FIXME: Only AArch64 backend can so far properly handle half types. 3397 // Remove else part once ARM backend support for half is complete. 3398 if (Arch == llvm::Triple::aarch64) 3399 return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); 3400 else 3401 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 3402 case NeonTypeFlags::Int32: 3403 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 3404 case NeonTypeFlags::Int64: 3405 case NeonTypeFlags::Poly64: 3406 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 3407 case NeonTypeFlags::Poly128: 3408 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 3409 // There is a lot of i128 and f128 API missing. 3410 // so we use v16i8 to represent poly128 and get pattern matched. 3411 return llvm::VectorType::get(CGF->Int8Ty, 16); 3412 case NeonTypeFlags::Float32: 3413 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 3414 case NeonTypeFlags::Float64: 3415 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 3416 } 3417 llvm_unreachable("Unknown vector element type!"); 3418 } 3419 3420 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 3421 NeonTypeFlags IntTypeFlags) { 3422 int IsQuad = IntTypeFlags.isQuad(); 3423 switch (IntTypeFlags.getEltType()) { 3424 case NeonTypeFlags::Int16: 3425 return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad)); 3426 case NeonTypeFlags::Int32: 3427 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 3428 case NeonTypeFlags::Int64: 3429 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 3430 default: 3431 llvm_unreachable("Type can't be converted to floating-point!"); 3432 } 3433 } 3434 3435 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 3436 unsigned nElts = V->getType()->getVectorNumElements(); 3437 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 3438 return Builder.CreateShuffleVector(V, V, SV, "lane"); 3439 } 3440 3441 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 3442 const char *name, 3443 unsigned shift, bool rightshift) { 3444 unsigned j = 0; 3445 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3446 ai != ae; ++ai, ++j) 3447 if (shift > 0 && shift == j) 3448 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 3449 else 3450 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 3451 3452 return Builder.CreateCall(F, Ops, name); 3453 } 3454 3455 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 3456 bool neg) { 3457 int SV = cast<ConstantInt>(V)->getSExtValue(); 3458 return ConstantInt::get(Ty, neg ? -SV : SV); 3459 } 3460 3461 // \brief Right-shift a vector by a constant. 
// \param Vec   operand; it is bitcast to \p Ty before shifting.
// \param Shift shift amount; must be a ConstantInt (cast<> is unconditional).
// \param Ty    type of the operation; must be a vector type (cast<> is
//              unconditional).
// \param usgn  true emits a logical shift (lshr), false an arithmetic shift
//              (ashr).
// \param name  name given to the emitted shift instruction.
// \returns the shift instruction, or an all-zero constant of type \p Ty when
//          an unsigned value is shifted by exactly its element width.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  // Re-materialise the (possibly adjusted) amount as a constant of type Ty.
  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}

// Bit flags combined into the TypeModifier column of the NEON intrinsic
// tables below. How each bit is interpreted is decided by the code that
// consumes the tables, which is not part of this section of the file.
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  // Named combinations of the single-bit flags above.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

namespace {
// One row of a NEON builtin -> LLVM intrinsic table.
struct NeonIntrinsicInfo {
  const char *NameHint;       // Stringized builtin base name (#NameBase).
  unsigned BuiltinID;         // NEON::BI__builtin_neon_<NameBase>.
  unsigned LLVMIntrinsic;     // Intrinsic ID, or 0 for NEONMAP0 rows.
  unsigned AltLLVMIntrinsic;  // Second intrinsic ID (NEONMAP2 rows), else 0.
  unsigned TypeModifier;      // OR of the flags from the enum above.

  // Rows order by BuiltinID, both against a bare ID and against another row.
  // NOTE(review): presumably this supports a sorted-table search by builtin
  // ID - confirm against the lookup code.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

// NEONMAP0: row with no LLVM intrinsic and no type modifier (all zero).
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

// NEONMAP1: row with a single LLVM intrinsic and a type modifier.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }

// NEONMAP2: row with a primary and an alternate LLVM intrinsic plus a type
// modifier.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      TypeModifier }

// Rows mapping NEON builtins to arm_neon_* (and a few generic) intrinsics.
// The rows appear to be listed in ascending ASCII order of NameBase.
// NOTE(review): presumably the order must be preserved for lookup by
// BuiltinID - confirm before inserting or reordering rows.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_v),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  // NOTE(review): there is no vcvta_u16_v row here, although vcvtaq, vcvtm,
  // vcvtmq, vcvtn, vcvtnq, vcvtp and vcvtpq all list a u16 variant - confirm
  // whether the omission is intentional.
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_v),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// Rows mapping NEON builtins to aarch64_neon_* / aarch64_crypto_* (and a few
// generic) intrinsics. Same row layout and apparent ordering as the ARM table
// above.
// NOTE(review): presumably the order must be preserved for lookup by
// BuiltinID - confirm before inserting or reordering rows.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_v),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f16_v),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};

static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt,
AddRetType | Add1ArgType), 3934 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3935 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3936 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3937 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3938 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3939 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3940 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3941 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3942 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3943 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3944 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3945 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3946 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3947 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3948 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3949 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3950 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3951 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 3952 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3953 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 3954 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3955 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3956 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3957 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3958 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 3959 
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3960 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3961 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3962 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3963 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3964 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3965 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3966 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3967 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3968 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3969 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3970 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3971 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3972 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3973 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3974 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3975 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3976 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3977 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3978 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3979 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 3980 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 3981 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 3982 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3983 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3984 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3985 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3986 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, 
AddRetType | Add1ArgType), 3987 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3988 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3989 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3990 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3991 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3992 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3993 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 3994 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3995 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 3996 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3997 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3998 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 3999 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 4000 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 4001 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 4002 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 4003 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 4004 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 4005 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 4006 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 4007 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 4008 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 4009 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 4010 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 4011 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 4012 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 4013 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, 
VectorRet | Use64BitVectors), 4014 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 4015 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 4016 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 4017 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 4018 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 4019 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 4020 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 4021 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 4022 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 4023 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 4024 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 4025 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 4026 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 4027 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 4028 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 4029 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 4030 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 4031 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 4032 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 4033 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 4034 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 4035 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 4036 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 4037 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 4038 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 4039 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 
4040 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4041 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4042 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4043 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4044 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 4045 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 4046 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4047 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4048 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4049 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4050 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 4051 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 4052 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 4053 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 4054 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 4055 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 4056 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 4057 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 4058 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 4059 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 4060 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 4061 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 4062 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 4063 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 4064 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 4065 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 4066 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, 
Vectorize1ArgType | Use64BitVectors), 4067 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 4068 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 4069 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 4070 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 4071 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 4072 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 4073 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 4074 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 4075 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 4076 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 4077 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 4078 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 4079 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 4080 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 4081 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 4082 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 4083 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 4084 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 4085 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 4086 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 4087 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 4088 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 4089 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 4090 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 4091 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 4092 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 4093 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 4094 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 4095 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 4096 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 4097 NEONMAP1(vsrid_n_u64, 
aarch64_neon_vsri, Vectorize1ArgType), 4098 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 4099 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 4100 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 4101 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 4102 }; 4103 4104 #undef NEONMAP0 4105 #undef NEONMAP1 4106 #undef NEONMAP2 4107 4108 static bool NEONSIMDIntrinsicsProvenSorted = false; 4109 4110 static bool AArch64SIMDIntrinsicsProvenSorted = false; 4111 static bool AArch64SISDIntrinsicsProvenSorted = false; 4112 4113 4114 static const NeonIntrinsicInfo * 4115 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 4116 unsigned BuiltinID, bool &MapProvenSorted) { 4117 4118 #ifndef NDEBUG 4119 if (!MapProvenSorted) { 4120 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 4121 MapProvenSorted = true; 4122 } 4123 #endif 4124 4125 const NeonIntrinsicInfo *Builtin = 4126 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 4127 4128 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 4129 return Builtin; 4130 4131 return nullptr; 4132 } 4133 4134 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 4135 unsigned Modifier, 4136 llvm::Type *ArgType, 4137 const CallExpr *E) { 4138 int VectorSize = 0; 4139 if (Modifier & Use64BitVectors) 4140 VectorSize = 64; 4141 else if (Modifier & Use128BitVectors) 4142 VectorSize = 128; 4143 4144 // Return type. 4145 SmallVector<llvm::Type *, 3> Tys; 4146 if (Modifier & AddRetType) { 4147 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 4148 if (Modifier & VectorizeRetType) 4149 Ty = llvm::VectorType::get( 4150 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 4151 4152 Tys.push_back(Ty); 4153 } 4154 4155 // Arguments. 4156 if (Modifier & VectorizeArgTypes) { 4157 int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 4158 ArgType = llvm::VectorType::get(ArgType, Elts); 4159 } 4160 4161 if (Modifier & (Add1ArgType | Add2ArgTypes)) 4162 Tys.push_back(ArgType); 4163 4164 if (Modifier & Add2ArgTypes) 4165 Tys.push_back(ArgType); 4166 4167 if (Modifier & InventFloatType) 4168 Tys.push_back(FloatTy); 4169 4170 return CGM.getIntrinsic(IntrinsicID, Tys); 4171 } 4172 4173 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 4174 const NeonIntrinsicInfo &SISDInfo, 4175 SmallVectorImpl<Value *> &Ops, 4176 const CallExpr *E) { 4177 unsigned BuiltinID = SISDInfo.BuiltinID; 4178 unsigned int Int = SISDInfo.LLVMIntrinsic; 4179 unsigned Modifier = SISDInfo.TypeModifier; 4180 const char *s = SISDInfo.NameHint; 4181 4182 switch (BuiltinID) { 4183 case NEON::BI__builtin_neon_vcled_s64: 4184 case NEON::BI__builtin_neon_vcled_u64: 4185 case NEON::BI__builtin_neon_vcles_f32: 4186 case NEON::BI__builtin_neon_vcled_f64: 4187 case NEON::BI__builtin_neon_vcltd_s64: 4188 case NEON::BI__builtin_neon_vcltd_u64: 4189 case NEON::BI__builtin_neon_vclts_f32: 4190 case NEON::BI__builtin_neon_vcltd_f64: 4191 case NEON::BI__builtin_neon_vcales_f32: 4192 case NEON::BI__builtin_neon_vcaled_f64: 4193 case NEON::BI__builtin_neon_vcalts_f32: 4194 case NEON::BI__builtin_neon_vcaltd_f64: 4195 // Only one direction of comparisons actually exist, cmle is actually a cmge 4196 // with swapped operands. The table gives us the right intrinsic but we 4197 // still need to do the swap. 4198 std::swap(Ops[0], Ops[1]); 4199 break; 4200 } 4201 4202 assert(Int && "Generic code assumes a valid intrinsic"); 4203 4204 // Determine the type(s) of this overloaded AArch64 intrinsic. 
4205 const Expr *Arg = E->getArg(0); 4206 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 4207 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 4208 4209 int j = 0; 4210 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 4211 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 4212 ai != ae; ++ai, ++j) { 4213 llvm::Type *ArgTy = ai->getType(); 4214 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 4215 ArgTy->getPrimitiveSizeInBits()) 4216 continue; 4217 4218 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 4219 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 4220 // it before inserting. 4221 Ops[j] = 4222 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 4223 Ops[j] = 4224 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 4225 } 4226 4227 Value *Result = CGF.EmitNeonCall(F, Ops, s); 4228 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 4229 if (ResultType->getPrimitiveSizeInBits() < 4230 Result->getType()->getPrimitiveSizeInBits()) 4231 return CGF.Builder.CreateExtractElement(Result, C0); 4232 4233 return CGF.Builder.CreateBitCast(Result, ResultType, s); 4234 } 4235 4236 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 4237 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 4238 const char *NameHint, unsigned Modifier, const CallExpr *E, 4239 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, 4240 llvm::Triple::ArchType Arch) { 4241 // Get the last argument, which specifies the vector type. 4242 llvm::APSInt NeonTypeConst; 4243 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 4244 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 4245 return nullptr; 4246 4247 // Determine the type of this overloaded NEON intrinsic. 
4248 NeonTypeFlags Type(NeonTypeConst.getZExtValue()); 4249 bool Usgn = Type.isUnsigned(); 4250 bool Quad = Type.isQuad(); 4251 4252 llvm::VectorType *VTy = GetNeonType(this, Type, Arch); 4253 llvm::Type *Ty = VTy; 4254 if (!Ty) 4255 return nullptr; 4256 4257 auto getAlignmentValue32 = [&](Address addr) -> Value* { 4258 return Builder.getInt32(addr.getAlignment().getQuantity()); 4259 }; 4260 4261 unsigned Int = LLVMIntrinsic; 4262 if ((Modifier & UnsignedAlts) && !Usgn) 4263 Int = AltLLVMIntrinsic; 4264 4265 switch (BuiltinID) { 4266 default: break; 4267 case NEON::BI__builtin_neon_vabs_v: 4268 case NEON::BI__builtin_neon_vabsq_v: 4269 if (VTy->getElementType()->isFloatingPointTy()) 4270 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 4271 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 4272 case NEON::BI__builtin_neon_vaddhn_v: { 4273 llvm::VectorType *SrcTy = 4274 llvm::VectorType::getExtendedElementVectorType(VTy); 4275 4276 // %sum = add <4 x i32> %lhs, %rhs 4277 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4278 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 4279 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 4280 4281 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 4282 Constant *ShiftAmt = 4283 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 4284 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 4285 4286 // %res = trunc <4 x i32> %high to <4 x i16> 4287 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 4288 } 4289 case NEON::BI__builtin_neon_vcale_v: 4290 case NEON::BI__builtin_neon_vcaleq_v: 4291 case NEON::BI__builtin_neon_vcalt_v: 4292 case NEON::BI__builtin_neon_vcaltq_v: 4293 std::swap(Ops[0], Ops[1]); 4294 LLVM_FALLTHROUGH; 4295 case NEON::BI__builtin_neon_vcage_v: 4296 case NEON::BI__builtin_neon_vcageq_v: 4297 case NEON::BI__builtin_neon_vcagt_v: 4298 case NEON::BI__builtin_neon_vcagtq_v: { 4299 llvm::Type *Ty; 4300 switch (VTy->getScalarSizeInBits()) { 4301 
default: llvm_unreachable("unexpected type"); 4302 case 32: 4303 Ty = FloatTy; 4304 break; 4305 case 64: 4306 Ty = DoubleTy; 4307 break; 4308 case 16: 4309 Ty = HalfTy; 4310 break; 4311 } 4312 llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements()); 4313 llvm::Type *Tys[] = { VTy, VecFlt }; 4314 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 4315 return EmitNeonCall(F, Ops, NameHint); 4316 } 4317 case NEON::BI__builtin_neon_vclz_v: 4318 case NEON::BI__builtin_neon_vclzq_v: 4319 // We generate target-independent intrinsic, which needs a second argument 4320 // for whether or not clz of zero is undefined; on ARM it isn't. 4321 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 4322 break; 4323 case NEON::BI__builtin_neon_vcvt_f32_v: 4324 case NEON::BI__builtin_neon_vcvtq_f32_v: 4325 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4326 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch); 4327 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 4328 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 4329 case NEON::BI__builtin_neon_vcvt_f16_v: 4330 case NEON::BI__builtin_neon_vcvtq_f16_v: 4331 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4332 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch); 4333 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 4334 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 4335 case NEON::BI__builtin_neon_vcvt_n_f16_v: 4336 case NEON::BI__builtin_neon_vcvt_n_f32_v: 4337 case NEON::BI__builtin_neon_vcvt_n_f64_v: 4338 case NEON::BI__builtin_neon_vcvtq_n_f16_v: 4339 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 4340 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 4341 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; 4342 Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; 4343 Function *F = CGM.getIntrinsic(Int, Tys); 4344 return EmitNeonCall(F, Ops, "vcvt_n"); 4345 } 4346 case NEON::BI__builtin_neon_vcvt_n_s16_v: 4347 case NEON::BI__builtin_neon_vcvt_n_s32_v: 4348 case NEON::BI__builtin_neon_vcvt_n_u16_v: 4349 case NEON::BI__builtin_neon_vcvt_n_u32_v: 4350 case NEON::BI__builtin_neon_vcvt_n_s64_v: 4351 case NEON::BI__builtin_neon_vcvt_n_u64_v: 4352 case NEON::BI__builtin_neon_vcvtq_n_s16_v: 4353 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 4354 case NEON::BI__builtin_neon_vcvtq_n_u16_v: 4355 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 4356 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 4357 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 4358 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 4359 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 4360 return EmitNeonCall(F, Ops, "vcvt_n"); 4361 } 4362 case NEON::BI__builtin_neon_vcvt_s32_v: 4363 case NEON::BI__builtin_neon_vcvt_u32_v: 4364 case NEON::BI__builtin_neon_vcvt_s64_v: 4365 case NEON::BI__builtin_neon_vcvt_u64_v: 4366 case NEON::BI__builtin_neon_vcvt_s16_v: 4367 case NEON::BI__builtin_neon_vcvt_u16_v: 4368 case NEON::BI__builtin_neon_vcvtq_s32_v: 4369 case NEON::BI__builtin_neon_vcvtq_u32_v: 4370 case NEON::BI__builtin_neon_vcvtq_s64_v: 4371 case NEON::BI__builtin_neon_vcvtq_u64_v: 4372 case NEON::BI__builtin_neon_vcvtq_s16_v: 4373 case NEON::BI__builtin_neon_vcvtq_u16_v: { 4374 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 4375 return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 4376 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 4377 } 4378 case NEON::BI__builtin_neon_vcvta_s16_v: 4379 case NEON::BI__builtin_neon_vcvta_s32_v: 4380 case NEON::BI__builtin_neon_vcvta_s64_v: 4381 case NEON::BI__builtin_neon_vcvta_u32_v: 4382 case NEON::BI__builtin_neon_vcvta_u64_v: 4383 case NEON::BI__builtin_neon_vcvtaq_s16_v: 4384 case NEON::BI__builtin_neon_vcvtaq_s32_v: 4385 case NEON::BI__builtin_neon_vcvtaq_s64_v: 4386 case NEON::BI__builtin_neon_vcvtaq_u16_v: 4387 case NEON::BI__builtin_neon_vcvtaq_u32_v: 4388 case NEON::BI__builtin_neon_vcvtaq_u64_v: 4389 case NEON::BI__builtin_neon_vcvtn_s16_v: 4390 case NEON::BI__builtin_neon_vcvtn_s32_v: 4391 case NEON::BI__builtin_neon_vcvtn_s64_v: 4392 case NEON::BI__builtin_neon_vcvtn_u16_v: 4393 case NEON::BI__builtin_neon_vcvtn_u32_v: 4394 case NEON::BI__builtin_neon_vcvtn_u64_v: 4395 case NEON::BI__builtin_neon_vcvtnq_s16_v: 4396 case NEON::BI__builtin_neon_vcvtnq_s32_v: 4397 case NEON::BI__builtin_neon_vcvtnq_s64_v: 4398 case NEON::BI__builtin_neon_vcvtnq_u16_v: 4399 case NEON::BI__builtin_neon_vcvtnq_u32_v: 4400 case NEON::BI__builtin_neon_vcvtnq_u64_v: 4401 case NEON::BI__builtin_neon_vcvtp_s16_v: 4402 case NEON::BI__builtin_neon_vcvtp_s32_v: 4403 case NEON::BI__builtin_neon_vcvtp_s64_v: 4404 case NEON::BI__builtin_neon_vcvtp_u16_v: 4405 case NEON::BI__builtin_neon_vcvtp_u32_v: 4406 case NEON::BI__builtin_neon_vcvtp_u64_v: 4407 case NEON::BI__builtin_neon_vcvtpq_s16_v: 4408 case NEON::BI__builtin_neon_vcvtpq_s32_v: 4409 case NEON::BI__builtin_neon_vcvtpq_s64_v: 4410 case NEON::BI__builtin_neon_vcvtpq_u16_v: 4411 case NEON::BI__builtin_neon_vcvtpq_u32_v: 4412 case NEON::BI__builtin_neon_vcvtpq_u64_v: 4413 case NEON::BI__builtin_neon_vcvtm_s16_v: 4414 case NEON::BI__builtin_neon_vcvtm_s32_v: 4415 case NEON::BI__builtin_neon_vcvtm_s64_v: 4416 case NEON::BI__builtin_neon_vcvtm_u16_v: 4417 case NEON::BI__builtin_neon_vcvtm_u32_v: 4418 case 
NEON::BI__builtin_neon_vcvtm_u64_v: 4419 case NEON::BI__builtin_neon_vcvtmq_s16_v: 4420 case NEON::BI__builtin_neon_vcvtmq_s32_v: 4421 case NEON::BI__builtin_neon_vcvtmq_s64_v: 4422 case NEON::BI__builtin_neon_vcvtmq_u16_v: 4423 case NEON::BI__builtin_neon_vcvtmq_u32_v: 4424 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 4425 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 4426 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 4427 } 4428 case NEON::BI__builtin_neon_vext_v: 4429 case NEON::BI__builtin_neon_vextq_v: { 4430 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 4431 SmallVector<uint32_t, 16> Indices; 4432 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 4433 Indices.push_back(i+CV); 4434 4435 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4436 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4437 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); 4438 } 4439 case NEON::BI__builtin_neon_vfma_v: 4440 case NEON::BI__builtin_neon_vfmaq_v: { 4441 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4442 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4443 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4444 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4445 4446 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
4447 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 4448 } 4449 case NEON::BI__builtin_neon_vld1_v: 4450 case NEON::BI__builtin_neon_vld1q_v: { 4451 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4452 Ops.push_back(getAlignmentValue32(PtrOp0)); 4453 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); 4454 } 4455 case NEON::BI__builtin_neon_vld2_v: 4456 case NEON::BI__builtin_neon_vld2q_v: 4457 case NEON::BI__builtin_neon_vld3_v: 4458 case NEON::BI__builtin_neon_vld3q_v: 4459 case NEON::BI__builtin_neon_vld4_v: 4460 case NEON::BI__builtin_neon_vld4q_v: { 4461 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4462 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 4463 Value *Align = getAlignmentValue32(PtrOp1); 4464 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); 4465 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4466 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4467 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4468 } 4469 case NEON::BI__builtin_neon_vld1_dup_v: 4470 case NEON::BI__builtin_neon_vld1q_dup_v: { 4471 Value *V = UndefValue::get(Ty); 4472 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 4473 PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty); 4474 LoadInst *Ld = Builder.CreateLoad(PtrOp0); 4475 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 4476 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 4477 return EmitNeonSplat(Ops[0], CI); 4478 } 4479 case NEON::BI__builtin_neon_vld2_lane_v: 4480 case NEON::BI__builtin_neon_vld2q_lane_v: 4481 case NEON::BI__builtin_neon_vld3_lane_v: 4482 case NEON::BI__builtin_neon_vld3q_lane_v: 4483 case NEON::BI__builtin_neon_vld4_lane_v: 4484 case NEON::BI__builtin_neon_vld4q_lane_v: { 4485 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4486 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 4487 for (unsigned I = 2; I < Ops.size() - 1; ++I) 4488 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 4489 Ops.push_back(getAlignmentValue32(PtrOp1)); 4490 Ops[1] = Builder.CreateCall(F, 
makeArrayRef(Ops).slice(1), NameHint); 4491 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4492 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4493 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4494 } 4495 case NEON::BI__builtin_neon_vmovl_v: { 4496 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 4497 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 4498 if (Usgn) 4499 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 4500 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 4501 } 4502 case NEON::BI__builtin_neon_vmovn_v: { 4503 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 4504 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 4505 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 4506 } 4507 case NEON::BI__builtin_neon_vmull_v: 4508 // FIXME: the integer vmull operations could be emitted in terms of pure 4509 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 4510 // hoisting the exts outside loops. Until global ISel comes along that can 4511 // see through such movement this leads to bad CodeGen. So we need an 4512 // intrinsic for now. 4513 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 4514 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 4515 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 4516 case NEON::BI__builtin_neon_vpadal_v: 4517 case NEON::BI__builtin_neon_vpadalq_v: { 4518 // The source operand type has twice as many elements of half the size. 
4519 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 4520 llvm::Type *EltTy = 4521 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 4522 llvm::Type *NarrowTy = 4523 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 4524 llvm::Type *Tys[2] = { Ty, NarrowTy }; 4525 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 4526 } 4527 case NEON::BI__builtin_neon_vpaddl_v: 4528 case NEON::BI__builtin_neon_vpaddlq_v: { 4529 // The source operand type has twice as many elements of half the size. 4530 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 4531 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 4532 llvm::Type *NarrowTy = 4533 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 4534 llvm::Type *Tys[2] = { Ty, NarrowTy }; 4535 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 4536 } 4537 case NEON::BI__builtin_neon_vqdmlal_v: 4538 case NEON::BI__builtin_neon_vqdmlsl_v: { 4539 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 4540 Ops[1] = 4541 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); 4542 Ops.resize(2); 4543 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); 4544 } 4545 case NEON::BI__builtin_neon_vqshl_n_v: 4546 case NEON::BI__builtin_neon_vqshlq_n_v: 4547 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 4548 1, false); 4549 case NEON::BI__builtin_neon_vqshlu_n_v: 4550 case NEON::BI__builtin_neon_vqshluq_n_v: 4551 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 4552 1, false); 4553 case NEON::BI__builtin_neon_vrecpe_v: 4554 case NEON::BI__builtin_neon_vrecpeq_v: 4555 case NEON::BI__builtin_neon_vrsqrte_v: 4556 case NEON::BI__builtin_neon_vrsqrteq_v: 4557 Int = Ty->isFPOrFPVectorTy() ? 
LLVMIntrinsic : AltLLVMIntrinsic; 4558 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 4559 4560 case NEON::BI__builtin_neon_vrshr_n_v: 4561 case NEON::BI__builtin_neon_vrshrq_n_v: 4562 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 4563 1, true); 4564 case NEON::BI__builtin_neon_vshl_n_v: 4565 case NEON::BI__builtin_neon_vshlq_n_v: 4566 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 4567 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 4568 "vshl_n"); 4569 case NEON::BI__builtin_neon_vshll_n_v: { 4570 llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); 4571 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4572 if (Usgn) 4573 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 4574 else 4575 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 4576 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 4577 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 4578 } 4579 case NEON::BI__builtin_neon_vshrn_n_v: { 4580 llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); 4581 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4582 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 4583 if (Usgn) 4584 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 4585 else 4586 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 4587 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 4588 } 4589 case NEON::BI__builtin_neon_vshr_n_v: 4590 case NEON::BI__builtin_neon_vshrq_n_v: 4591 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); 4592 case NEON::BI__builtin_neon_vst1_v: 4593 case NEON::BI__builtin_neon_vst1q_v: 4594 case NEON::BI__builtin_neon_vst2_v: 4595 case NEON::BI__builtin_neon_vst2q_v: 4596 case NEON::BI__builtin_neon_vst3_v: 4597 case NEON::BI__builtin_neon_vst3q_v: 4598 case NEON::BI__builtin_neon_vst4_v: 4599 case NEON::BI__builtin_neon_vst4q_v: 4600 case NEON::BI__builtin_neon_vst2_lane_v: 4601 case NEON::BI__builtin_neon_vst2q_lane_v: 4602 case NEON::BI__builtin_neon_vst3_lane_v: 4603 case 
NEON::BI__builtin_neon_vst3q_lane_v: 4604 case NEON::BI__builtin_neon_vst4_lane_v: 4605 case NEON::BI__builtin_neon_vst4q_lane_v: { 4606 llvm::Type *Tys[] = {Int8PtrTy, Ty}; 4607 Ops.push_back(getAlignmentValue32(PtrOp0)); 4608 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 4609 } 4610 case NEON::BI__builtin_neon_vsubhn_v: { 4611 llvm::VectorType *SrcTy = 4612 llvm::VectorType::getExtendedElementVectorType(VTy); 4613 4614 // %sum = add <4 x i32> %lhs, %rhs 4615 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4616 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 4617 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 4618 4619 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 4620 Constant *ShiftAmt = 4621 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 4622 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 4623 4624 // %res = trunc <4 x i32> %high to <4 x i16> 4625 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 4626 } 4627 case NEON::BI__builtin_neon_vtrn_v: 4628 case NEON::BI__builtin_neon_vtrnq_v: { 4629 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4630 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4631 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4632 Value *SV = nullptr; 4633 4634 for (unsigned vi = 0; vi != 2; ++vi) { 4635 SmallVector<uint32_t, 16> Indices; 4636 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 4637 Indices.push_back(i+vi); 4638 Indices.push_back(i+e+vi); 4639 } 4640 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4641 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 4642 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4643 } 4644 return SV; 4645 } 4646 case NEON::BI__builtin_neon_vtst_v: 4647 case NEON::BI__builtin_neon_vtstq_v: { 4648 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4649 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4650 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4651 Ops[0] = 
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4652 ConstantAggregateZero::get(Ty)); 4653 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 4654 } 4655 case NEON::BI__builtin_neon_vuzp_v: 4656 case NEON::BI__builtin_neon_vuzpq_v: { 4657 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4658 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4659 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4660 Value *SV = nullptr; 4661 4662 for (unsigned vi = 0; vi != 2; ++vi) { 4663 SmallVector<uint32_t, 16> Indices; 4664 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 4665 Indices.push_back(2*i+vi); 4666 4667 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4668 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 4669 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4670 } 4671 return SV; 4672 } 4673 case NEON::BI__builtin_neon_vzip_v: 4674 case NEON::BI__builtin_neon_vzipq_v: { 4675 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4676 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4677 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4678 Value *SV = nullptr; 4679 4680 for (unsigned vi = 0; vi != 2; ++vi) { 4681 SmallVector<uint32_t, 16> Indices; 4682 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 4683 Indices.push_back((i + vi*e) >> 1); 4684 Indices.push_back(((i + vi*e) >> 1)+e); 4685 } 4686 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4687 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 4688 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4689 } 4690 return SV; 4691 } 4692 } 4693 4694 assert(Int && "Expected valid intrinsic number"); 4695 4696 // Determine the type(s) of this overloaded AArch64 intrinsic. 
4697 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4698 4699 Value *Result = EmitNeonCall(F, Ops, NameHint); 4700 llvm::Type *ResultType = ConvertType(E->getType()); 4701 // AArch64 intrinsic one-element vector type cast to 4702 // scalar type expected by the builtin 4703 return Builder.CreateBitCast(Result, ResultType, NameHint); 4704 } 4705 4706 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 4707 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 4708 const CmpInst::Predicate Ip, const Twine &Name) { 4709 llvm::Type *OTy = Op->getType(); 4710 4711 // FIXME: this is utterly horrific. We should not be looking at previous 4712 // codegen context to find out what needs doing. Unfortunately TableGen 4713 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 4714 // (etc). 4715 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 4716 OTy = BI->getOperand(0)->getType(); 4717 4718 Op = Builder.CreateBitCast(Op, OTy); 4719 if (OTy->getScalarType()->isFloatingPointTy()) { 4720 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 4721 } else { 4722 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 4723 } 4724 return Builder.CreateSExt(Op, Ty, Name); 4725 } 4726 4727 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 4728 Value *ExtOp, Value *IndexOp, 4729 llvm::Type *ResTy, unsigned IntID, 4730 const char *Name) { 4731 SmallVector<Value *, 2> TblOps; 4732 if (ExtOp) 4733 TblOps.push_back(ExtOp); 4734 4735 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 4736 SmallVector<uint32_t, 16> Indices; 4737 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 4738 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 4739 Indices.push_back(2*i); 4740 Indices.push_back(2*i+1); 4741 } 4742 4743 int PairPos = 0, End = Ops.size() - 1; 4744 while (PairPos < End) { 4745 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4746 
Ops[PairPos+1], Indices, 4747 Name)); 4748 PairPos += 2; 4749 } 4750 4751 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 4752 // of the 128-bit lookup table with zero. 4753 if (PairPos == End) { 4754 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 4755 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4756 ZeroTbl, Indices, Name)); 4757 } 4758 4759 Function *TblF; 4760 TblOps.push_back(IndexOp); 4761 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 4762 4763 return CGF.EmitNeonCall(TblF, TblOps, Name); 4764 } 4765 4766 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 4767 unsigned Value; 4768 switch (BuiltinID) { 4769 default: 4770 return nullptr; 4771 case ARM::BI__builtin_arm_nop: 4772 Value = 0; 4773 break; 4774 case ARM::BI__builtin_arm_yield: 4775 case ARM::BI__yield: 4776 Value = 1; 4777 break; 4778 case ARM::BI__builtin_arm_wfe: 4779 case ARM::BI__wfe: 4780 Value = 2; 4781 break; 4782 case ARM::BI__builtin_arm_wfi: 4783 case ARM::BI__wfi: 4784 Value = 3; 4785 break; 4786 case ARM::BI__builtin_arm_sev: 4787 case ARM::BI__sev: 4788 Value = 4; 4789 break; 4790 case ARM::BI__builtin_arm_sevl: 4791 case ARM::BI__sevl: 4792 Value = 5; 4793 break; 4794 } 4795 4796 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 4797 llvm::ConstantInt::get(Int32Ty, Value)); 4798 } 4799 4800 // Generates the IR for the read/write special register builtin, 4801 // ValueType is the type of the value that is to be written or read, 4802 // RegisterType is the type of the register being written to or read from. 4803 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 4804 const CallExpr *E, 4805 llvm::Type *RegisterType, 4806 llvm::Type *ValueType, 4807 bool IsRead, 4808 StringRef SysReg = "") { 4809 // write and register intrinsics only support 32 and 64 bit operations. 
4810 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4811 && "Unsupported size for register."); 4812 4813 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4814 CodeGen::CodeGenModule &CGM = CGF.CGM; 4815 LLVMContext &Context = CGM.getLLVMContext(); 4816 4817 if (SysReg.empty()) { 4818 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4819 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 4820 } 4821 4822 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4823 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4824 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4825 4826 llvm::Type *Types[] = { RegisterType }; 4827 4828 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4829 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4830 && "Can't fit 64-bit value in 32-bit register"); 4831 4832 if (IsRead) { 4833 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4834 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4835 4836 if (MixedTypes) 4837 // Read into 64 bit register and then truncate result to 32 bit. 4838 return Builder.CreateTrunc(Call, ValueType); 4839 4840 if (ValueType->isPointerTy()) 4841 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4842 return Builder.CreateIntToPtr(Call, ValueType); 4843 4844 return Call; 4845 } 4846 4847 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4848 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4849 if (MixedTypes) { 4850 // Extend 32 bit write value to 64 bit to pass to write. 4851 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4852 return Builder.CreateCall(F, { Metadata, ArgValue }); 4853 } 4854 4855 if (ValueType->isPointerTy()) { 4856 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
4857 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4858 return Builder.CreateCall(F, { Metadata, ArgValue }); 4859 } 4860 4861 return Builder.CreateCall(F, { Metadata, ArgValue }); 4862 } 4863 4864 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4865 /// argument that specifies the vector type. 4866 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4867 switch (BuiltinID) { 4868 default: break; 4869 case NEON::BI__builtin_neon_vget_lane_i8: 4870 case NEON::BI__builtin_neon_vget_lane_i16: 4871 case NEON::BI__builtin_neon_vget_lane_i32: 4872 case NEON::BI__builtin_neon_vget_lane_i64: 4873 case NEON::BI__builtin_neon_vget_lane_f32: 4874 case NEON::BI__builtin_neon_vgetq_lane_i8: 4875 case NEON::BI__builtin_neon_vgetq_lane_i16: 4876 case NEON::BI__builtin_neon_vgetq_lane_i32: 4877 case NEON::BI__builtin_neon_vgetq_lane_i64: 4878 case NEON::BI__builtin_neon_vgetq_lane_f32: 4879 case NEON::BI__builtin_neon_vset_lane_i8: 4880 case NEON::BI__builtin_neon_vset_lane_i16: 4881 case NEON::BI__builtin_neon_vset_lane_i32: 4882 case NEON::BI__builtin_neon_vset_lane_i64: 4883 case NEON::BI__builtin_neon_vset_lane_f32: 4884 case NEON::BI__builtin_neon_vsetq_lane_i8: 4885 case NEON::BI__builtin_neon_vsetq_lane_i16: 4886 case NEON::BI__builtin_neon_vsetq_lane_i32: 4887 case NEON::BI__builtin_neon_vsetq_lane_i64: 4888 case NEON::BI__builtin_neon_vsetq_lane_f32: 4889 case NEON::BI__builtin_neon_vsha1h_u32: 4890 case NEON::BI__builtin_neon_vsha1cq_u32: 4891 case NEON::BI__builtin_neon_vsha1pq_u32: 4892 case NEON::BI__builtin_neon_vsha1mq_u32: 4893 case clang::ARM::BI_MoveToCoprocessor: 4894 case clang::ARM::BI_MoveToCoprocessor2: 4895 return false; 4896 } 4897 return true; 4898 } 4899 4900 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4901 const CallExpr *E, 4902 llvm::Triple::ArchType Arch) { 4903 if (auto Hint = GetValueForARMHint(BuiltinID)) 4904 return Hint; 4905 4906 if (BuiltinID == ARM::BI__emit) { 4907 bool IsThumb = 
getTarget().getTriple().getArch() == llvm::Triple::thumb; 4908 llvm::FunctionType *FTy = 4909 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 4910 4911 APSInt Value; 4912 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 4913 llvm_unreachable("Sema will ensure that the parameter is constant"); 4914 4915 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 4916 4917 llvm::InlineAsm *Emit = 4918 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 4919 /*SideEffects=*/true) 4920 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 4921 /*SideEffects=*/true); 4922 4923 return Builder.CreateCall(Emit); 4924 } 4925 4926 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 4927 Value *Option = EmitScalarExpr(E->getArg(0)); 4928 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 4929 } 4930 4931 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 4932 Value *Address = EmitScalarExpr(E->getArg(0)); 4933 Value *RW = EmitScalarExpr(E->getArg(1)); 4934 Value *IsData = EmitScalarExpr(E->getArg(2)); 4935 4936 // Locality is not supported on ARM target 4937 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 4938 4939 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4940 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4941 } 4942 4943 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 4944 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4945 return Builder.CreateCall( 4946 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 4947 } 4948 4949 if (BuiltinID == ARM::BI__clear_cache) { 4950 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4951 const FunctionDecl *FD = E->getDirectCallee(); 4952 Value *Ops[2]; 4953 for (unsigned i = 0; i < 2; i++) 4954 Ops[i] = EmitScalarExpr(E->getArg(i)); 4955 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4956 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4957 StringRef Name = FD->getName(); 4958 return 
EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4959 } 4960 4961 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 4962 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 4963 Function *F; 4964 4965 switch (BuiltinID) { 4966 default: llvm_unreachable("unexpected builtin"); 4967 case ARM::BI__builtin_arm_mcrr: 4968 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 4969 break; 4970 case ARM::BI__builtin_arm_mcrr2: 4971 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 4972 break; 4973 } 4974 4975 // MCRR{2} instruction has 5 operands but 4976 // the intrinsic has 4 because Rt and Rt2 4977 // are represented as a single unsigned 64 4978 // bit integer in the intrinsic definition 4979 // but internally it's represented as 2 32 4980 // bit integers. 4981 4982 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4983 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4984 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 4985 Value *CRm = EmitScalarExpr(E->getArg(3)); 4986 4987 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4988 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 4989 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 4990 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 4991 4992 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 4993 } 4994 4995 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 4996 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 4997 Function *F; 4998 4999 switch (BuiltinID) { 5000 default: llvm_unreachable("unexpected builtin"); 5001 case ARM::BI__builtin_arm_mrrc: 5002 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 5003 break; 5004 case ARM::BI__builtin_arm_mrrc2: 5005 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 5006 break; 5007 } 5008 5009 Value *Coproc = EmitScalarExpr(E->getArg(0)); 5010 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 5011 Value *CRm = EmitScalarExpr(E->getArg(2)); 5012 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 5013 5014 // Returns an unsigned 64 bit integer, represented 5015 // as two 32 bit integers. 
5016 5017 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 5018 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 5019 Rt = Builder.CreateZExt(Rt, Int64Ty); 5020 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 5021 5022 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 5023 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 5024 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 5025 5026 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 5027 } 5028 5029 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 5030 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 5031 BuiltinID == ARM::BI__builtin_arm_ldaex) && 5032 getContext().getTypeSize(E->getType()) == 64) || 5033 BuiltinID == ARM::BI__ldrexd) { 5034 Function *F; 5035 5036 switch (BuiltinID) { 5037 default: llvm_unreachable("unexpected builtin"); 5038 case ARM::BI__builtin_arm_ldaex: 5039 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 5040 break; 5041 case ARM::BI__builtin_arm_ldrexd: 5042 case ARM::BI__builtin_arm_ldrex: 5043 case ARM::BI__ldrexd: 5044 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 5045 break; 5046 } 5047 5048 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5049 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5050 "ldrexd"); 5051 5052 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5053 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5054 Val0 = Builder.CreateZExt(Val0, Int64Ty); 5055 Val1 = Builder.CreateZExt(Val1, Int64Ty); 5056 5057 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 5058 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5059 Val = Builder.CreateOr(Val, Val1); 5060 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5061 } 5062 5063 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 5064 BuiltinID == ARM::BI__builtin_arm_ldaex) { 5065 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5066 5067 QualType Ty = E->getType(); 5068 llvm::Type *RealResTy = ConvertType(Ty); 5069 llvm::Type *PtrTy = llvm::IntegerType::get( 
5070 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5071 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5072 5073 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 5074 ? Intrinsic::arm_ldaex 5075 : Intrinsic::arm_ldrex, 5076 PtrTy); 5077 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 5078 5079 if (RealResTy->isPointerTy()) 5080 return Builder.CreateIntToPtr(Val, RealResTy); 5081 else { 5082 llvm::Type *IntResTy = llvm::IntegerType::get( 5083 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5084 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5085 return Builder.CreateBitCast(Val, RealResTy); 5086 } 5087 } 5088 5089 if (BuiltinID == ARM::BI__builtin_arm_strexd || 5090 ((BuiltinID == ARM::BI__builtin_arm_stlex || 5091 BuiltinID == ARM::BI__builtin_arm_strex) && 5092 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 5093 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 5094 ? Intrinsic::arm_stlexd 5095 : Intrinsic::arm_strexd); 5096 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 5097 5098 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5099 Value *Val = EmitScalarExpr(E->getArg(0)); 5100 Builder.CreateStore(Val, Tmp); 5101 5102 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 5103 Val = Builder.CreateLoad(LdPtr); 5104 5105 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5106 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5107 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 5108 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 5109 } 5110 5111 if (BuiltinID == ARM::BI__builtin_arm_strex || 5112 BuiltinID == ARM::BI__builtin_arm_stlex) { 5113 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5114 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5115 5116 QualType Ty = E->getArg(0)->getType(); 5117 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5118 
getContext().getTypeSize(Ty)); 5119 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5120 5121 if (StoreVal->getType()->isPointerTy()) 5122 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 5123 else { 5124 llvm::Type *IntTy = llvm::IntegerType::get( 5125 getLLVMContext(), 5126 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5127 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5128 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 5129 } 5130 5131 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 5132 ? Intrinsic::arm_stlex 5133 : Intrinsic::arm_strex, 5134 StoreAddr->getType()); 5135 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 5136 } 5137 5138 switch (BuiltinID) { 5139 case ARM::BI__iso_volatile_load8: 5140 case ARM::BI__iso_volatile_load16: 5141 case ARM::BI__iso_volatile_load32: 5142 case ARM::BI__iso_volatile_load64: { 5143 Value *Ptr = EmitScalarExpr(E->getArg(0)); 5144 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 5145 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 5146 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 5147 LoadSize.getQuantity() * 8); 5148 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 5149 llvm::LoadInst *Load = 5150 Builder.CreateAlignedLoad(Ptr, LoadSize); 5151 Load->setVolatile(true); 5152 return Load; 5153 } 5154 case ARM::BI__iso_volatile_store8: 5155 case ARM::BI__iso_volatile_store16: 5156 case ARM::BI__iso_volatile_store32: 5157 case ARM::BI__iso_volatile_store64: { 5158 Value *Ptr = EmitScalarExpr(E->getArg(0)); 5159 Value *Value = EmitScalarExpr(E->getArg(1)); 5160 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 5161 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 5162 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 5163 StoreSize.getQuantity() * 8); 5164 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 5165 llvm::StoreInst *Store = 5166 
Builder.CreateAlignedStore(Value, Ptr, 5167 StoreSize); 5168 Store->setVolatile(true); 5169 return Store; 5170 } 5171 } 5172 5173 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 5174 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 5175 return Builder.CreateCall(F); 5176 } 5177 5178 // CRC32 5179 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5180 switch (BuiltinID) { 5181 case ARM::BI__builtin_arm_crc32b: 5182 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 5183 case ARM::BI__builtin_arm_crc32cb: 5184 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 5185 case ARM::BI__builtin_arm_crc32h: 5186 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 5187 case ARM::BI__builtin_arm_crc32ch: 5188 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 5189 case ARM::BI__builtin_arm_crc32w: 5190 case ARM::BI__builtin_arm_crc32d: 5191 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 5192 case ARM::BI__builtin_arm_crc32cw: 5193 case ARM::BI__builtin_arm_crc32cd: 5194 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 5195 } 5196 5197 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5198 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5199 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5200 5201 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 5202 // intrinsics, hence we need different codegen for these cases. 
5203 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 5204 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 5205 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 5206 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 5207 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 5208 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 5209 5210 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5211 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 5212 return Builder.CreateCall(F, {Res, Arg1b}); 5213 } else { 5214 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 5215 5216 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5217 return Builder.CreateCall(F, {Arg0, Arg1}); 5218 } 5219 } 5220 5221 if (BuiltinID == ARM::BI__builtin_arm_rsr || 5222 BuiltinID == ARM::BI__builtin_arm_rsr64 || 5223 BuiltinID == ARM::BI__builtin_arm_rsrp || 5224 BuiltinID == ARM::BI__builtin_arm_wsr || 5225 BuiltinID == ARM::BI__builtin_arm_wsr64 || 5226 BuiltinID == ARM::BI__builtin_arm_wsrp) { 5227 5228 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 5229 BuiltinID == ARM::BI__builtin_arm_rsr64 || 5230 BuiltinID == ARM::BI__builtin_arm_rsrp; 5231 5232 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 5233 BuiltinID == ARM::BI__builtin_arm_wsrp; 5234 5235 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 5236 BuiltinID == ARM::BI__builtin_arm_wsr64; 5237 5238 llvm::Type *ValueType; 5239 llvm::Type *RegisterType; 5240 if (IsPointerBuiltin) { 5241 ValueType = VoidPtrTy; 5242 RegisterType = Int32Ty; 5243 } else if (Is64Bit) { 5244 ValueType = RegisterType = Int64Ty; 5245 } else { 5246 ValueType = RegisterType = Int32Ty; 5247 } 5248 5249 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5250 } 5251 5252 // Find out if any arguments are required to be integer constant 5253 // expressions. 
5254 unsigned ICEArguments = 0; 5255 ASTContext::GetBuiltinTypeError Error; 5256 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5257 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5258 5259 auto getAlignmentValue32 = [&](Address addr) -> Value* { 5260 return Builder.getInt32(addr.getAlignment().getQuantity()); 5261 }; 5262 5263 Address PtrOp0 = Address::invalid(); 5264 Address PtrOp1 = Address::invalid(); 5265 SmallVector<Value*, 4> Ops; 5266 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 5267 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 5268 for (unsigned i = 0, e = NumArgs; i != e; i++) { 5269 if (i == 0) { 5270 switch (BuiltinID) { 5271 case NEON::BI__builtin_neon_vld1_v: 5272 case NEON::BI__builtin_neon_vld1q_v: 5273 case NEON::BI__builtin_neon_vld1q_lane_v: 5274 case NEON::BI__builtin_neon_vld1_lane_v: 5275 case NEON::BI__builtin_neon_vld1_dup_v: 5276 case NEON::BI__builtin_neon_vld1q_dup_v: 5277 case NEON::BI__builtin_neon_vst1_v: 5278 case NEON::BI__builtin_neon_vst1q_v: 5279 case NEON::BI__builtin_neon_vst1q_lane_v: 5280 case NEON::BI__builtin_neon_vst1_lane_v: 5281 case NEON::BI__builtin_neon_vst2_v: 5282 case NEON::BI__builtin_neon_vst2q_v: 5283 case NEON::BI__builtin_neon_vst2_lane_v: 5284 case NEON::BI__builtin_neon_vst2q_lane_v: 5285 case NEON::BI__builtin_neon_vst3_v: 5286 case NEON::BI__builtin_neon_vst3q_v: 5287 case NEON::BI__builtin_neon_vst3_lane_v: 5288 case NEON::BI__builtin_neon_vst3q_lane_v: 5289 case NEON::BI__builtin_neon_vst4_v: 5290 case NEON::BI__builtin_neon_vst4q_v: 5291 case NEON::BI__builtin_neon_vst4_lane_v: 5292 case NEON::BI__builtin_neon_vst4q_lane_v: 5293 // Get the alignment for the argument in addition to the value; 5294 // we'll use it later. 
5295 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 5296 Ops.push_back(PtrOp0.getPointer()); 5297 continue; 5298 } 5299 } 5300 if (i == 1) { 5301 switch (BuiltinID) { 5302 case NEON::BI__builtin_neon_vld2_v: 5303 case NEON::BI__builtin_neon_vld2q_v: 5304 case NEON::BI__builtin_neon_vld3_v: 5305 case NEON::BI__builtin_neon_vld3q_v: 5306 case NEON::BI__builtin_neon_vld4_v: 5307 case NEON::BI__builtin_neon_vld4q_v: 5308 case NEON::BI__builtin_neon_vld2_lane_v: 5309 case NEON::BI__builtin_neon_vld2q_lane_v: 5310 case NEON::BI__builtin_neon_vld3_lane_v: 5311 case NEON::BI__builtin_neon_vld3q_lane_v: 5312 case NEON::BI__builtin_neon_vld4_lane_v: 5313 case NEON::BI__builtin_neon_vld4q_lane_v: 5314 case NEON::BI__builtin_neon_vld2_dup_v: 5315 case NEON::BI__builtin_neon_vld3_dup_v: 5316 case NEON::BI__builtin_neon_vld4_dup_v: 5317 // Get the alignment for the argument in addition to the value; 5318 // we'll use it later. 5319 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 5320 Ops.push_back(PtrOp1.getPointer()); 5321 continue; 5322 } 5323 } 5324 5325 if ((ICEArguments & (1 << i)) == 0) { 5326 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5327 } else { 5328 // If this is required to be a constant, constant fold it so that we know 5329 // that the generated intrinsic gets a ConstantInt. 
5330 llvm::APSInt Result; 5331 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5332 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 5333 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5334 } 5335 } 5336 5337 switch (BuiltinID) { 5338 default: break; 5339 5340 case NEON::BI__builtin_neon_vget_lane_i8: 5341 case NEON::BI__builtin_neon_vget_lane_i16: 5342 case NEON::BI__builtin_neon_vget_lane_i32: 5343 case NEON::BI__builtin_neon_vget_lane_i64: 5344 case NEON::BI__builtin_neon_vget_lane_f32: 5345 case NEON::BI__builtin_neon_vgetq_lane_i8: 5346 case NEON::BI__builtin_neon_vgetq_lane_i16: 5347 case NEON::BI__builtin_neon_vgetq_lane_i32: 5348 case NEON::BI__builtin_neon_vgetq_lane_i64: 5349 case NEON::BI__builtin_neon_vgetq_lane_f32: 5350 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 5351 5352 case NEON::BI__builtin_neon_vset_lane_i8: 5353 case NEON::BI__builtin_neon_vset_lane_i16: 5354 case NEON::BI__builtin_neon_vset_lane_i32: 5355 case NEON::BI__builtin_neon_vset_lane_i64: 5356 case NEON::BI__builtin_neon_vset_lane_f32: 5357 case NEON::BI__builtin_neon_vsetq_lane_i8: 5358 case NEON::BI__builtin_neon_vsetq_lane_i16: 5359 case NEON::BI__builtin_neon_vsetq_lane_i32: 5360 case NEON::BI__builtin_neon_vsetq_lane_i64: 5361 case NEON::BI__builtin_neon_vsetq_lane_f32: 5362 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5363 5364 case NEON::BI__builtin_neon_vsha1h_u32: 5365 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 5366 "vsha1h"); 5367 case NEON::BI__builtin_neon_vsha1cq_u32: 5368 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 5369 "vsha1h"); 5370 case NEON::BI__builtin_neon_vsha1pq_u32: 5371 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 5372 "vsha1h"); 5373 case NEON::BI__builtin_neon_vsha1mq_u32: 5374 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 5375 "vsha1h"); 
5376 5377 // The ARM _MoveToCoprocessor builtins put the input register value as 5378 // the first argument, but the LLVM intrinsic expects it as the third one. 5379 case ARM::BI_MoveToCoprocessor: 5380 case ARM::BI_MoveToCoprocessor2: { 5381 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 5382 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 5383 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 5384 Ops[3], Ops[4], Ops[5]}); 5385 } 5386 case ARM::BI_BitScanForward: 5387 case ARM::BI_BitScanForward64: 5388 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 5389 case ARM::BI_BitScanReverse: 5390 case ARM::BI_BitScanReverse64: 5391 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 5392 5393 case ARM::BI_InterlockedAnd64: 5394 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 5395 case ARM::BI_InterlockedExchange64: 5396 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 5397 case ARM::BI_InterlockedExchangeAdd64: 5398 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 5399 case ARM::BI_InterlockedExchangeSub64: 5400 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 5401 case ARM::BI_InterlockedOr64: 5402 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 5403 case ARM::BI_InterlockedXor64: 5404 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 5405 case ARM::BI_InterlockedDecrement64: 5406 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 5407 case ARM::BI_InterlockedIncrement64: 5408 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 5409 } 5410 5411 // Get the last argument, which specifies the vector type. 
5412 assert(HasExtraArg); 5413 llvm::APSInt Result; 5414 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5415 if (!Arg->isIntegerConstantExpr(Result, getContext())) 5416 return nullptr; 5417 5418 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 5419 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 5420 // Determine the overloaded type of this builtin. 5421 llvm::Type *Ty; 5422 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 5423 Ty = FloatTy; 5424 else 5425 Ty = DoubleTy; 5426 5427 // Determine whether this is an unsigned conversion or not. 5428 bool usgn = Result.getZExtValue() == 1; 5429 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 5430 5431 // Call the appropriate intrinsic. 5432 Function *F = CGM.getIntrinsic(Int, Ty); 5433 return Builder.CreateCall(F, Ops, "vcvtr"); 5434 } 5435 5436 // Determine the type of this overloaded NEON intrinsic. 5437 NeonTypeFlags Type(Result.getZExtValue()); 5438 bool usgn = Type.isUnsigned(); 5439 bool rightShift = false; 5440 5441 llvm::VectorType *VTy = GetNeonType(this, Type, Arch); 5442 llvm::Type *Ty = VTy; 5443 if (!Ty) 5444 return nullptr; 5445 5446 // Many NEON builtins have identical semantics and uses in ARM and 5447 // AArch64. Emit these in a single function. 5448 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 5449 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5450 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 5451 if (Builtin) 5452 return EmitCommonNeonBuiltinExpr( 5453 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5454 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch); 5455 5456 unsigned Int; 5457 switch (BuiltinID) { 5458 default: return nullptr; 5459 case NEON::BI__builtin_neon_vld1q_lane_v: 5460 // Handle 64-bit integer elements as a special case. Use shuffles of 5461 // one-element vectors to avoid poor code for i64 in the backend. 5462 if (VTy->getElementType()->isIntegerTy(64)) { 5463 // Extract the other lane. 
5464 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5465 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 5466 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 5467 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5468 // Load the value as a one-element vector. 5469 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 5470 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5471 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 5472 Value *Align = getAlignmentValue32(PtrOp0); 5473 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 5474 // Combine them. 5475 uint32_t Indices[] = {1 - Lane, Lane}; 5476 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 5477 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 5478 } 5479 LLVM_FALLTHROUGH; 5480 case NEON::BI__builtin_neon_vld1_lane_v: { 5481 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5482 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 5483 Value *Ld = Builder.CreateLoad(PtrOp0); 5484 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 5485 } 5486 case NEON::BI__builtin_neon_vld2_dup_v: 5487 case NEON::BI__builtin_neon_vld3_dup_v: 5488 case NEON::BI__builtin_neon_vld4_dup_v: { 5489 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
5490 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 5491 switch (BuiltinID) { 5492 case NEON::BI__builtin_neon_vld2_dup_v: 5493 Int = Intrinsic::arm_neon_vld2; 5494 break; 5495 case NEON::BI__builtin_neon_vld3_dup_v: 5496 Int = Intrinsic::arm_neon_vld3; 5497 break; 5498 case NEON::BI__builtin_neon_vld4_dup_v: 5499 Int = Intrinsic::arm_neon_vld4; 5500 break; 5501 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5502 } 5503 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5504 Function *F = CGM.getIntrinsic(Int, Tys); 5505 llvm::Value *Align = getAlignmentValue32(PtrOp1); 5506 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 5507 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5508 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5509 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5510 } 5511 switch (BuiltinID) { 5512 case NEON::BI__builtin_neon_vld2_dup_v: 5513 Int = Intrinsic::arm_neon_vld2lane; 5514 break; 5515 case NEON::BI__builtin_neon_vld3_dup_v: 5516 Int = Intrinsic::arm_neon_vld3lane; 5517 break; 5518 case NEON::BI__builtin_neon_vld4_dup_v: 5519 Int = Intrinsic::arm_neon_vld4lane; 5520 break; 5521 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5522 } 5523 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5524 Function *F = CGM.getIntrinsic(Int, Tys); 5525 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 5526 5527 SmallVector<Value*, 6> Args; 5528 Args.push_back(Ops[1]); 5529 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 5530 5531 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5532 Args.push_back(CI); 5533 Args.push_back(getAlignmentValue32(PtrOp1)); 5534 5535 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 5536 // splat lane 0 to all elts in each vector of the result. 
5537 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5538 Value *Val = Builder.CreateExtractValue(Ops[1], i); 5539 Value *Elt = Builder.CreateBitCast(Val, Ty); 5540 Elt = EmitNeonSplat(Elt, CI); 5541 Elt = Builder.CreateBitCast(Elt, Val->getType()); 5542 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 5543 } 5544 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5545 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5546 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5547 } 5548 case NEON::BI__builtin_neon_vqrshrn_n_v: 5549 Int = 5550 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 5551 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 5552 1, true); 5553 case NEON::BI__builtin_neon_vqrshrun_n_v: 5554 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 5555 Ops, "vqrshrun_n", 1, true); 5556 case NEON::BI__builtin_neon_vqshrn_n_v: 5557 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 5558 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 5559 1, true); 5560 case NEON::BI__builtin_neon_vqshrun_n_v: 5561 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 5562 Ops, "vqshrun_n", 1, true); 5563 case NEON::BI__builtin_neon_vrecpe_v: 5564 case NEON::BI__builtin_neon_vrecpeq_v: 5565 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 5566 Ops, "vrecpe"); 5567 case NEON::BI__builtin_neon_vrshrn_n_v: 5568 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 5569 Ops, "vrshrn_n", 1, true); 5570 case NEON::BI__builtin_neon_vrsra_n_v: 5571 case NEON::BI__builtin_neon_vrsraq_n_v: 5572 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5573 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5574 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 5575 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 5576 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 5577 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 5578 case NEON::BI__builtin_neon_vsri_n_v: 5579 case NEON::BI__builtin_neon_vsriq_n_v: 5580 rightShift = true; 5581 LLVM_FALLTHROUGH; 5582 case NEON::BI__builtin_neon_vsli_n_v: 5583 case NEON::BI__builtin_neon_vsliq_n_v: 5584 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 5585 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 5586 Ops, "vsli_n"); 5587 case NEON::BI__builtin_neon_vsra_n_v: 5588 case NEON::BI__builtin_neon_vsraq_n_v: 5589 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5590 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5591 return Builder.CreateAdd(Ops[0], Ops[1]); 5592 case NEON::BI__builtin_neon_vst1q_lane_v: 5593 // Handle 64-bit integer elements as a special case. Use a shuffle to get 5594 // a one-element vector and avoid poor code for i64 in the backend. 
5595 if (VTy->getElementType()->isIntegerTy(64)) { 5596 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5597 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 5598 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5599 Ops[2] = getAlignmentValue32(PtrOp0); 5600 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 5601 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 5602 Tys), Ops); 5603 } 5604 LLVM_FALLTHROUGH; 5605 case NEON::BI__builtin_neon_vst1_lane_v: { 5606 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5607 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5608 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5609 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 5610 return St; 5611 } 5612 case NEON::BI__builtin_neon_vtbl1_v: 5613 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 5614 Ops, "vtbl1"); 5615 case NEON::BI__builtin_neon_vtbl2_v: 5616 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 5617 Ops, "vtbl2"); 5618 case NEON::BI__builtin_neon_vtbl3_v: 5619 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 5620 Ops, "vtbl3"); 5621 case NEON::BI__builtin_neon_vtbl4_v: 5622 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 5623 Ops, "vtbl4"); 5624 case NEON::BI__builtin_neon_vtbx1_v: 5625 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 5626 Ops, "vtbx1"); 5627 case NEON::BI__builtin_neon_vtbx2_v: 5628 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 5629 Ops, "vtbx2"); 5630 case NEON::BI__builtin_neon_vtbx3_v: 5631 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 5632 Ops, "vtbx3"); 5633 case NEON::BI__builtin_neon_vtbx4_v: 5634 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 5635 Ops, "vtbx4"); 5636 } 5637 } 5638 5639 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 5640 const CallExpr *E, 5641 SmallVectorImpl<Value *> &Ops, 
5642 llvm::Triple::ArchType Arch) { 5643 unsigned int Int = 0; 5644 const char *s = nullptr; 5645 5646 switch (BuiltinID) { 5647 default: 5648 return nullptr; 5649 case NEON::BI__builtin_neon_vtbl1_v: 5650 case NEON::BI__builtin_neon_vqtbl1_v: 5651 case NEON::BI__builtin_neon_vqtbl1q_v: 5652 case NEON::BI__builtin_neon_vtbl2_v: 5653 case NEON::BI__builtin_neon_vqtbl2_v: 5654 case NEON::BI__builtin_neon_vqtbl2q_v: 5655 case NEON::BI__builtin_neon_vtbl3_v: 5656 case NEON::BI__builtin_neon_vqtbl3_v: 5657 case NEON::BI__builtin_neon_vqtbl3q_v: 5658 case NEON::BI__builtin_neon_vtbl4_v: 5659 case NEON::BI__builtin_neon_vqtbl4_v: 5660 case NEON::BI__builtin_neon_vqtbl4q_v: 5661 break; 5662 case NEON::BI__builtin_neon_vtbx1_v: 5663 case NEON::BI__builtin_neon_vqtbx1_v: 5664 case NEON::BI__builtin_neon_vqtbx1q_v: 5665 case NEON::BI__builtin_neon_vtbx2_v: 5666 case NEON::BI__builtin_neon_vqtbx2_v: 5667 case NEON::BI__builtin_neon_vqtbx2q_v: 5668 case NEON::BI__builtin_neon_vtbx3_v: 5669 case NEON::BI__builtin_neon_vqtbx3_v: 5670 case NEON::BI__builtin_neon_vqtbx3q_v: 5671 case NEON::BI__builtin_neon_vtbx4_v: 5672 case NEON::BI__builtin_neon_vqtbx4_v: 5673 case NEON::BI__builtin_neon_vqtbx4q_v: 5674 break; 5675 } 5676 5677 assert(E->getNumArgs() >= 3); 5678 5679 // Get the last argument, which specifies the vector type. 5680 llvm::APSInt Result; 5681 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5682 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5683 return nullptr; 5684 5685 // Determine the type of this overloaded NEON intrinsic. 5686 NeonTypeFlags Type(Result.getZExtValue()); 5687 llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch); 5688 if (!Ty) 5689 return nullptr; 5690 5691 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5692 5693 // AArch64 scalar builtins are not overloaded, they do not have an extra 5694 // argument that specifies the vector type, need to handle each case. 
5695 switch (BuiltinID) { 5696 case NEON::BI__builtin_neon_vtbl1_v: { 5697 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 5698 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 5699 "vtbl1"); 5700 } 5701 case NEON::BI__builtin_neon_vtbl2_v: { 5702 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 5703 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 5704 "vtbl1"); 5705 } 5706 case NEON::BI__builtin_neon_vtbl3_v: { 5707 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 5708 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 5709 "vtbl2"); 5710 } 5711 case NEON::BI__builtin_neon_vtbl4_v: { 5712 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 5713 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 5714 "vtbl2"); 5715 } 5716 case NEON::BI__builtin_neon_vtbx1_v: { 5717 Value *TblRes = 5718 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 5719 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 5720 5721 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 5722 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 5723 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5724 5725 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 5726 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5727 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5728 } 5729 case NEON::BI__builtin_neon_vtbx2_v: { 5730 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 5731 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 5732 "vtbx1"); 5733 } 5734 case NEON::BI__builtin_neon_vtbx3_v: { 5735 Value *TblRes = 5736 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 5737 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 5738 5739 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 5740 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 5741 TwentyFourV); 5742 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5743 5744 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 5745 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5746 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5747 } 5748 case NEON::BI__builtin_neon_vtbx4_v: { 5749 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 5750 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 5751 "vtbx2"); 5752 } 5753 case NEON::BI__builtin_neon_vqtbl1_v: 5754 case NEON::BI__builtin_neon_vqtbl1q_v: 5755 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 5756 case NEON::BI__builtin_neon_vqtbl2_v: 5757 case NEON::BI__builtin_neon_vqtbl2q_v: { 5758 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 5759 case NEON::BI__builtin_neon_vqtbl3_v: 5760 case NEON::BI__builtin_neon_vqtbl3q_v: 5761 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 5762 case NEON::BI__builtin_neon_vqtbl4_v: 5763 case NEON::BI__builtin_neon_vqtbl4q_v: 5764 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 5765 case NEON::BI__builtin_neon_vqtbx1_v: 5766 case NEON::BI__builtin_neon_vqtbx1q_v: 5767 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 5768 case NEON::BI__builtin_neon_vqtbx2_v: 5769 case NEON::BI__builtin_neon_vqtbx2q_v: 5770 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 5771 case NEON::BI__builtin_neon_vqtbx3_v: 5772 case NEON::BI__builtin_neon_vqtbx3q_v: 5773 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 5774 case NEON::BI__builtin_neon_vqtbx4_v: 5775 case NEON::BI__builtin_neon_vqtbx4q_v: 5776 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 5777 } 5778 } 5779 5780 if (!Int) 5781 return nullptr; 5782 5783 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 5784 return CGF.EmitNeonCall(F, Ops, s); 5785 } 5786 5787 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 5788 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 5789 Op = Builder.CreateBitCast(Op, Int16Ty); 5790 Value *V = UndefValue::get(VTy); 5791 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 5792 Op = 
Builder.CreateInsertElement(V, Op, CI); 5793 return Op; 5794 } 5795 5796 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 5797 const CallExpr *E, 5798 llvm::Triple::ArchType Arch) { 5799 unsigned HintID = static_cast<unsigned>(-1); 5800 switch (BuiltinID) { 5801 default: break; 5802 case AArch64::BI__builtin_arm_nop: 5803 HintID = 0; 5804 break; 5805 case AArch64::BI__builtin_arm_yield: 5806 HintID = 1; 5807 break; 5808 case AArch64::BI__builtin_arm_wfe: 5809 HintID = 2; 5810 break; 5811 case AArch64::BI__builtin_arm_wfi: 5812 HintID = 3; 5813 break; 5814 case AArch64::BI__builtin_arm_sev: 5815 HintID = 4; 5816 break; 5817 case AArch64::BI__builtin_arm_sevl: 5818 HintID = 5; 5819 break; 5820 } 5821 5822 if (HintID != static_cast<unsigned>(-1)) { 5823 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 5824 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 5825 } 5826 5827 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 5828 Value *Address = EmitScalarExpr(E->getArg(0)); 5829 Value *RW = EmitScalarExpr(E->getArg(1)); 5830 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 5831 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 5832 Value *IsData = EmitScalarExpr(E->getArg(4)); 5833 5834 Value *Locality = nullptr; 5835 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 5836 // Temporal fetch, needs to convert cache level to locality. 5837 Locality = llvm::ConstantInt::get(Int32Ty, 5838 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 5839 } else { 5840 // Streaming fetch. 5841 Locality = llvm::ConstantInt::get(Int32Ty, 0); 5842 } 5843 5844 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 5845 // PLDL3STRM or PLDL2STRM. 
5846 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5847 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5848 } 5849 5850 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5851 assert((getContext().getTypeSize(E->getType()) == 32) && 5852 "rbit of unusual size!"); 5853 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5854 return Builder.CreateCall( 5855 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5856 } 5857 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5858 assert((getContext().getTypeSize(E->getType()) == 64) && 5859 "rbit of unusual size!"); 5860 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5861 return Builder.CreateCall( 5862 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5863 } 5864 5865 if (BuiltinID == AArch64::BI__clear_cache) { 5866 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5867 const FunctionDecl *FD = E->getDirectCallee(); 5868 Value *Ops[2]; 5869 for (unsigned i = 0; i < 2; i++) 5870 Ops[i] = EmitScalarExpr(E->getArg(i)); 5871 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5872 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5873 StringRef Name = FD->getName(); 5874 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5875 } 5876 5877 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5878 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5879 getContext().getTypeSize(E->getType()) == 128) { 5880 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5881 ? 
Intrinsic::aarch64_ldaxp 5882 : Intrinsic::aarch64_ldxp); 5883 5884 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5885 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5886 "ldxp"); 5887 5888 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5889 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5890 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5891 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5892 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5893 5894 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5895 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5896 Val = Builder.CreateOr(Val, Val1); 5897 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5898 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5899 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5900 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5901 5902 QualType Ty = E->getType(); 5903 llvm::Type *RealResTy = ConvertType(Ty); 5904 llvm::Type *PtrTy = llvm::IntegerType::get( 5905 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5906 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5907 5908 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5909 ? Intrinsic::aarch64_ldaxr 5910 : Intrinsic::aarch64_ldxr, 5911 PtrTy); 5912 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5913 5914 if (RealResTy->isPointerTy()) 5915 return Builder.CreateIntToPtr(Val, RealResTy); 5916 5917 llvm::Type *IntResTy = llvm::IntegerType::get( 5918 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5919 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5920 return Builder.CreateBitCast(Val, RealResTy); 5921 } 5922 5923 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5924 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5925 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5926 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5927 ? 
Intrinsic::aarch64_stlxp 5928 : Intrinsic::aarch64_stxp); 5929 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 5930 5931 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5932 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5933 5934 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5935 llvm::Value *Val = Builder.CreateLoad(Tmp); 5936 5937 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5938 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5939 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5940 Int8PtrTy); 5941 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5942 } 5943 5944 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5945 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5946 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5947 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5948 5949 QualType Ty = E->getArg(0)->getType(); 5950 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5951 getContext().getTypeSize(Ty)); 5952 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5953 5954 if (StoreVal->getType()->isPointerTy()) 5955 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5956 else { 5957 llvm::Type *IntTy = llvm::IntegerType::get( 5958 getLLVMContext(), 5959 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5960 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5961 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5962 } 5963 5964 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5965 ? 
Intrinsic::aarch64_stlxr 5966 : Intrinsic::aarch64_stxr, 5967 StoreAddr->getType()); 5968 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5969 } 5970 5971 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5972 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5973 return Builder.CreateCall(F); 5974 } 5975 5976 // CRC32 5977 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5978 switch (BuiltinID) { 5979 case AArch64::BI__builtin_arm_crc32b: 5980 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5981 case AArch64::BI__builtin_arm_crc32cb: 5982 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5983 case AArch64::BI__builtin_arm_crc32h: 5984 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5985 case AArch64::BI__builtin_arm_crc32ch: 5986 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5987 case AArch64::BI__builtin_arm_crc32w: 5988 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5989 case AArch64::BI__builtin_arm_crc32cw: 5990 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5991 case AArch64::BI__builtin_arm_crc32d: 5992 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5993 case AArch64::BI__builtin_arm_crc32cd: 5994 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5995 } 5996 5997 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5998 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5999 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 6000 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 6001 6002 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 6003 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 6004 6005 return Builder.CreateCall(F, {Arg0, Arg1}); 6006 } 6007 6008 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 6009 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 6010 BuiltinID == AArch64::BI__builtin_arm_rsrp || 6011 BuiltinID == AArch64::BI__builtin_arm_wsr || 6012 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 6013 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 6014 6015 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 6016 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 6017 BuiltinID == AArch64::BI__builtin_arm_rsrp; 6018 6019 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 6020 BuiltinID == AArch64::BI__builtin_arm_wsrp; 6021 6022 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 6023 BuiltinID != AArch64::BI__builtin_arm_wsr; 6024 6025 llvm::Type *ValueType; 6026 llvm::Type *RegisterType = Int64Ty; 6027 if (IsPointerBuiltin) { 6028 ValueType = VoidPtrTy; 6029 } else if (Is64Bit) { 6030 ValueType = Int64Ty; 6031 } else { 6032 ValueType = Int32Ty; 6033 } 6034 6035 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 6036 } 6037 6038 // Find out if any arguments are required to be integer constant 6039 // expressions. 6040 unsigned ICEArguments = 0; 6041 ASTContext::GetBuiltinTypeError Error; 6042 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 6043 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 6044 6045 llvm::SmallVector<Value*, 4> Ops; 6046 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 6047 if ((ICEArguments & (1 << i)) == 0) { 6048 Ops.push_back(EmitScalarExpr(E->getArg(i))); 6049 } else { 6050 // If this is required to be a constant, constant fold it so that we know 6051 // that the generated intrinsic gets a ConstantInt. 
6052 llvm::APSInt Result; 6053 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 6054 assert(IsConst && "Constant arg isn't actually constant?"); 6055 (void)IsConst; 6056 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 6057 } 6058 } 6059 6060 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 6061 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 6062 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 6063 6064 if (Builtin) { 6065 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 6066 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 6067 assert(Result && "SISD intrinsic should have been handled"); 6068 return Result; 6069 } 6070 6071 llvm::APSInt Result; 6072 const Expr *Arg = E->getArg(E->getNumArgs()-1); 6073 NeonTypeFlags Type(0); 6074 if (Arg->isIntegerConstantExpr(Result, getContext())) 6075 // Determine the type of this overloaded NEON intrinsic. 6076 Type = NeonTypeFlags(Result.getZExtValue()); 6077 6078 bool usgn = Type.isUnsigned(); 6079 bool quad = Type.isQuad(); 6080 6081 // Handle non-overloaded intrinsics first. 
6082 switch (BuiltinID) { 6083 default: break; 6084 case NEON::BI__builtin_neon_vldrq_p128: { 6085 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 6086 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 6087 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 6088 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 6089 CharUnits::fromQuantity(16)); 6090 } 6091 case NEON::BI__builtin_neon_vstrq_p128: { 6092 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 6093 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 6094 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 6095 } 6096 case NEON::BI__builtin_neon_vcvts_u32_f32: 6097 case NEON::BI__builtin_neon_vcvtd_u64_f64: 6098 usgn = true; 6099 LLVM_FALLTHROUGH; 6100 case NEON::BI__builtin_neon_vcvts_s32_f32: 6101 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 6102 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6103 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 6104 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 6105 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 6106 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 6107 if (usgn) 6108 return Builder.CreateFPToUI(Ops[0], InTy); 6109 return Builder.CreateFPToSI(Ops[0], InTy); 6110 } 6111 case NEON::BI__builtin_neon_vcvts_f32_u32: 6112 case NEON::BI__builtin_neon_vcvtd_f64_u64: 6113 usgn = true; 6114 LLVM_FALLTHROUGH; 6115 case NEON::BI__builtin_neon_vcvts_f32_s32: 6116 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 6117 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6118 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 6119 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 6120 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 6121 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 6122 if (usgn) 6123 return Builder.CreateUIToFP(Ops[0], FTy); 6124 return Builder.CreateSIToFP(Ops[0], FTy); 6125 } 6126 case NEON::BI__builtin_neon_vpaddd_s64: { 6127 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 6128 Value *Vec = EmitScalarExpr(E->getArg(0)); 6129 // The vector is v2f64, so make sure it's bitcast to that. 6130 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 6131 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 6132 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 6133 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 6134 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 6135 // Pairwise addition of a v2f64 into a scalar f64. 6136 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 6137 } 6138 case NEON::BI__builtin_neon_vpaddd_f64: { 6139 llvm::Type *Ty = 6140 llvm::VectorType::get(DoubleTy, 2); 6141 Value *Vec = EmitScalarExpr(E->getArg(0)); 6142 // The vector is v2f64, so make sure it's bitcast to that. 6143 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 6144 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 6145 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 6146 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 6147 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 6148 // Pairwise addition of a v2f64 into a scalar f64. 6149 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 6150 } 6151 case NEON::BI__builtin_neon_vpadds_f32: { 6152 llvm::Type *Ty = 6153 llvm::VectorType::get(FloatTy, 2); 6154 Value *Vec = EmitScalarExpr(E->getArg(0)); 6155 // The vector is v2f32, so make sure it's bitcast to that. 
6156 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 6157 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 6158 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 6159 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 6160 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 6161 // Pairwise addition of a v2f32 into a scalar f32. 6162 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 6163 } 6164 case NEON::BI__builtin_neon_vceqzd_s64: 6165 case NEON::BI__builtin_neon_vceqzd_f64: 6166 case NEON::BI__builtin_neon_vceqzs_f32: 6167 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6168 return EmitAArch64CompareBuiltinExpr( 6169 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6170 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 6171 case NEON::BI__builtin_neon_vcgezd_s64: 6172 case NEON::BI__builtin_neon_vcgezd_f64: 6173 case NEON::BI__builtin_neon_vcgezs_f32: 6174 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6175 return EmitAArch64CompareBuiltinExpr( 6176 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6177 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 6178 case NEON::BI__builtin_neon_vclezd_s64: 6179 case NEON::BI__builtin_neon_vclezd_f64: 6180 case NEON::BI__builtin_neon_vclezs_f32: 6181 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6182 return EmitAArch64CompareBuiltinExpr( 6183 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6184 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 6185 case NEON::BI__builtin_neon_vcgtzd_s64: 6186 case NEON::BI__builtin_neon_vcgtzd_f64: 6187 case NEON::BI__builtin_neon_vcgtzs_f32: 6188 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6189 return EmitAArch64CompareBuiltinExpr( 6190 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6191 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 6192 case NEON::BI__builtin_neon_vcltzd_s64: 6193 case NEON::BI__builtin_neon_vcltzd_f64: 6194 case NEON::BI__builtin_neon_vcltzs_f32: 6195 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6196 return 
EmitAArch64CompareBuiltinExpr( 6197 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6198 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 6199 6200 case NEON::BI__builtin_neon_vceqzd_u64: { 6201 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6202 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 6203 Ops[0] = 6204 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 6205 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 6206 } 6207 case NEON::BI__builtin_neon_vceqd_f64: 6208 case NEON::BI__builtin_neon_vcled_f64: 6209 case NEON::BI__builtin_neon_vcltd_f64: 6210 case NEON::BI__builtin_neon_vcged_f64: 6211 case NEON::BI__builtin_neon_vcgtd_f64: { 6212 llvm::CmpInst::Predicate P; 6213 switch (BuiltinID) { 6214 default: llvm_unreachable("missing builtin ID in switch!"); 6215 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 6216 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 6217 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 6218 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 6219 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 6220 } 6221 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6222 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6223 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6224 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 6225 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 6226 } 6227 case NEON::BI__builtin_neon_vceqs_f32: 6228 case NEON::BI__builtin_neon_vcles_f32: 6229 case NEON::BI__builtin_neon_vclts_f32: 6230 case NEON::BI__builtin_neon_vcges_f32: 6231 case NEON::BI__builtin_neon_vcgts_f32: { 6232 llvm::CmpInst::Predicate P; 6233 switch (BuiltinID) { 6234 default: llvm_unreachable("missing builtin ID in switch!"); 6235 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 6236 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 6237 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 6238 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 6239 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 6240 } 6241 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6242 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 6243 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 6244 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 6245 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 6246 } 6247 case NEON::BI__builtin_neon_vceqd_s64: 6248 case NEON::BI__builtin_neon_vceqd_u64: 6249 case NEON::BI__builtin_neon_vcgtd_s64: 6250 case NEON::BI__builtin_neon_vcgtd_u64: 6251 case NEON::BI__builtin_neon_vcltd_s64: 6252 case NEON::BI__builtin_neon_vcltd_u64: 6253 case NEON::BI__builtin_neon_vcged_u64: 6254 case NEON::BI__builtin_neon_vcged_s64: 6255 case NEON::BI__builtin_neon_vcled_u64: 6256 case NEON::BI__builtin_neon_vcled_s64: { 6257 llvm::CmpInst::Predicate P; 6258 switch (BuiltinID) { 6259 default: llvm_unreachable("missing builtin ID in switch!"); 6260 case NEON::BI__builtin_neon_vceqd_s64: 6261 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 6262 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 6263 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 6264 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 6265 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 6266 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 6267 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 6268 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 6269 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 6270 } 6271 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6272 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 6273 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 6274 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 6275 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 6276 } 6277 case NEON::BI__builtin_neon_vtstd_s64: 6278 case NEON::BI__builtin_neon_vtstd_u64: { 6279 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6280 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 6281 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6282 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 6283 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 6284 llvm::Constant::getNullValue(Int64Ty)); 6285 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 6286 } 6287 case NEON::BI__builtin_neon_vset_lane_i8: 6288 case NEON::BI__builtin_neon_vset_lane_i16: 6289 case NEON::BI__builtin_neon_vset_lane_i32: 6290 case NEON::BI__builtin_neon_vset_lane_i64: 6291 case NEON::BI__builtin_neon_vset_lane_f32: 6292 case NEON::BI__builtin_neon_vsetq_lane_i8: 6293 case NEON::BI__builtin_neon_vsetq_lane_i16: 6294 case NEON::BI__builtin_neon_vsetq_lane_i32: 6295 case NEON::BI__builtin_neon_vsetq_lane_i64: 6296 case NEON::BI__builtin_neon_vsetq_lane_f32: 6297 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6298 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6299 case NEON::BI__builtin_neon_vset_lane_f64: 6300 // The vector type needs a cast for the v1f64 variant. 6301 Ops[1] = Builder.CreateBitCast(Ops[1], 6302 llvm::VectorType::get(DoubleTy, 1)); 6303 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6304 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6305 case NEON::BI__builtin_neon_vsetq_lane_f64: 6306 // The vector type needs a cast for the v2f64 variant. 
6307 Ops[1] = Builder.CreateBitCast(Ops[1], 6308 llvm::VectorType::get(DoubleTy, 2)); 6309 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6310 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6311 6312 case NEON::BI__builtin_neon_vget_lane_i8: 6313 case NEON::BI__builtin_neon_vdupb_lane_i8: 6314 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 6315 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6316 "vget_lane"); 6317 case NEON::BI__builtin_neon_vgetq_lane_i8: 6318 case NEON::BI__builtin_neon_vdupb_laneq_i8: 6319 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 6320 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6321 "vgetq_lane"); 6322 case NEON::BI__builtin_neon_vget_lane_i16: 6323 case NEON::BI__builtin_neon_vduph_lane_i16: 6324 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 6325 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6326 "vget_lane"); 6327 case NEON::BI__builtin_neon_vgetq_lane_i16: 6328 case NEON::BI__builtin_neon_vduph_laneq_i16: 6329 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 6330 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6331 "vgetq_lane"); 6332 case NEON::BI__builtin_neon_vget_lane_i32: 6333 case NEON::BI__builtin_neon_vdups_lane_i32: 6334 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 6335 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6336 "vget_lane"); 6337 case NEON::BI__builtin_neon_vdups_lane_f32: 6338 Ops[0] = Builder.CreateBitCast(Ops[0], 6339 llvm::VectorType::get(FloatTy, 2)); 6340 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6341 "vdups_lane"); 6342 case NEON::BI__builtin_neon_vgetq_lane_i32: 6343 case NEON::BI__builtin_neon_vdups_laneq_i32: 6344 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 6345 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6346 "vgetq_lane"); 6347 case NEON::BI__builtin_neon_vget_lane_i64: 6348 case NEON::BI__builtin_neon_vdupd_lane_i64: 6349 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 6350 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6351 "vget_lane"); 6352 case NEON::BI__builtin_neon_vdupd_lane_f64: 6353 Ops[0] = Builder.CreateBitCast(Ops[0], 6354 llvm::VectorType::get(DoubleTy, 1)); 6355 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6356 "vdupd_lane"); 6357 case NEON::BI__builtin_neon_vgetq_lane_i64: 6358 case NEON::BI__builtin_neon_vdupd_laneq_i64: 6359 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 6360 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6361 "vgetq_lane"); 6362 case NEON::BI__builtin_neon_vget_lane_f32: 6363 Ops[0] = Builder.CreateBitCast(Ops[0], 6364 llvm::VectorType::get(FloatTy, 2)); 6365 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6366 "vget_lane"); 6367 case NEON::BI__builtin_neon_vget_lane_f64: 6368 Ops[0] = Builder.CreateBitCast(Ops[0], 6369 llvm::VectorType::get(DoubleTy, 1)); 6370 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6371 "vget_lane"); 6372 case NEON::BI__builtin_neon_vgetq_lane_f32: 6373 case NEON::BI__builtin_neon_vdups_laneq_f32: 6374 Ops[0] = Builder.CreateBitCast(Ops[0], 6375 llvm::VectorType::get(FloatTy, 4)); 6376 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6377 "vgetq_lane"); 6378 case NEON::BI__builtin_neon_vgetq_lane_f64: 6379 case NEON::BI__builtin_neon_vdupd_laneq_f64: 6380 Ops[0] = Builder.CreateBitCast(Ops[0], 6381 llvm::VectorType::get(DoubleTy, 2)); 6382 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6383 "vgetq_lane"); 6384 case NEON::BI__builtin_neon_vaddd_s64: 
6385 case NEON::BI__builtin_neon_vaddd_u64: 6386 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 6387 case NEON::BI__builtin_neon_vsubd_s64: 6388 case NEON::BI__builtin_neon_vsubd_u64: 6389 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 6390 case NEON::BI__builtin_neon_vqdmlalh_s16: 6391 case NEON::BI__builtin_neon_vqdmlslh_s16: { 6392 SmallVector<Value *, 2> ProductOps; 6393 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6394 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 6395 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6396 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6397 ProductOps, "vqdmlXl"); 6398 Constant *CI = ConstantInt::get(SizeTy, 0); 6399 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6400 6401 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 6402 ? Intrinsic::aarch64_neon_sqadd 6403 : Intrinsic::aarch64_neon_sqsub; 6404 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 6405 } 6406 case NEON::BI__builtin_neon_vqshlud_n_s64: { 6407 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6408 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6409 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 6410 Ops, "vqshlu_n"); 6411 } 6412 case NEON::BI__builtin_neon_vqshld_n_u64: 6413 case NEON::BI__builtin_neon_vqshld_n_s64: { 6414 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 6415 ? Intrinsic::aarch64_neon_uqshl 6416 : Intrinsic::aarch64_neon_sqshl; 6417 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6418 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6419 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 6420 } 6421 case NEON::BI__builtin_neon_vrshrd_n_u64: 6422 case NEON::BI__builtin_neon_vrshrd_n_s64: { 6423 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 6424 ? 
Intrinsic::aarch64_neon_urshl 6425 : Intrinsic::aarch64_neon_srshl; 6426 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6427 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 6428 Ops[1] = ConstantInt::get(Int64Ty, -SV); 6429 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 6430 } 6431 case NEON::BI__builtin_neon_vrsrad_n_u64: 6432 case NEON::BI__builtin_neon_vrsrad_n_s64: { 6433 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 6434 ? Intrinsic::aarch64_neon_urshl 6435 : Intrinsic::aarch64_neon_srshl; 6436 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6437 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 6438 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 6439 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 6440 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 6441 } 6442 case NEON::BI__builtin_neon_vshld_n_s64: 6443 case NEON::BI__builtin_neon_vshld_n_u64: { 6444 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6445 return Builder.CreateShl( 6446 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 6447 } 6448 case NEON::BI__builtin_neon_vshrd_n_s64: { 6449 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6450 return Builder.CreateAShr( 6451 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6452 Amt->getZExtValue())), 6453 "shrd_n"); 6454 } 6455 case NEON::BI__builtin_neon_vshrd_n_u64: { 6456 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6457 uint64_t ShiftAmt = Amt->getZExtValue(); 6458 // Right-shifting an unsigned value by its size yields 0. 
6459 if (ShiftAmt == 64) 6460 return ConstantInt::get(Int64Ty, 0); 6461 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 6462 "shrd_n"); 6463 } 6464 case NEON::BI__builtin_neon_vsrad_n_s64: { 6465 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6466 Ops[1] = Builder.CreateAShr( 6467 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6468 Amt->getZExtValue())), 6469 "shrd_n"); 6470 return Builder.CreateAdd(Ops[0], Ops[1]); 6471 } 6472 case NEON::BI__builtin_neon_vsrad_n_u64: { 6473 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6474 uint64_t ShiftAmt = Amt->getZExtValue(); 6475 // Right-shifting an unsigned value by its size yields 0. 6476 // As Op + 0 = Op, return Ops[0] directly. 6477 if (ShiftAmt == 64) 6478 return Ops[0]; 6479 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 6480 "shrd_n"); 6481 return Builder.CreateAdd(Ops[0], Ops[1]); 6482 } 6483 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 6484 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 6485 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 6486 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 6487 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6488 "lane"); 6489 SmallVector<Value *, 2> ProductOps; 6490 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6491 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 6492 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6493 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6494 ProductOps, "vqdmlXl"); 6495 Constant *CI = ConstantInt::get(SizeTy, 0); 6496 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6497 Ops.pop_back(); 6498 6499 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 6500 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 6501 ? 
Intrinsic::aarch64_neon_sqadd 6502 : Intrinsic::aarch64_neon_sqsub; 6503 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 6504 } 6505 case NEON::BI__builtin_neon_vqdmlals_s32: 6506 case NEON::BI__builtin_neon_vqdmlsls_s32: { 6507 SmallVector<Value *, 2> ProductOps; 6508 ProductOps.push_back(Ops[1]); 6509 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 6510 Ops[1] = 6511 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6512 ProductOps, "vqdmlXl"); 6513 6514 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 6515 ? Intrinsic::aarch64_neon_sqadd 6516 : Intrinsic::aarch64_neon_sqsub; 6517 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 6518 } 6519 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 6520 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 6521 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 6522 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 6523 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6524 "lane"); 6525 SmallVector<Value *, 2> ProductOps; 6526 ProductOps.push_back(Ops[1]); 6527 ProductOps.push_back(Ops[2]); 6528 Ops[1] = 6529 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6530 ProductOps, "vqdmlXl"); 6531 Ops.pop_back(); 6532 6533 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 6534 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 6535 ? Intrinsic::aarch64_neon_sqadd 6536 : Intrinsic::aarch64_neon_sqsub; 6537 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 6538 } 6539 } 6540 6541 llvm::VectorType *VTy = GetNeonType(this, Type, Arch); 6542 llvm::Type *Ty = VTy; 6543 if (!Ty) 6544 return nullptr; 6545 6546 // Not all intrinsics handled by the common case work for AArch64 yet, so only 6547 // defer to common code if it's been added to our special map. 
6548 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 6549 AArch64SIMDIntrinsicsProvenSorted); 6550 6551 if (Builtin) 6552 return EmitCommonNeonBuiltinExpr( 6553 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 6554 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 6555 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch); 6556 6557 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch)) 6558 return V; 6559 6560 unsigned Int; 6561 switch (BuiltinID) { 6562 default: return nullptr; 6563 case NEON::BI__builtin_neon_vbsl_v: 6564 case NEON::BI__builtin_neon_vbslq_v: { 6565 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 6566 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 6567 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 6568 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 6569 6570 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 6571 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 6572 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 6573 return Builder.CreateBitCast(Ops[0], Ty); 6574 } 6575 case NEON::BI__builtin_neon_vfma_lane_v: 6576 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 6577 // The ARM builtins (and instructions) have the addend as the first 6578 // operand, but the 'fma' intrinsics have it last. Swap it around here. 6579 Value *Addend = Ops[0]; 6580 Value *Multiplicand = Ops[1]; 6581 Value *LaneSource = Ops[2]; 6582 Ops[0] = Multiplicand; 6583 Ops[1] = LaneSource; 6584 Ops[2] = Addend; 6585 6586 // Now adjust things to handle the lane access. 6587 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
6588 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 6589 VTy; 6590 llvm::Constant *cst = cast<Constant>(Ops[3]); 6591 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 6592 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 6593 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 6594 6595 Ops.pop_back(); 6596 Int = Intrinsic::fma; 6597 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 6598 } 6599 case NEON::BI__builtin_neon_vfma_laneq_v: { 6600 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 6601 // v1f64 fma should be mapped to Neon scalar f64 fma 6602 if (VTy && VTy->getElementType() == DoubleTy) { 6603 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6604 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6605 llvm::Type *VTy = GetNeonType(this, 6606 NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch); 6607 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 6608 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6609 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 6610 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6611 return Builder.CreateBitCast(Result, Ty); 6612 } 6613 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6614 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6615 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6616 6617 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 6618 VTy->getNumElements() * 2); 6619 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 6620 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 6621 cast<ConstantInt>(Ops[3])); 6622 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 6623 6624 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6625 } 6626 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 6627 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6628 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6629 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6630 6631 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 6632 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 6633 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6634 } 6635 case NEON::BI__builtin_neon_vfmah_lane_f16: 6636 case NEON::BI__builtin_neon_vfmas_lane_f32: 6637 case NEON::BI__builtin_neon_vfmah_laneq_f16: 6638 case NEON::BI__builtin_neon_vfmas_laneq_f32: 6639 case NEON::BI__builtin_neon_vfmad_lane_f64: 6640 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 6641 Ops.push_back(EmitScalarExpr(E->getArg(3))); 6642 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 6643 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6644 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6645 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6646 } 6647 case NEON::BI__builtin_neon_vmull_v: 6648 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6649 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 6650 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 6651 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 6652 case NEON::BI__builtin_neon_vmax_v: 6653 case NEON::BI__builtin_neon_vmaxq_v: 6654 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6655 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 6656 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 6657 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 6658 case NEON::BI__builtin_neon_vmin_v: 6659 case NEON::BI__builtin_neon_vminq_v: 6660 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6661 Int = usgn ? 
Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 6662 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 6663 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 6664 case NEON::BI__builtin_neon_vabd_v: 6665 case NEON::BI__builtin_neon_vabdq_v: 6666 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6667 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 6668 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 6669 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 6670 case NEON::BI__builtin_neon_vpadal_v: 6671 case NEON::BI__builtin_neon_vpadalq_v: { 6672 unsigned ArgElts = VTy->getNumElements(); 6673 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 6674 unsigned BitWidth = EltTy->getBitWidth(); 6675 llvm::Type *ArgTy = llvm::VectorType::get( 6676 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 6677 llvm::Type* Tys[2] = { VTy, ArgTy }; 6678 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 6679 SmallVector<llvm::Value*, 1> TmpOps; 6680 TmpOps.push_back(Ops[1]); 6681 Function *F = CGM.getIntrinsic(Int, Tys); 6682 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 6683 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 6684 return Builder.CreateAdd(tmp, addend); 6685 } 6686 case NEON::BI__builtin_neon_vpmin_v: 6687 case NEON::BI__builtin_neon_vpminq_v: 6688 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6689 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 6690 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 6691 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 6692 case NEON::BI__builtin_neon_vpmax_v: 6693 case NEON::BI__builtin_neon_vpmaxq_v: 6694 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6695 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 6696 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 6697 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 6698 case NEON::BI__builtin_neon_vminnm_v: 6699 case NEON::BI__builtin_neon_vminnmq_v: 6700 Int = Intrinsic::aarch64_neon_fminnm; 6701 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 6702 case NEON::BI__builtin_neon_vmaxnm_v: 6703 case NEON::BI__builtin_neon_vmaxnmq_v: 6704 Int = Intrinsic::aarch64_neon_fmaxnm; 6705 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 6706 case NEON::BI__builtin_neon_vrecpss_f32: { 6707 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6708 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 6709 Ops, "vrecps"); 6710 } 6711 case NEON::BI__builtin_neon_vrecpsd_f64: { 6712 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6713 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 6714 Ops, "vrecps"); 6715 } 6716 case NEON::BI__builtin_neon_vqshrun_n_v: 6717 Int = Intrinsic::aarch64_neon_sqshrun; 6718 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 6719 case NEON::BI__builtin_neon_vqrshrun_n_v: 6720 Int = Intrinsic::aarch64_neon_sqrshrun; 6721 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 6722 case NEON::BI__builtin_neon_vqshrn_n_v: 6723 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 6724 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 6725 case NEON::BI__builtin_neon_vrshrn_n_v: 6726 Int = Intrinsic::aarch64_neon_rshrn; 6727 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 6728 case NEON::BI__builtin_neon_vqrshrn_n_v: 6729 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 6730 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 6731 case NEON::BI__builtin_neon_vrnda_v: 6732 case NEON::BI__builtin_neon_vrndaq_v: { 6733 Int = Intrinsic::round; 6734 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 6735 } 6736 case NEON::BI__builtin_neon_vrndi_v: 6737 case NEON::BI__builtin_neon_vrndiq_v: { 6738 Int = Intrinsic::nearbyint; 6739 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 6740 } 6741 case NEON::BI__builtin_neon_vrndm_v: 6742 case NEON::BI__builtin_neon_vrndmq_v: { 6743 Int = Intrinsic::floor; 6744 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 6745 } 6746 case NEON::BI__builtin_neon_vrndn_v: 6747 case NEON::BI__builtin_neon_vrndnq_v: { 6748 Int = Intrinsic::aarch64_neon_frintn; 6749 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 6750 } 6751 case NEON::BI__builtin_neon_vrndp_v: 6752 case NEON::BI__builtin_neon_vrndpq_v: { 6753 Int = Intrinsic::ceil; 6754 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 6755 } 6756 case NEON::BI__builtin_neon_vrndx_v: 6757 case NEON::BI__builtin_neon_vrndxq_v: { 6758 Int = Intrinsic::rint; 6759 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 6760 } 6761 case NEON::BI__builtin_neon_vrnd_v: 6762 case NEON::BI__builtin_neon_vrndq_v: { 6763 Int = Intrinsic::trunc; 6764 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 6765 } 6766 case NEON::BI__builtin_neon_vceqz_v: 6767 case NEON::BI__builtin_neon_vceqzq_v: 6768 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 6769 ICmpInst::ICMP_EQ, "vceqz"); 6770 case NEON::BI__builtin_neon_vcgez_v: 6771 case NEON::BI__builtin_neon_vcgezq_v: 6772 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 6773 ICmpInst::ICMP_SGE, "vcgez"); 6774 case NEON::BI__builtin_neon_vclez_v: 6775 case NEON::BI__builtin_neon_vclezq_v: 6776 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 6777 ICmpInst::ICMP_SLE, "vclez"); 6778 case NEON::BI__builtin_neon_vcgtz_v: 6779 case NEON::BI__builtin_neon_vcgtzq_v: 6780 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 6781 ICmpInst::ICMP_SGT, "vcgtz"); 6782 case NEON::BI__builtin_neon_vcltz_v: 6783 case NEON::BI__builtin_neon_vcltzq_v: 6784 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 6785 ICmpInst::ICMP_SLT, "vcltz"); 6786 case NEON::BI__builtin_neon_vcvt_f64_v: 6787 case NEON::BI__builtin_neon_vcvtq_f64_v: 6788 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6789 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch); 6790 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 6791 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 6792 case NEON::BI__builtin_neon_vcvt_f64_f32: { 6793 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 6794 "unexpected vcvt_f64_f32 builtin"); 6795 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 6796 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch)); 6797 6798 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 6799 } 6800 case NEON::BI__builtin_neon_vcvt_f32_f64: { 6801 assert(Type.getEltType() == NeonTypeFlags::Float32 && 6802 "unexpected vcvt_f32_f64 builtin"); 6803 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 6804 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch)); 6805 6806 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 6807 } 6808 case NEON::BI__builtin_neon_vcvt_s32_v: 6809 case NEON::BI__builtin_neon_vcvt_u32_v: 6810 case NEON::BI__builtin_neon_vcvt_s64_v: 6811 case NEON::BI__builtin_neon_vcvt_u64_v: 6812 case NEON::BI__builtin_neon_vcvt_s16_v: 6813 case NEON::BI__builtin_neon_vcvt_u16_v: 6814 case NEON::BI__builtin_neon_vcvtq_s32_v: 6815 case NEON::BI__builtin_neon_vcvtq_u32_v: 6816 case NEON::BI__builtin_neon_vcvtq_s64_v: 6817 case 
NEON::BI__builtin_neon_vcvtq_u64_v: 6818 case NEON::BI__builtin_neon_vcvtq_s16_v: 6819 case NEON::BI__builtin_neon_vcvtq_u16_v: { 6820 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 6821 if (usgn) 6822 return Builder.CreateFPToUI(Ops[0], Ty); 6823 return Builder.CreateFPToSI(Ops[0], Ty); 6824 } 6825 case NEON::BI__builtin_neon_vcvta_s16_v: 6826 case NEON::BI__builtin_neon_vcvta_s32_v: 6827 case NEON::BI__builtin_neon_vcvtaq_s16_v: 6828 case NEON::BI__builtin_neon_vcvtaq_s32_v: 6829 case NEON::BI__builtin_neon_vcvta_u32_v: 6830 case NEON::BI__builtin_neon_vcvtaq_u16_v: 6831 case NEON::BI__builtin_neon_vcvtaq_u32_v: 6832 case NEON::BI__builtin_neon_vcvta_s64_v: 6833 case NEON::BI__builtin_neon_vcvtaq_s64_v: 6834 case NEON::BI__builtin_neon_vcvta_u64_v: 6835 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 6836 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 6837 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6838 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 6839 } 6840 case NEON::BI__builtin_neon_vcvtm_s16_v: 6841 case NEON::BI__builtin_neon_vcvtm_s32_v: 6842 case NEON::BI__builtin_neon_vcvtmq_s16_v: 6843 case NEON::BI__builtin_neon_vcvtmq_s32_v: 6844 case NEON::BI__builtin_neon_vcvtm_u16_v: 6845 case NEON::BI__builtin_neon_vcvtm_u32_v: 6846 case NEON::BI__builtin_neon_vcvtmq_u16_v: 6847 case NEON::BI__builtin_neon_vcvtmq_u32_v: 6848 case NEON::BI__builtin_neon_vcvtm_s64_v: 6849 case NEON::BI__builtin_neon_vcvtmq_s64_v: 6850 case NEON::BI__builtin_neon_vcvtm_u64_v: 6851 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 6852 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 6853 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6854 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 6855 } 6856 case NEON::BI__builtin_neon_vcvtn_s16_v: 6857 case NEON::BI__builtin_neon_vcvtn_s32_v: 6858 case NEON::BI__builtin_neon_vcvtnq_s16_v: 6859 case NEON::BI__builtin_neon_vcvtnq_s32_v: 6860 case NEON::BI__builtin_neon_vcvtn_u16_v: 6861 case NEON::BI__builtin_neon_vcvtn_u32_v: 6862 case NEON::BI__builtin_neon_vcvtnq_u16_v: 6863 case NEON::BI__builtin_neon_vcvtnq_u32_v: 6864 case NEON::BI__builtin_neon_vcvtn_s64_v: 6865 case NEON::BI__builtin_neon_vcvtnq_s64_v: 6866 case NEON::BI__builtin_neon_vcvtn_u64_v: 6867 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 6868 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 6869 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6870 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 6871 } 6872 case NEON::BI__builtin_neon_vcvtp_s16_v: 6873 case NEON::BI__builtin_neon_vcvtp_s32_v: 6874 case NEON::BI__builtin_neon_vcvtpq_s16_v: 6875 case NEON::BI__builtin_neon_vcvtpq_s32_v: 6876 case NEON::BI__builtin_neon_vcvtp_u16_v: 6877 case NEON::BI__builtin_neon_vcvtp_u32_v: 6878 case NEON::BI__builtin_neon_vcvtpq_u16_v: 6879 case NEON::BI__builtin_neon_vcvtpq_u32_v: 6880 case NEON::BI__builtin_neon_vcvtp_s64_v: 6881 case NEON::BI__builtin_neon_vcvtpq_s64_v: 6882 case NEON::BI__builtin_neon_vcvtp_u64_v: 6883 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 6884 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 6885 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6886 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 6887 } 6888 case NEON::BI__builtin_neon_vmulx_v: 6889 case NEON::BI__builtin_neon_vmulxq_v: { 6890 Int = Intrinsic::aarch64_neon_fmulx; 6891 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 6892 } 6893 case NEON::BI__builtin_neon_vmul_lane_v: 6894 case NEON::BI__builtin_neon_vmul_laneq_v: { 6895 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 6896 bool Quad = false; 6897 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 6898 Quad = true; 6899 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6900 llvm::Type *VTy = GetNeonType(this, 6901 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch); 6902 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6903 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 6904 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 6905 return Builder.CreateBitCast(Result, Ty); 6906 } 6907 case NEON::BI__builtin_neon_vnegd_s64: 6908 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 6909 case NEON::BI__builtin_neon_vpmaxnm_v: 6910 case NEON::BI__builtin_neon_vpmaxnmq_v: { 6911 Int = Intrinsic::aarch64_neon_fmaxnmp; 6912 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 6913 } 6914 case NEON::BI__builtin_neon_vpminnm_v: 6915 case NEON::BI__builtin_neon_vpminnmq_v: { 6916 Int = Intrinsic::aarch64_neon_fminnmp; 6917 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 6918 } 6919 case NEON::BI__builtin_neon_vsqrt_v: 6920 case NEON::BI__builtin_neon_vsqrtq_v: { 6921 Int = Intrinsic::sqrt; 6922 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 6924 } 6925 case NEON::BI__builtin_neon_vrbit_v: 6926 case NEON::BI__builtin_neon_vrbitq_v: { 6927 Int = Intrinsic::aarch64_neon_rbit; 6928 return 
EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 6929 } 6930 case NEON::BI__builtin_neon_vaddv_u8: 6931 // FIXME: These are handled by the AArch64 scalar code. 6932 usgn = true; 6933 LLVM_FALLTHROUGH; 6934 case NEON::BI__builtin_neon_vaddv_s8: { 6935 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6936 Ty = Int32Ty; 6937 VTy = llvm::VectorType::get(Int8Ty, 8); 6938 llvm::Type *Tys[2] = { Ty, VTy }; 6939 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6940 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6941 return Builder.CreateTrunc(Ops[0], Int8Ty); 6942 } 6943 case NEON::BI__builtin_neon_vaddv_u16: 6944 usgn = true; 6945 LLVM_FALLTHROUGH; 6946 case NEON::BI__builtin_neon_vaddv_s16: { 6947 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6948 Ty = Int32Ty; 6949 VTy = llvm::VectorType::get(Int16Ty, 4); 6950 llvm::Type *Tys[2] = { Ty, VTy }; 6951 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6952 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6953 return Builder.CreateTrunc(Ops[0], Int16Ty); 6954 } 6955 case NEON::BI__builtin_neon_vaddvq_u8: 6956 usgn = true; 6957 LLVM_FALLTHROUGH; 6958 case NEON::BI__builtin_neon_vaddvq_s8: { 6959 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6960 Ty = Int32Ty; 6961 VTy = llvm::VectorType::get(Int8Ty, 16); 6962 llvm::Type *Tys[2] = { Ty, VTy }; 6963 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6964 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6965 return Builder.CreateTrunc(Ops[0], Int8Ty); 6966 } 6967 case NEON::BI__builtin_neon_vaddvq_u16: 6968 usgn = true; 6969 LLVM_FALLTHROUGH; 6970 case NEON::BI__builtin_neon_vaddvq_s16: { 6971 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6972 Ty = Int32Ty; 6973 VTy = llvm::VectorType::get(Int16Ty, 8); 6974 llvm::Type *Tys[2] = { Ty, VTy }; 6975 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6976 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6977 return Builder.CreateTrunc(Ops[0], Int16Ty); 6978 } 6979 case NEON::BI__builtin_neon_vmaxv_u8: { 6980 Int = Intrinsic::aarch64_neon_umaxv; 6981 Ty = Int32Ty; 6982 VTy = llvm::VectorType::get(Int8Ty, 8); 6983 llvm::Type *Tys[2] = { Ty, VTy }; 6984 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6985 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6986 return Builder.CreateTrunc(Ops[0], Int8Ty); 6987 } 6988 case NEON::BI__builtin_neon_vmaxv_u16: { 6989 Int = Intrinsic::aarch64_neon_umaxv; 6990 Ty = Int32Ty; 6991 VTy = llvm::VectorType::get(Int16Ty, 4); 6992 llvm::Type *Tys[2] = { Ty, VTy }; 6993 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6994 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6995 return Builder.CreateTrunc(Ops[0], Int16Ty); 6996 } 6997 case NEON::BI__builtin_neon_vmaxvq_u8: { 6998 Int = Intrinsic::aarch64_neon_umaxv; 6999 Ty = Int32Ty; 7000 VTy = llvm::VectorType::get(Int8Ty, 16); 7001 llvm::Type *Tys[2] = { Ty, VTy }; 7002 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7003 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7004 return Builder.CreateTrunc(Ops[0], Int8Ty); 7005 } 7006 case NEON::BI__builtin_neon_vmaxvq_u16: { 7007 Int = Intrinsic::aarch64_neon_umaxv; 7008 Ty = Int32Ty; 7009 VTy = llvm::VectorType::get(Int16Ty, 8); 7010 llvm::Type *Tys[2] = { Ty, VTy }; 7011 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7012 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7013 return Builder.CreateTrunc(Ops[0], Int16Ty); 7014 } 7015 case NEON::BI__builtin_neon_vmaxv_s8: { 7016 Int = Intrinsic::aarch64_neon_smaxv; 7017 Ty = Int32Ty; 7018 VTy = llvm::VectorType::get(Int8Ty, 8); 7019 llvm::Type *Tys[2] = { Ty, 
VTy }; 7020 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7021 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7022 return Builder.CreateTrunc(Ops[0], Int8Ty); 7023 } 7024 case NEON::BI__builtin_neon_vmaxv_s16: { 7025 Int = Intrinsic::aarch64_neon_smaxv; 7026 Ty = Int32Ty; 7027 VTy = llvm::VectorType::get(Int16Ty, 4); 7028 llvm::Type *Tys[2] = { Ty, VTy }; 7029 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7030 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7031 return Builder.CreateTrunc(Ops[0], Int16Ty); 7032 } 7033 case NEON::BI__builtin_neon_vmaxvq_s8: { 7034 Int = Intrinsic::aarch64_neon_smaxv; 7035 Ty = Int32Ty; 7036 VTy = llvm::VectorType::get(Int8Ty, 16); 7037 llvm::Type *Tys[2] = { Ty, VTy }; 7038 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7039 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7040 return Builder.CreateTrunc(Ops[0], Int8Ty); 7041 } 7042 case NEON::BI__builtin_neon_vmaxvq_s16: { 7043 Int = Intrinsic::aarch64_neon_smaxv; 7044 Ty = Int32Ty; 7045 VTy = llvm::VectorType::get(Int16Ty, 8); 7046 llvm::Type *Tys[2] = { Ty, VTy }; 7047 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7048 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7049 return Builder.CreateTrunc(Ops[0], Int16Ty); 7050 } 7051 case NEON::BI__builtin_neon_vmaxv_f16: { 7052 Int = Intrinsic::aarch64_neon_fmaxv; 7053 Ty = HalfTy; 7054 VTy = llvm::VectorType::get(HalfTy, 4); 7055 llvm::Type *Tys[2] = { Ty, VTy }; 7056 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7057 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7058 return Builder.CreateTrunc(Ops[0], HalfTy); 7059 } 7060 case NEON::BI__builtin_neon_vmaxvq_f16: { 7061 Int = Intrinsic::aarch64_neon_fmaxv; 7062 Ty = HalfTy; 7063 VTy = llvm::VectorType::get(HalfTy, 8); 7064 llvm::Type *Tys[2] = { Ty, VTy }; 7065 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7066 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7067 return 
Builder.CreateTrunc(Ops[0], HalfTy); 7068 } 7069 case NEON::BI__builtin_neon_vminv_u8: { 7070 Int = Intrinsic::aarch64_neon_uminv; 7071 Ty = Int32Ty; 7072 VTy = llvm::VectorType::get(Int8Ty, 8); 7073 llvm::Type *Tys[2] = { Ty, VTy }; 7074 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7075 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7076 return Builder.CreateTrunc(Ops[0], Int8Ty); 7077 } 7078 case NEON::BI__builtin_neon_vminv_u16: { 7079 Int = Intrinsic::aarch64_neon_uminv; 7080 Ty = Int32Ty; 7081 VTy = llvm::VectorType::get(Int16Ty, 4); 7082 llvm::Type *Tys[2] = { Ty, VTy }; 7083 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7084 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7085 return Builder.CreateTrunc(Ops[0], Int16Ty); 7086 } 7087 case NEON::BI__builtin_neon_vminvq_u8: { 7088 Int = Intrinsic::aarch64_neon_uminv; 7089 Ty = Int32Ty; 7090 VTy = llvm::VectorType::get(Int8Ty, 16); 7091 llvm::Type *Tys[2] = { Ty, VTy }; 7092 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7093 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7094 return Builder.CreateTrunc(Ops[0], Int8Ty); 7095 } 7096 case NEON::BI__builtin_neon_vminvq_u16: { 7097 Int = Intrinsic::aarch64_neon_uminv; 7098 Ty = Int32Ty; 7099 VTy = llvm::VectorType::get(Int16Ty, 8); 7100 llvm::Type *Tys[2] = { Ty, VTy }; 7101 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7102 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7103 return Builder.CreateTrunc(Ops[0], Int16Ty); 7104 } 7105 case NEON::BI__builtin_neon_vminv_s8: { 7106 Int = Intrinsic::aarch64_neon_sminv; 7107 Ty = Int32Ty; 7108 VTy = llvm::VectorType::get(Int8Ty, 8); 7109 llvm::Type *Tys[2] = { Ty, VTy }; 7110 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7111 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7112 return Builder.CreateTrunc(Ops[0], Int8Ty); 7113 } 7114 case NEON::BI__builtin_neon_vminv_s16: { 7115 Int = Intrinsic::aarch64_neon_sminv; 7116 Ty = Int32Ty; 7117 VTy 
= llvm::VectorType::get(Int16Ty, 4); 7118 llvm::Type *Tys[2] = { Ty, VTy }; 7119 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7120 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7121 return Builder.CreateTrunc(Ops[0], Int16Ty); 7122 } 7123 case NEON::BI__builtin_neon_vminvq_s8: { 7124 Int = Intrinsic::aarch64_neon_sminv; 7125 Ty = Int32Ty; 7126 VTy = llvm::VectorType::get(Int8Ty, 16); 7127 llvm::Type *Tys[2] = { Ty, VTy }; 7128 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7129 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7130 return Builder.CreateTrunc(Ops[0], Int8Ty); 7131 } 7132 case NEON::BI__builtin_neon_vminvq_s16: { 7133 Int = Intrinsic::aarch64_neon_sminv; 7134 Ty = Int32Ty; 7135 VTy = llvm::VectorType::get(Int16Ty, 8); 7136 llvm::Type *Tys[2] = { Ty, VTy }; 7137 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7138 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7139 return Builder.CreateTrunc(Ops[0], Int16Ty); 7140 } 7141 case NEON::BI__builtin_neon_vminv_f16: { 7142 Int = Intrinsic::aarch64_neon_fminv; 7143 Ty = HalfTy; 7144 VTy = llvm::VectorType::get(HalfTy, 4); 7145 llvm::Type *Tys[2] = { Ty, VTy }; 7146 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7147 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7148 return Builder.CreateTrunc(Ops[0], HalfTy); 7149 } 7150 case NEON::BI__builtin_neon_vminvq_f16: { 7151 Int = Intrinsic::aarch64_neon_fminv; 7152 Ty = HalfTy; 7153 VTy = llvm::VectorType::get(HalfTy, 8); 7154 llvm::Type *Tys[2] = { Ty, VTy }; 7155 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7156 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7157 return Builder.CreateTrunc(Ops[0], HalfTy); 7158 } 7159 case NEON::BI__builtin_neon_vmaxnmv_f16: { 7160 Int = Intrinsic::aarch64_neon_fmaxnmv; 7161 Ty = HalfTy; 7162 VTy = llvm::VectorType::get(HalfTy, 4); 7163 llvm::Type *Tys[2] = { Ty, VTy }; 7164 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7165 Ops[0] = 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 7166 return Builder.CreateTrunc(Ops[0], HalfTy); 7167 } 7168 case NEON::BI__builtin_neon_vmaxnmvq_f16: { 7169 Int = Intrinsic::aarch64_neon_fmaxnmv; 7170 Ty = HalfTy; 7171 VTy = llvm::VectorType::get(HalfTy, 8); 7172 llvm::Type *Tys[2] = { Ty, VTy }; 7173 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7174 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 7175 return Builder.CreateTrunc(Ops[0], HalfTy); 7176 } 7177 case NEON::BI__builtin_neon_vminnmv_f16: { 7178 Int = Intrinsic::aarch64_neon_fminnmv; 7179 Ty = HalfTy; 7180 VTy = llvm::VectorType::get(HalfTy, 4); 7181 llvm::Type *Tys[2] = { Ty, VTy }; 7182 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7183 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 7184 return Builder.CreateTrunc(Ops[0], HalfTy); 7185 } 7186 case NEON::BI__builtin_neon_vminnmvq_f16: { 7187 Int = Intrinsic::aarch64_neon_fminnmv; 7188 Ty = HalfTy; 7189 VTy = llvm::VectorType::get(HalfTy, 8); 7190 llvm::Type *Tys[2] = { Ty, VTy }; 7191 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7192 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 7193 return Builder.CreateTrunc(Ops[0], HalfTy); 7194 } 7195 case NEON::BI__builtin_neon_vmul_n_f64: { 7196 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 7197 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 7198 return Builder.CreateFMul(Ops[0], RHS); 7199 } 7200 case NEON::BI__builtin_neon_vaddlv_u8: { 7201 Int = Intrinsic::aarch64_neon_uaddlv; 7202 Ty = Int32Ty; 7203 VTy = llvm::VectorType::get(Int8Ty, 8); 7204 llvm::Type *Tys[2] = { Ty, VTy }; 7205 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7206 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7207 return Builder.CreateTrunc(Ops[0], Int16Ty); 7208 } 7209 case NEON::BI__builtin_neon_vaddlv_u16: { 7210 Int = Intrinsic::aarch64_neon_uaddlv; 7211 Ty = Int32Ty; 7212 VTy = llvm::VectorType::get(Int16Ty, 4); 7213 
llvm::Type *Tys[2] = { Ty, VTy }; 7214 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7215 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7216 } 7217 case NEON::BI__builtin_neon_vaddlvq_u8: { 7218 Int = Intrinsic::aarch64_neon_uaddlv; 7219 Ty = Int32Ty; 7220 VTy = llvm::VectorType::get(Int8Ty, 16); 7221 llvm::Type *Tys[2] = { Ty, VTy }; 7222 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7223 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7224 return Builder.CreateTrunc(Ops[0], Int16Ty); 7225 } 7226 case NEON::BI__builtin_neon_vaddlvq_u16: { 7227 Int = Intrinsic::aarch64_neon_uaddlv; 7228 Ty = Int32Ty; 7229 VTy = llvm::VectorType::get(Int16Ty, 8); 7230 llvm::Type *Tys[2] = { Ty, VTy }; 7231 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7232 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7233 } 7234 case NEON::BI__builtin_neon_vaddlv_s8: { 7235 Int = Intrinsic::aarch64_neon_saddlv; 7236 Ty = Int32Ty; 7237 VTy = llvm::VectorType::get(Int8Ty, 8); 7238 llvm::Type *Tys[2] = { Ty, VTy }; 7239 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7240 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7241 return Builder.CreateTrunc(Ops[0], Int16Ty); 7242 } 7243 case NEON::BI__builtin_neon_vaddlv_s16: { 7244 Int = Intrinsic::aarch64_neon_saddlv; 7245 Ty = Int32Ty; 7246 VTy = llvm::VectorType::get(Int16Ty, 4); 7247 llvm::Type *Tys[2] = { Ty, VTy }; 7248 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7249 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7250 } 7251 case NEON::BI__builtin_neon_vaddlvq_s8: { 7252 Int = Intrinsic::aarch64_neon_saddlv; 7253 Ty = Int32Ty; 7254 VTy = llvm::VectorType::get(Int8Ty, 16); 7255 llvm::Type *Tys[2] = { Ty, VTy }; 7256 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7257 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7258 return Builder.CreateTrunc(Ops[0], Int16Ty); 7259 } 7260 case NEON::BI__builtin_neon_vaddlvq_s16: { 7261 Int = 
Intrinsic::aarch64_neon_saddlv; 7262 Ty = Int32Ty; 7263 VTy = llvm::VectorType::get(Int16Ty, 8); 7264 llvm::Type *Tys[2] = { Ty, VTy }; 7265 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7266 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7267 } 7268 case NEON::BI__builtin_neon_vsri_n_v: 7269 case NEON::BI__builtin_neon_vsriq_n_v: { 7270 Int = Intrinsic::aarch64_neon_vsri; 7271 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 7272 return EmitNeonCall(Intrin, Ops, "vsri_n"); 7273 } 7274 case NEON::BI__builtin_neon_vsli_n_v: 7275 case NEON::BI__builtin_neon_vsliq_n_v: { 7276 Int = Intrinsic::aarch64_neon_vsli; 7277 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 7278 return EmitNeonCall(Intrin, Ops, "vsli_n"); 7279 } 7280 case NEON::BI__builtin_neon_vsra_n_v: 7281 case NEON::BI__builtin_neon_vsraq_n_v: 7282 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7283 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 7284 return Builder.CreateAdd(Ops[0], Ops[1]); 7285 case NEON::BI__builtin_neon_vrsra_n_v: 7286 case NEON::BI__builtin_neon_vrsraq_n_v: { 7287 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 7288 SmallVector<llvm::Value*,2> TmpOps; 7289 TmpOps.push_back(Ops[1]); 7290 TmpOps.push_back(Ops[2]); 7291 Function* F = CGM.getIntrinsic(Int, Ty); 7292 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 7293 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 7294 return Builder.CreateAdd(Ops[0], tmp); 7295 } 7296 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 7297 // of an Align parameter here. 
7298 case NEON::BI__builtin_neon_vld1_x2_v: 7299 case NEON::BI__builtin_neon_vld1q_x2_v: 7300 case NEON::BI__builtin_neon_vld1_x3_v: 7301 case NEON::BI__builtin_neon_vld1q_x3_v: 7302 case NEON::BI__builtin_neon_vld1_x4_v: 7303 case NEON::BI__builtin_neon_vld1q_x4_v: { 7304 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 7305 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7306 llvm::Type *Tys[2] = { VTy, PTy }; 7307 unsigned Int; 7308 switch (BuiltinID) { 7309 case NEON::BI__builtin_neon_vld1_x2_v: 7310 case NEON::BI__builtin_neon_vld1q_x2_v: 7311 Int = Intrinsic::aarch64_neon_ld1x2; 7312 break; 7313 case NEON::BI__builtin_neon_vld1_x3_v: 7314 case NEON::BI__builtin_neon_vld1q_x3_v: 7315 Int = Intrinsic::aarch64_neon_ld1x3; 7316 break; 7317 case NEON::BI__builtin_neon_vld1_x4_v: 7318 case NEON::BI__builtin_neon_vld1q_x4_v: 7319 Int = Intrinsic::aarch64_neon_ld1x4; 7320 break; 7321 } 7322 Function *F = CGM.getIntrinsic(Int, Tys); 7323 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 7324 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7325 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7326 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7327 } 7328 case NEON::BI__builtin_neon_vst1_x2_v: 7329 case NEON::BI__builtin_neon_vst1q_x2_v: 7330 case NEON::BI__builtin_neon_vst1_x3_v: 7331 case NEON::BI__builtin_neon_vst1q_x3_v: 7332 case NEON::BI__builtin_neon_vst1_x4_v: 7333 case NEON::BI__builtin_neon_vst1q_x4_v: { 7334 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 7335 llvm::Type *Tys[2] = { VTy, PTy }; 7336 unsigned Int; 7337 switch (BuiltinID) { 7338 case NEON::BI__builtin_neon_vst1_x2_v: 7339 case NEON::BI__builtin_neon_vst1q_x2_v: 7340 Int = Intrinsic::aarch64_neon_st1x2; 7341 break; 7342 case NEON::BI__builtin_neon_vst1_x3_v: 7343 case NEON::BI__builtin_neon_vst1q_x3_v: 7344 Int = Intrinsic::aarch64_neon_st1x3; 7345 break; 7346 case NEON::BI__builtin_neon_vst1_x4_v: 7347 case 
NEON::BI__builtin_neon_vst1q_x4_v: 7348 Int = Intrinsic::aarch64_neon_st1x4; 7349 break; 7350 } 7351 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 7352 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 7353 } 7354 case NEON::BI__builtin_neon_vld1_v: 7355 case NEON::BI__builtin_neon_vld1q_v: { 7356 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 7357 auto Alignment = CharUnits::fromQuantity( 7358 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); 7359 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 7360 } 7361 case NEON::BI__builtin_neon_vst1_v: 7362 case NEON::BI__builtin_neon_vst1q_v: 7363 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 7364 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 7365 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7366 case NEON::BI__builtin_neon_vld1_lane_v: 7367 case NEON::BI__builtin_neon_vld1q_lane_v: { 7368 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7369 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 7370 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7371 auto Alignment = CharUnits::fromQuantity( 7372 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 7373 Ops[0] = 7374 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 7375 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 7376 } 7377 case NEON::BI__builtin_neon_vld1_dup_v: 7378 case NEON::BI__builtin_neon_vld1q_dup_v: { 7379 Value *V = UndefValue::get(Ty); 7380 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 7381 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7382 auto Alignment = CharUnits::fromQuantity( 7383 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 7384 Ops[0] = 7385 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 7386 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 7387 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 7388 return EmitNeonSplat(Ops[0], CI); 7389 } 7390 case NEON::BI__builtin_neon_vst1_lane_v: 7391 case NEON::BI__builtin_neon_vst1q_lane_v: 7392 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7393 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 7394 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7395 return Builder.CreateDefaultAlignedStore(Ops[1], 7396 Builder.CreateBitCast(Ops[0], Ty)); 7397 case NEON::BI__builtin_neon_vld2_v: 7398 case NEON::BI__builtin_neon_vld2q_v: { 7399 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7400 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7401 llvm::Type *Tys[2] = { VTy, PTy }; 7402 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 7403 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 7404 Ops[0] = Builder.CreateBitCast(Ops[0], 7405 llvm::PointerType::getUnqual(Ops[1]->getType())); 7406 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7407 } 7408 case NEON::BI__builtin_neon_vld3_v: 7409 case NEON::BI__builtin_neon_vld3q_v: { 7410 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7411 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7412 llvm::Type *Tys[2] = { VTy, PTy }; 7413 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 7414 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 7415 Ops[0] = Builder.CreateBitCast(Ops[0], 7416 llvm::PointerType::getUnqual(Ops[1]->getType())); 7417 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7418 } 7419 case NEON::BI__builtin_neon_vld4_v: 7420 case NEON::BI__builtin_neon_vld4q_v: { 7421 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7422 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7423 llvm::Type *Tys[2] = { VTy, PTy }; 7424 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 7425 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 7426 Ops[0] = Builder.CreateBitCast(Ops[0], 7427 llvm::PointerType::getUnqual(Ops[1]->getType())); 7428 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7429 } 7430 case NEON::BI__builtin_neon_vld2_dup_v: 7431 case NEON::BI__builtin_neon_vld2q_dup_v: { 7432 llvm::Type *PTy = 7433 llvm::PointerType::getUnqual(VTy->getElementType()); 7434 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7435 llvm::Type *Tys[2] = { VTy, PTy }; 7436 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 7437 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 7438 Ops[0] = Builder.CreateBitCast(Ops[0], 7439 llvm::PointerType::getUnqual(Ops[1]->getType())); 7440 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7441 } 7442 case NEON::BI__builtin_neon_vld3_dup_v: 7443 case NEON::BI__builtin_neon_vld3q_dup_v: { 7444 llvm::Type *PTy = 7445 llvm::PointerType::getUnqual(VTy->getElementType()); 7446 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7447 llvm::Type *Tys[2] = { VTy, PTy }; 7448 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 7449 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 7450 Ops[0] = Builder.CreateBitCast(Ops[0], 7451 llvm::PointerType::getUnqual(Ops[1]->getType())); 7452 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7453 } 7454 case NEON::BI__builtin_neon_vld4_dup_v: 7455 case NEON::BI__builtin_neon_vld4q_dup_v: { 7456 llvm::Type *PTy = 7457 llvm::PointerType::getUnqual(VTy->getElementType()); 7458 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7459 llvm::Type *Tys[2] = { VTy, PTy }; 7460 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 7461 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 7462 Ops[0] = Builder.CreateBitCast(Ops[0], 7463 llvm::PointerType::getUnqual(Ops[1]->getType())); 7464 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7465 } 7466 case NEON::BI__builtin_neon_vld2_lane_v: 7467 case NEON::BI__builtin_neon_vld2q_lane_v: { 7468 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 7469 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 7470 Ops.push_back(Ops[1]); 7471 Ops.erase(Ops.begin()+1); 7472 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7473 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7474 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 7475 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 7476 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7477 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7478 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7479 } 7480 case NEON::BI__builtin_neon_vld3_lane_v: 7481 case NEON::BI__builtin_neon_vld3q_lane_v: { 7482 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 7483 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 7484 Ops.push_back(Ops[1]); 7485 Ops.erase(Ops.begin()+1); 7486 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7487 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7488 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7489 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7490 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 7491 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7492 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7493 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7494 } 7495 case NEON::BI__builtin_neon_vld4_lane_v: 7496 case NEON::BI__builtin_neon_vld4q_lane_v: { 7497 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 7498 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 7499 Ops.push_back(Ops[1]); 7500 Ops.erase(Ops.begin()+1); 7501 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7502 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7503 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 7504 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 7505 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 7506 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 7507 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7508 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 7509 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7510 } 7511 case NEON::BI__builtin_neon_vst2_v: 7512 case NEON::BI__builtin_neon_vst2q_v: { 7513 Ops.push_back(Ops[0]); 7514 Ops.erase(Ops.begin()); 7515 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 7516 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 7517 Ops, ""); 7518 } 7519 case NEON::BI__builtin_neon_vst2_lane_v: 7520 case NEON::BI__builtin_neon_vst2q_lane_v: { 7521 Ops.push_back(Ops[0]); 7522 Ops.erase(Ops.begin()); 7523 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 7524 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7525 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 7526 Ops, ""); 7527 } 7528 case NEON::BI__builtin_neon_vst3_v: 7529 case NEON::BI__builtin_neon_vst3q_v: { 7530 Ops.push_back(Ops[0]); 7531 Ops.erase(Ops.begin()); 7532 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 7533 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 7534 Ops, ""); 7535 } 7536 case NEON::BI__builtin_neon_vst3_lane_v: 7537 case NEON::BI__builtin_neon_vst3q_lane_v: { 7538 Ops.push_back(Ops[0]); 7539 Ops.erase(Ops.begin()); 7540 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 7541 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7542 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 7543 Ops, ""); 7544 } 7545 case NEON::BI__builtin_neon_vst4_v: 7546 case NEON::BI__builtin_neon_vst4q_v: { 7547 Ops.push_back(Ops[0]); 7548 Ops.erase(Ops.begin()); 7549 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 7550 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 7551 Ops, ""); 7552 } 7553 case NEON::BI__builtin_neon_vst4_lane_v: 7554 case NEON::BI__builtin_neon_vst4q_lane_v: { 7555 Ops.push_back(Ops[0]); 7556 Ops.erase(Ops.begin()); 7557 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 7558 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 7559 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 7560 Ops, ""); 7561 } 7562 case NEON::BI__builtin_neon_vtrn_v: 7563 case NEON::BI__builtin_neon_vtrnq_v: { 7564 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7565 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7566 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7567 Value *SV = nullptr; 7568 7569 for (unsigned vi = 0; vi != 2; ++vi) { 7570 SmallVector<uint32_t, 16> Indices; 7571 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7572 Indices.push_back(i+vi); 7573 Indices.push_back(i+e+vi); 7574 } 7575 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7576 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 7577 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7578 } 7579 return SV; 7580 } 7581 case NEON::BI__builtin_neon_vuzp_v: 7582 case NEON::BI__builtin_neon_vuzpq_v: { 7583 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7584 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7585 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7586 Value *SV = nullptr; 7587 7588 for (unsigned vi = 0; vi != 2; ++vi) { 7589 SmallVector<uint32_t, 16> Indices; 7590 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 7591 Indices.push_back(2*i+vi); 7592 7593 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7594 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 7595 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7596 } 7597 return SV; 7598 } 7599 case NEON::BI__builtin_neon_vzip_v: 7600 case NEON::BI__builtin_neon_vzipq_v: { 7601 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 7602 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7603 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7604 Value *SV = nullptr; 7605 7606 for (unsigned vi = 0; vi != 2; ++vi) { 7607 SmallVector<uint32_t, 16> Indices; 7608 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 7609 
Indices.push_back((i + vi*e) >> 1); 7610 Indices.push_back(((i + vi*e) >> 1)+e); 7611 } 7612 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 7613 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 7614 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 7615 } 7616 return SV; 7617 } 7618 case NEON::BI__builtin_neon_vqtbl1q_v: { 7619 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 7620 Ops, "vtbl1"); 7621 } 7622 case NEON::BI__builtin_neon_vqtbl2q_v: { 7623 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 7624 Ops, "vtbl2"); 7625 } 7626 case NEON::BI__builtin_neon_vqtbl3q_v: { 7627 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 7628 Ops, "vtbl3"); 7629 } 7630 case NEON::BI__builtin_neon_vqtbl4q_v: { 7631 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 7632 Ops, "vtbl4"); 7633 } 7634 case NEON::BI__builtin_neon_vqtbx1q_v: { 7635 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 7636 Ops, "vtbx1"); 7637 } 7638 case NEON::BI__builtin_neon_vqtbx2q_v: { 7639 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 7640 Ops, "vtbx2"); 7641 } 7642 case NEON::BI__builtin_neon_vqtbx3q_v: { 7643 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 7644 Ops, "vtbx3"); 7645 } 7646 case NEON::BI__builtin_neon_vqtbx4q_v: { 7647 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 7648 Ops, "vtbx4"); 7649 } 7650 case NEON::BI__builtin_neon_vsqadd_v: 7651 case NEON::BI__builtin_neon_vsqaddq_v: { 7652 Int = Intrinsic::aarch64_neon_usqadd; 7653 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 7654 } 7655 case NEON::BI__builtin_neon_vuqadd_v: 7656 case NEON::BI__builtin_neon_vuqaddq_v: { 7657 Int = Intrinsic::aarch64_neon_suqadd; 7658 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 7659 } 7660 } 7661 } 7662 7663 llvm::Value *CodeGenFunction:: 7664 
BuildVector(ArrayRef<llvm::Value*> Ops) { 7665 assert((Ops.size() & (Ops.size() - 1)) == 0 && 7666 "Not a power-of-two sized vector!"); 7667 bool AllConstants = true; 7668 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 7669 AllConstants &= isa<Constant>(Ops[i]); 7670 7671 // If this is a constant vector, create a ConstantVector. 7672 if (AllConstants) { 7673 SmallVector<llvm::Constant*, 16> CstOps; 7674 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7675 CstOps.push_back(cast<Constant>(Ops[i])); 7676 return llvm::ConstantVector::get(CstOps); 7677 } 7678 7679 // Otherwise, insertelement the values to build the vector. 7680 Value *Result = 7681 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 7682 7683 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7684 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 7685 7686 return Result; 7687 } 7688 7689 // Convert the mask from an integer type to a vector of i1. 7690 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 7691 unsigned NumElts) { 7692 7693 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 7694 cast<IntegerType>(Mask->getType())->getBitWidth()); 7695 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 7696 7697 // If we have less than 8 elements, then the starting mask was an i8 and 7698 // we need to extract down to the right number of elements. 7699 if (NumElts < 8) { 7700 uint32_t Indices[4]; 7701 for (unsigned i = 0; i != NumElts; ++i) 7702 Indices[i] = i; 7703 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 7704 makeArrayRef(Indices, NumElts), 7705 "extract"); 7706 } 7707 return MaskVec; 7708 } 7709 7710 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 7711 SmallVectorImpl<Value *> &Ops, 7712 unsigned Align) { 7713 // Cast the pointer to right type. 
Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);

  // Otherwise turn the integer mask into one i1 per stored element.
  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
}

/// Emit a load through the pointer Ops[0] under the integer mask Ops[2],
/// using alignment \p Align.  Ops[1] supplies both the loaded vector type and
/// the pass-through operand of the masked load.  Ops[0] is overwritten with
/// the pointer cast to "pointer to typeof(Ops[1])".
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
                                SmallVectorImpl<Value *> &Ops, unsigned Align) {
  // Cast the pointer to right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedLoad(Ops[0], Align);

  // Otherwise turn the integer mask into one i1 per loaded element.
  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
}

/// Emit the binary operation \p Opc on the two integer masks Ops[0] and
/// Ops[1], computed on their i1-vector form with \p NumElts elements.
///
/// \param InvertLHS  When true, the left operand is complemented before
///                   \p Opc is applied.
/// \returns the result bitcast to an integer of max(NumElts, 8) bits.
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
                              unsigned NumElts, SmallVectorImpl<Value *> &Ops,
                              bool InvertLHS = false) {
  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);

  if (InvertLHS)
    LHS = CGF.Builder.CreateNot(LHS);

  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
                                  CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
}

/// Load a subvector through Ops[0] with alignment \p Align and repeat it to
/// fill a vector with as many elements as \p DstTy.  Ops[0] is overwritten
/// with the loaded value.
///
/// \param SrcSizeInBits  Size of the loaded subvector in bits; divided by the
///                       scalar size of \p DstTy to get its element count.
static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
                                        SmallVectorImpl<Value *> &Ops,
                                        llvm::Type *DstTy,
                                        unsigned SrcSizeInBits,
                                        unsigned Align) {
  // Load the subvector.
  Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);

  // Create broadcast mask.
unsigned NumDstElts = DstTy->getVectorNumElements();
  unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();

  // The shuffle mask is the sequence 0 .. NumSrcElts-1 repeated until it has
  // NumDstElts entries, so each copy selects the whole loaded subvector.
  SmallVector<uint32_t, 8> Mask;
  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
    for (unsigned j = 0; j != NumSrcElts; ++j)
      Mask.push_back(j);

  return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
}

/// Emit a per-element select between two vectors under an integer mask.
///
/// \param Mask  Integer mask; converted to an i1 vector with one element per
///              element of \p Op0.
/// \param Op0   Value taken where the mask bit is set.
/// \param Op1   Value taken where the mask bit is clear.
/// \returns \p Op0 itself when \p Mask is a constant with all bits set,
///          otherwise the select instruction.
static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {

  // If the mask is all ones just return first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());

  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}

/// Emit an integer vector comparison of Ops[0] against Ops[1] and return the
/// result as an integer bit mask.
///
/// \param CC      Condition code: 0 EQ, 1 LT, 2 LE, 4 NE, 5 GE, 6 GT; 3 yields
///                an all-zero result and 7 an all-ones result without
///                emitting a compare.
/// \param Signed  Selects the signed or unsigned form of the ordered
///                predicates.
/// \param Ops     Ops[0] and Ops[1] are the compared vectors; Ops.back() is
///                an integer mask that is ANDed into the result unless it is
///                a constant with all bits set.
/// \returns the comparison result bitcast to an integer of
///          max(NumElts, 8) bits.
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                   bool Signed, SmallVectorImpl<Value *> &Ops) {
  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
  Value *Cmp;

  if (CC == 3) {
    // Constant "false" in every lane.
    Cmp = Constant::getNullValue(
                       llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // Constant "true" in every lane.
    Cmp = Constant::getAllOnesValue(
                       llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ?
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 7812 } 7813 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7814 } 7815 7816 const auto *C = dyn_cast<Constant>(Ops.back()); 7817 if (!C || !C->isAllOnesValue()) 7818 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 7819 7820 if (NumElts < 8) { 7821 uint32_t Indices[8]; 7822 for (unsigned i = 0; i != NumElts; ++i) 7823 Indices[i] = i; 7824 for (unsigned i = NumElts; i != 8; ++i) 7825 Indices[i] = i % NumElts + NumElts; 7826 Cmp = CGF.Builder.CreateShuffleVector( 7827 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 7828 } 7829 return CGF.Builder.CreateBitCast(Cmp, 7830 IntegerType::get(CGF.getLLVMContext(), 7831 std::max(NumElts, 8U))); 7832 } 7833 7834 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { 7835 7836 llvm::Type *Ty = Ops[0]->getType(); 7837 Value *Zero = llvm::Constant::getNullValue(Ty); 7838 Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); 7839 Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); 7840 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); 7841 if (Ops.size() == 1) 7842 return Res; 7843 return EmitX86Select(CGF, Ops[2], Res, Ops[1]); 7844 } 7845 7846 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 7847 ArrayRef<Value *> Ops) { 7848 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7849 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7850 7851 if (Ops.size() == 2) 7852 return Res; 7853 7854 assert(Ops.size() == 4); 7855 return EmitX86Select(CGF, Ops[3], Res, Ops[2]); 7856 } 7857 7858 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 7859 llvm::Type *DstTy) { 7860 unsigned NumberOfElements = DstTy->getVectorNumElements(); 7861 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 7862 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 7863 } 7864 7865 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { 7866 const Expr *CPUExpr = 
E->getArg(0)->IgnoreParenCasts();
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
  return EmitX86CpuIs(CPUStr);
}

/// Emit a comparison of one field of the runtime variable __cpu_model against
/// the value that corresponds to \p CPUStr.
///
/// \param CPUStr  Vendor, CPU type or CPU subtype name; it must be one of the
///                names listed in llvm/Support/X86TargetParser.def (checked
///                by the assert below).
/// \returns the i1 result of the equality comparison.
Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {

  llvm::Type *Int32Ty = Builder.getInt32Ty();

  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  // unsigned int __cpu_vendor;
  // unsigned int __cpu_type;
  // unsigned int __cpu_subtype;
  // unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));

  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");

  // Calculate the index needed to access the correct field based on the
  // range. Also adjust the expected value.
  //
  // The macros below expand each entry of the .def file into a StringSwitch
  // case that yields {field index, expected value}: vendors use field 0, CPU
  // types field 1 and CPU subtypes field 2.  Unknown names yield {0, 0}.
  //
  // Note that the local named Value hides the llvm::Value type for the rest
  // of this function, which is why the type is spelled llvm::Value below.
  unsigned Index;
  unsigned Value;
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define X86_VENDOR(ENUM, STRING)                                               \
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)             \
  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)                               \
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR)                            \
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#include "llvm/Support/X86TargetParser.def"
                               .Default({0, 0});
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");

  // Grab the appropriate field from __cpu_model.
llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
                         ConstantInt::get(Int32Ty, Index)};
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
  CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));

  // Check the value of the field against the requested value.
  return Builder.CreateICmpEQ(CpuValue,
                                  llvm::ConstantInt::get(Int32Ty, Value));
}

/// Emit the check for a CPU-feature call whose first argument is a string
/// literal naming the feature; forwards that single name to the
/// ArrayRef<StringRef> overload.
Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
  return EmitX86CpuSupports(FeatureStr);
}

/// Emit a test of the bits for \p FeatureStrs in the first element of the
/// __cpu_features field of the runtime variable __cpu_model.
///
/// \param FeatureStrs  Feature names; each is mapped to a bit position through
///                     the X86_FEATURE_COMPAT entries of
///                     llvm/Support/X86TargetParser.def.
/// \returns the i1 result of the test.
Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
  // Processor features and mapping to processor feature value.

  // One bit per requested feature, at the position given by the .def file.
  uint32_t FeaturesMask = 0;

  // NOTE(review): unlike the StringSwitch in EmitX86CpuIs there is no
  // .Default() here -- confirm that callers only pass names that appear in
  // the .def file.
  // NOTE(review): Feature is used as a shift count on a 32-bit value, which
  // presumably relies on every VAL in the .def file being below 32 -- confirm.
  for (const StringRef &FeatureStr : FeatureStrs) {
    unsigned Feature =
        StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
#include "llvm/Support/X86TargetParser.def"
        ;
    FeaturesMask |= (1U << Feature);
  }

  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  // unsigned int __cpu_vendor;
  // unsigned int __cpu_type;
  // unsigned int __cpu_subtype;
  // unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));

  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");

  // Grab the first (0th) element from the field __cpu_features off of the
  // global in the struct STy.
7949 Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), 7950 ConstantInt::get(Int32Ty, 0)}; 7951 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 7952 Value *Features = 7953 Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); 7954 7955 // Check the value of the bit corresponding to the feature requested. 7956 Value *Bitset = Builder.CreateAnd( 7957 Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask)); 7958 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 7959 } 7960 7961 Value *CodeGenFunction::EmitX86CpuInit() { 7962 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, 7963 /*Variadic*/ false); 7964 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); 7965 return Builder.CreateCall(Func); 7966 } 7967 7968 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 7969 const CallExpr *E) { 7970 if (BuiltinID == X86::BI__builtin_cpu_is) 7971 return EmitX86CpuIs(E); 7972 if (BuiltinID == X86::BI__builtin_cpu_supports) 7973 return EmitX86CpuSupports(E); 7974 if (BuiltinID == X86::BI__builtin_cpu_init) 7975 return EmitX86CpuInit(); 7976 7977 SmallVector<Value*, 4> Ops; 7978 7979 // Find out if any arguments are required to be integer constant expressions. 7980 unsigned ICEArguments = 0; 7981 ASTContext::GetBuiltinTypeError Error; 7982 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7983 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7984 7985 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 7986 // If this is a normal argument, just emit it as a scalar. 7987 if ((ICEArguments & (1 << i)) == 0) { 7988 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7989 continue; 7990 } 7991 7992 // If this is required to be a constant, constant fold it so that we know 7993 // that the generated intrinsic gets a ConstantInt. 
7994 llvm::APSInt Result; 7995 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 7996 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 7997 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 7998 } 7999 8000 // These exist so that the builtin that takes an immediate can be bounds 8001 // checked by clang to avoid passing bad immediates to the backend. Since 8002 // AVX has a larger immediate than SSE we would need separate builtins to 8003 // do the different bounds checking. Rather than create a clang specific 8004 // SSE only builtin, this implements eight separate builtins to match gcc 8005 // implementation. 8006 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 8007 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 8008 llvm::Function *F = CGM.getIntrinsic(ID); 8009 return Builder.CreateCall(F, Ops); 8010 }; 8011 8012 // For the vector forms of FP comparisons, translate the builtins directly to 8013 // IR. 8014 // TODO: The builtins could be removed if the SSE header files used vector 8015 // extension comparisons directly (vector ordered/unordered may need 8016 // additional support via __builtin_isnan()). 
8017 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 8018 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 8019 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 8020 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 8021 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 8022 return Builder.CreateBitCast(Sext, FPVecTy); 8023 }; 8024 8025 switch (BuiltinID) { 8026 default: return nullptr; 8027 case X86::BI_mm_prefetch: { 8028 Value *Address = Ops[0]; 8029 ConstantInt *C = cast<ConstantInt>(Ops[1]); 8030 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); 8031 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); 8032 Value *Data = ConstantInt::get(Int32Ty, 1); 8033 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 8034 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 8035 } 8036 case X86::BI_mm_clflush: { 8037 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 8038 Ops[0]); 8039 } 8040 case X86::BI_mm_lfence: { 8041 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 8042 } 8043 case X86::BI_mm_mfence: { 8044 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 8045 } 8046 case X86::BI_mm_sfence: { 8047 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 8048 } 8049 case X86::BI_mm_pause: { 8050 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 8051 } 8052 case X86::BI__rdtsc: { 8053 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 8054 } 8055 case X86::BI__builtin_ia32_undef128: 8056 case X86::BI__builtin_ia32_undef256: 8057 case X86::BI__builtin_ia32_undef512: 8058 // The x86 definition of "undef" is not the same as the LLVM definition 8059 // (PR32176). We leave optimizing away an unnecessary zero constant to the 8060 // IR optimizer and backend. 
8061 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 8062 // value, we should use that here instead of a zero. 8063 return llvm::Constant::getNullValue(ConvertType(E->getType())); 8064 case X86::BI__builtin_ia32_vec_init_v8qi: 8065 case X86::BI__builtin_ia32_vec_init_v4hi: 8066 case X86::BI__builtin_ia32_vec_init_v2si: 8067 return Builder.CreateBitCast(BuildVector(Ops), 8068 llvm::Type::getX86_MMXTy(getLLVMContext())); 8069 case X86::BI__builtin_ia32_vec_ext_v2si: 8070 return Builder.CreateExtractElement(Ops[0], 8071 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 8072 case X86::BI_mm_setcsr: 8073 case X86::BI__builtin_ia32_ldmxcsr: { 8074 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 8075 Builder.CreateStore(Ops[0], Tmp); 8076 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 8077 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 8078 } 8079 case X86::BI_mm_getcsr: 8080 case X86::BI__builtin_ia32_stmxcsr: { 8081 Address Tmp = CreateMemTemp(E->getType()); 8082 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 8083 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 8084 return Builder.CreateLoad(Tmp, "stmxcsr"); 8085 } 8086 case X86::BI__builtin_ia32_xsave: 8087 case X86::BI__builtin_ia32_xsave64: 8088 case X86::BI__builtin_ia32_xrstor: 8089 case X86::BI__builtin_ia32_xrstor64: 8090 case X86::BI__builtin_ia32_xsaveopt: 8091 case X86::BI__builtin_ia32_xsaveopt64: 8092 case X86::BI__builtin_ia32_xrstors: 8093 case X86::BI__builtin_ia32_xrstors64: 8094 case X86::BI__builtin_ia32_xsavec: 8095 case X86::BI__builtin_ia32_xsavec64: 8096 case X86::BI__builtin_ia32_xsaves: 8097 case X86::BI__builtin_ia32_xsaves64: { 8098 Intrinsic::ID ID; 8099 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 8100 case X86::BI__builtin_ia32_##NAME: \ 8101 ID = Intrinsic::x86_##NAME; \ 8102 break 8103 switch (BuiltinID) { 8104 default: llvm_unreachable("Unsupported intrinsic!"); 8105 INTRINSIC_X86_XSAVE_ID(xsave); 8106 
INTRINSIC_X86_XSAVE_ID(xsave64); 8107 INTRINSIC_X86_XSAVE_ID(xrstor); 8108 INTRINSIC_X86_XSAVE_ID(xrstor64); 8109 INTRINSIC_X86_XSAVE_ID(xsaveopt); 8110 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 8111 INTRINSIC_X86_XSAVE_ID(xrstors); 8112 INTRINSIC_X86_XSAVE_ID(xrstors64); 8113 INTRINSIC_X86_XSAVE_ID(xsavec); 8114 INTRINSIC_X86_XSAVE_ID(xsavec64); 8115 INTRINSIC_X86_XSAVE_ID(xsaves); 8116 INTRINSIC_X86_XSAVE_ID(xsaves64); 8117 } 8118 #undef INTRINSIC_X86_XSAVE_ID 8119 Value *Mhi = Builder.CreateTrunc( 8120 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 8121 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 8122 Ops[1] = Mhi; 8123 Ops.push_back(Mlo); 8124 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 8125 } 8126 case X86::BI__builtin_ia32_storedqudi128_mask: 8127 case X86::BI__builtin_ia32_storedqusi128_mask: 8128 case X86::BI__builtin_ia32_storedquhi128_mask: 8129 case X86::BI__builtin_ia32_storedquqi128_mask: 8130 case X86::BI__builtin_ia32_storeupd128_mask: 8131 case X86::BI__builtin_ia32_storeups128_mask: 8132 case X86::BI__builtin_ia32_storedqudi256_mask: 8133 case X86::BI__builtin_ia32_storedqusi256_mask: 8134 case X86::BI__builtin_ia32_storedquhi256_mask: 8135 case X86::BI__builtin_ia32_storedquqi256_mask: 8136 case X86::BI__builtin_ia32_storeupd256_mask: 8137 case X86::BI__builtin_ia32_storeups256_mask: 8138 case X86::BI__builtin_ia32_storedqudi512_mask: 8139 case X86::BI__builtin_ia32_storedqusi512_mask: 8140 case X86::BI__builtin_ia32_storedquhi512_mask: 8141 case X86::BI__builtin_ia32_storedquqi512_mask: 8142 case X86::BI__builtin_ia32_storeupd512_mask: 8143 case X86::BI__builtin_ia32_storeups512_mask: 8144 return EmitX86MaskedStore(*this, Ops, 1); 8145 8146 case X86::BI__builtin_ia32_storess128_mask: 8147 case X86::BI__builtin_ia32_storesd128_mask: { 8148 return EmitX86MaskedStore(*this, Ops, 16); 8149 } 8150 case X86::BI__builtin_ia32_vpopcntb_128: 8151 case X86::BI__builtin_ia32_vpopcntd_128: 8152 case 
X86::BI__builtin_ia32_vpopcntq_128: 8153 case X86::BI__builtin_ia32_vpopcntw_128: 8154 case X86::BI__builtin_ia32_vpopcntb_256: 8155 case X86::BI__builtin_ia32_vpopcntd_256: 8156 case X86::BI__builtin_ia32_vpopcntq_256: 8157 case X86::BI__builtin_ia32_vpopcntw_256: 8158 case X86::BI__builtin_ia32_vpopcntb_512: 8159 case X86::BI__builtin_ia32_vpopcntd_512: 8160 case X86::BI__builtin_ia32_vpopcntq_512: 8161 case X86::BI__builtin_ia32_vpopcntw_512: { 8162 llvm::Type *ResultType = ConvertType(E->getType()); 8163 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8164 return Builder.CreateCall(F, Ops); 8165 } 8166 case X86::BI__builtin_ia32_cvtmask2b128: 8167 case X86::BI__builtin_ia32_cvtmask2b256: 8168 case X86::BI__builtin_ia32_cvtmask2b512: 8169 case X86::BI__builtin_ia32_cvtmask2w128: 8170 case X86::BI__builtin_ia32_cvtmask2w256: 8171 case X86::BI__builtin_ia32_cvtmask2w512: 8172 case X86::BI__builtin_ia32_cvtmask2d128: 8173 case X86::BI__builtin_ia32_cvtmask2d256: 8174 case X86::BI__builtin_ia32_cvtmask2d512: 8175 case X86::BI__builtin_ia32_cvtmask2q128: 8176 case X86::BI__builtin_ia32_cvtmask2q256: 8177 case X86::BI__builtin_ia32_cvtmask2q512: 8178 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 8179 8180 case X86::BI__builtin_ia32_movdqa32store128_mask: 8181 case X86::BI__builtin_ia32_movdqa64store128_mask: 8182 case X86::BI__builtin_ia32_storeaps128_mask: 8183 case X86::BI__builtin_ia32_storeapd128_mask: 8184 case X86::BI__builtin_ia32_movdqa32store256_mask: 8185 case X86::BI__builtin_ia32_movdqa64store256_mask: 8186 case X86::BI__builtin_ia32_storeaps256_mask: 8187 case X86::BI__builtin_ia32_storeapd256_mask: 8188 case X86::BI__builtin_ia32_movdqa32store512_mask: 8189 case X86::BI__builtin_ia32_movdqa64store512_mask: 8190 case X86::BI__builtin_ia32_storeaps512_mask: 8191 case X86::BI__builtin_ia32_storeapd512_mask: { 8192 unsigned Align = 8193 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 8194 
return EmitX86MaskedStore(*this, Ops, Align); 8195 } 8196 case X86::BI__builtin_ia32_loadups128_mask: 8197 case X86::BI__builtin_ia32_loadups256_mask: 8198 case X86::BI__builtin_ia32_loadups512_mask: 8199 case X86::BI__builtin_ia32_loadupd128_mask: 8200 case X86::BI__builtin_ia32_loadupd256_mask: 8201 case X86::BI__builtin_ia32_loadupd512_mask: 8202 case X86::BI__builtin_ia32_loaddquqi128_mask: 8203 case X86::BI__builtin_ia32_loaddquqi256_mask: 8204 case X86::BI__builtin_ia32_loaddquqi512_mask: 8205 case X86::BI__builtin_ia32_loaddquhi128_mask: 8206 case X86::BI__builtin_ia32_loaddquhi256_mask: 8207 case X86::BI__builtin_ia32_loaddquhi512_mask: 8208 case X86::BI__builtin_ia32_loaddqusi128_mask: 8209 case X86::BI__builtin_ia32_loaddqusi256_mask: 8210 case X86::BI__builtin_ia32_loaddqusi512_mask: 8211 case X86::BI__builtin_ia32_loaddqudi128_mask: 8212 case X86::BI__builtin_ia32_loaddqudi256_mask: 8213 case X86::BI__builtin_ia32_loaddqudi512_mask: 8214 return EmitX86MaskedLoad(*this, Ops, 1); 8215 8216 case X86::BI__builtin_ia32_loadss128_mask: 8217 case X86::BI__builtin_ia32_loadsd128_mask: 8218 return EmitX86MaskedLoad(*this, Ops, 16); 8219 8220 case X86::BI__builtin_ia32_loadaps128_mask: 8221 case X86::BI__builtin_ia32_loadaps256_mask: 8222 case X86::BI__builtin_ia32_loadaps512_mask: 8223 case X86::BI__builtin_ia32_loadapd128_mask: 8224 case X86::BI__builtin_ia32_loadapd256_mask: 8225 case X86::BI__builtin_ia32_loadapd512_mask: 8226 case X86::BI__builtin_ia32_movdqa32load128_mask: 8227 case X86::BI__builtin_ia32_movdqa32load256_mask: 8228 case X86::BI__builtin_ia32_movdqa32load512_mask: 8229 case X86::BI__builtin_ia32_movdqa64load128_mask: 8230 case X86::BI__builtin_ia32_movdqa64load256_mask: 8231 case X86::BI__builtin_ia32_movdqa64load512_mask: { 8232 unsigned Align = 8233 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 8234 return EmitX86MaskedLoad(*this, Ops, Align); 8235 } 8236 8237 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 8238 
case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 8239 llvm::Type *DstTy = ConvertType(E->getType()); 8240 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 8241 } 8242 8243 case X86::BI__builtin_ia32_storehps: 8244 case X86::BI__builtin_ia32_storelps: { 8245 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 8246 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 8247 8248 // cast val v2i64 8249 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 8250 8251 // extract (0, 1) 8252 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 8253 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 8254 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 8255 8256 // cast pointer to i64 & store 8257 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 8258 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 8259 } 8260 case X86::BI__builtin_ia32_palignr128: 8261 case X86::BI__builtin_ia32_palignr256: 8262 case X86::BI__builtin_ia32_palignr512_mask: { 8263 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8264 8265 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 8266 assert(NumElts % 16 == 0); 8267 8268 // If palignr is shifting the pair of vectors more than the size of two 8269 // lanes, emit zero. 8270 if (ShiftVal >= 32) 8271 return llvm::Constant::getNullValue(ConvertType(E->getType())); 8272 8273 // If palignr is shifting the pair of input vectors more than one lane, 8274 // but less than two lanes, convert to shifting in zeroes. 8275 if (ShiftVal > 16) { 8276 ShiftVal -= 16; 8277 Ops[1] = Ops[0]; 8278 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 8279 } 8280 8281 uint32_t Indices[64]; 8282 // 256-bit palignr operates on 128-bit lanes so we need to handle that 8283 for (unsigned l = 0; l != NumElts; l += 16) { 8284 for (unsigned i = 0; i != 16; ++i) { 8285 unsigned Idx = ShiftVal + i; 8286 if (Idx >= 16) 8287 Idx += NumElts - 16; // End of lane, switch operand. 
8288 Indices[l + i] = Idx + l; 8289 } 8290 } 8291 8292 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 8293 makeArrayRef(Indices, NumElts), 8294 "palignr"); 8295 8296 // If this isn't a masked builtin, just return the align operation. 8297 if (Ops.size() == 3) 8298 return Align; 8299 8300 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 8301 } 8302 8303 case X86::BI__builtin_ia32_vperm2f128_pd256: 8304 case X86::BI__builtin_ia32_vperm2f128_ps256: 8305 case X86::BI__builtin_ia32_vperm2f128_si256: 8306 case X86::BI__builtin_ia32_permti256: { 8307 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8308 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 8309 8310 // This takes a very simple approach since there are two lanes and a 8311 // shuffle can have 2 inputs. So we reserve the first input for the first 8312 // lane and the second input for the second lane. This may result in 8313 // duplicate sources, but this can be dealt with in the backend. 8314 8315 Value *OutOps[2]; 8316 uint32_t Indices[8]; 8317 for (unsigned l = 0; l != 2; ++l) { 8318 // Determine the source for this lane. 8319 if (Imm & (1 << ((l * 4) + 3))) 8320 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); 8321 else if (Imm & (1 << ((l * 4) + 1))) 8322 OutOps[l] = Ops[1]; 8323 else 8324 OutOps[l] = Ops[0]; 8325 8326 for (unsigned i = 0; i != NumElts/2; ++i) { 8327 // Start with ith element of the source for this lane. 8328 unsigned Idx = (l * NumElts) + i; 8329 // If bit 0 of the immediate half is set, switch to the high half of 8330 // the source. 
8331 if (Imm & (1 << (l * 4))) 8332 Idx += NumElts/2; 8333 Indices[(l * (NumElts/2)) + i] = Idx; 8334 } 8335 } 8336 8337 return Builder.CreateShuffleVector(OutOps[0], OutOps[1], 8338 makeArrayRef(Indices, NumElts), 8339 "vperm"); 8340 } 8341 8342 case X86::BI__builtin_ia32_movnti: 8343 case X86::BI__builtin_ia32_movnti64: 8344 case X86::BI__builtin_ia32_movntsd: 8345 case X86::BI__builtin_ia32_movntss: { 8346 llvm::MDNode *Node = llvm::MDNode::get( 8347 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 8348 8349 Value *Ptr = Ops[0]; 8350 Value *Src = Ops[1]; 8351 8352 // Extract the 0'th element of the source vector. 8353 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 8354 BuiltinID == X86::BI__builtin_ia32_movntss) 8355 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 8356 8357 // Convert the type of the pointer to a pointer to the stored type. 8358 Value *BC = Builder.CreateBitCast( 8359 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); 8360 8361 // Unaligned nontemporal store of the scalar value. 
8362 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); 8363 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 8364 SI->setAlignment(1); 8365 return SI; 8366 } 8367 8368 case X86::BI__builtin_ia32_selectb_128: 8369 case X86::BI__builtin_ia32_selectb_256: 8370 case X86::BI__builtin_ia32_selectb_512: 8371 case X86::BI__builtin_ia32_selectw_128: 8372 case X86::BI__builtin_ia32_selectw_256: 8373 case X86::BI__builtin_ia32_selectw_512: 8374 case X86::BI__builtin_ia32_selectd_128: 8375 case X86::BI__builtin_ia32_selectd_256: 8376 case X86::BI__builtin_ia32_selectd_512: 8377 case X86::BI__builtin_ia32_selectq_128: 8378 case X86::BI__builtin_ia32_selectq_256: 8379 case X86::BI__builtin_ia32_selectq_512: 8380 case X86::BI__builtin_ia32_selectps_128: 8381 case X86::BI__builtin_ia32_selectps_256: 8382 case X86::BI__builtin_ia32_selectps_512: 8383 case X86::BI__builtin_ia32_selectpd_128: 8384 case X86::BI__builtin_ia32_selectpd_256: 8385 case X86::BI__builtin_ia32_selectpd_512: 8386 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 8387 case X86::BI__builtin_ia32_cmpb128_mask: 8388 case X86::BI__builtin_ia32_cmpb256_mask: 8389 case X86::BI__builtin_ia32_cmpb512_mask: 8390 case X86::BI__builtin_ia32_cmpw128_mask: 8391 case X86::BI__builtin_ia32_cmpw256_mask: 8392 case X86::BI__builtin_ia32_cmpw512_mask: 8393 case X86::BI__builtin_ia32_cmpd128_mask: 8394 case X86::BI__builtin_ia32_cmpd256_mask: 8395 case X86::BI__builtin_ia32_cmpd512_mask: 8396 case X86::BI__builtin_ia32_cmpq128_mask: 8397 case X86::BI__builtin_ia32_cmpq256_mask: 8398 case X86::BI__builtin_ia32_cmpq512_mask: { 8399 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 8400 return EmitX86MaskedCompare(*this, CC, true, Ops); 8401 } 8402 case X86::BI__builtin_ia32_ucmpb128_mask: 8403 case X86::BI__builtin_ia32_ucmpb256_mask: 8404 case X86::BI__builtin_ia32_ucmpb512_mask: 8405 case X86::BI__builtin_ia32_ucmpw128_mask: 8406 case X86::BI__builtin_ia32_ucmpw256_mask: 8407 
case X86::BI__builtin_ia32_ucmpw512_mask: 8408 case X86::BI__builtin_ia32_ucmpd128_mask: 8409 case X86::BI__builtin_ia32_ucmpd256_mask: 8410 case X86::BI__builtin_ia32_ucmpd512_mask: 8411 case X86::BI__builtin_ia32_ucmpq128_mask: 8412 case X86::BI__builtin_ia32_ucmpq256_mask: 8413 case X86::BI__builtin_ia32_ucmpq512_mask: { 8414 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 8415 return EmitX86MaskedCompare(*this, CC, false, Ops); 8416 } 8417 8418 case X86::BI__builtin_ia32_kandhi: 8419 return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); 8420 case X86::BI__builtin_ia32_kandnhi: 8421 return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true); 8422 case X86::BI__builtin_ia32_korhi: 8423 return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); 8424 case X86::BI__builtin_ia32_kxnorhi: 8425 return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true); 8426 case X86::BI__builtin_ia32_kxorhi: 8427 return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops); 8428 case X86::BI__builtin_ia32_knothi: { 8429 Ops[0] = getMaskVecValue(*this, Ops[0], 16); 8430 return Builder.CreateBitCast(Builder.CreateNot(Ops[0]), 8431 Builder.getInt16Ty()); 8432 } 8433 8434 case X86::BI__builtin_ia32_vplzcntd_128_mask: 8435 case X86::BI__builtin_ia32_vplzcntd_256_mask: 8436 case X86::BI__builtin_ia32_vplzcntd_512_mask: 8437 case X86::BI__builtin_ia32_vplzcntq_128_mask: 8438 case X86::BI__builtin_ia32_vplzcntq_256_mask: 8439 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 8440 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 8441 return EmitX86Select(*this, Ops[2], 8442 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 8443 Ops[1]); 8444 } 8445 8446 case X86::BI__builtin_ia32_pabsb128: 8447 case X86::BI__builtin_ia32_pabsw128: 8448 case X86::BI__builtin_ia32_pabsd128: 8449 case X86::BI__builtin_ia32_pabsb256: 8450 case X86::BI__builtin_ia32_pabsw256: 8451 case X86::BI__builtin_ia32_pabsd256: 8452 case 
X86::BI__builtin_ia32_pabsq128_mask: 8453 case X86::BI__builtin_ia32_pabsq256_mask: 8454 case X86::BI__builtin_ia32_pabsb512_mask: 8455 case X86::BI__builtin_ia32_pabsw512_mask: 8456 case X86::BI__builtin_ia32_pabsd512_mask: 8457 case X86::BI__builtin_ia32_pabsq512_mask: 8458 return EmitX86Abs(*this, Ops); 8459 8460 case X86::BI__builtin_ia32_pmaxsb128: 8461 case X86::BI__builtin_ia32_pmaxsw128: 8462 case X86::BI__builtin_ia32_pmaxsd128: 8463 case X86::BI__builtin_ia32_pmaxsq128_mask: 8464 case X86::BI__builtin_ia32_pmaxsb256: 8465 case X86::BI__builtin_ia32_pmaxsw256: 8466 case X86::BI__builtin_ia32_pmaxsd256: 8467 case X86::BI__builtin_ia32_pmaxsq256_mask: 8468 case X86::BI__builtin_ia32_pmaxsb512_mask: 8469 case X86::BI__builtin_ia32_pmaxsw512_mask: 8470 case X86::BI__builtin_ia32_pmaxsd512_mask: 8471 case X86::BI__builtin_ia32_pmaxsq512_mask: 8472 return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); 8473 case X86::BI__builtin_ia32_pmaxub128: 8474 case X86::BI__builtin_ia32_pmaxuw128: 8475 case X86::BI__builtin_ia32_pmaxud128: 8476 case X86::BI__builtin_ia32_pmaxuq128_mask: 8477 case X86::BI__builtin_ia32_pmaxub256: 8478 case X86::BI__builtin_ia32_pmaxuw256: 8479 case X86::BI__builtin_ia32_pmaxud256: 8480 case X86::BI__builtin_ia32_pmaxuq256_mask: 8481 case X86::BI__builtin_ia32_pmaxub512_mask: 8482 case X86::BI__builtin_ia32_pmaxuw512_mask: 8483 case X86::BI__builtin_ia32_pmaxud512_mask: 8484 case X86::BI__builtin_ia32_pmaxuq512_mask: 8485 return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); 8486 case X86::BI__builtin_ia32_pminsb128: 8487 case X86::BI__builtin_ia32_pminsw128: 8488 case X86::BI__builtin_ia32_pminsd128: 8489 case X86::BI__builtin_ia32_pminsq128_mask: 8490 case X86::BI__builtin_ia32_pminsb256: 8491 case X86::BI__builtin_ia32_pminsw256: 8492 case X86::BI__builtin_ia32_pminsd256: 8493 case X86::BI__builtin_ia32_pminsq256_mask: 8494 case X86::BI__builtin_ia32_pminsb512_mask: 8495 case X86::BI__builtin_ia32_pminsw512_mask: 8496 case 
X86::BI__builtin_ia32_pminsd512_mask: 8497 case X86::BI__builtin_ia32_pminsq512_mask: 8498 return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); 8499 case X86::BI__builtin_ia32_pminub128: 8500 case X86::BI__builtin_ia32_pminuw128: 8501 case X86::BI__builtin_ia32_pminud128: 8502 case X86::BI__builtin_ia32_pminuq128_mask: 8503 case X86::BI__builtin_ia32_pminub256: 8504 case X86::BI__builtin_ia32_pminuw256: 8505 case X86::BI__builtin_ia32_pminud256: 8506 case X86::BI__builtin_ia32_pminuq256_mask: 8507 case X86::BI__builtin_ia32_pminub512_mask: 8508 case X86::BI__builtin_ia32_pminuw512_mask: 8509 case X86::BI__builtin_ia32_pminud512_mask: 8510 case X86::BI__builtin_ia32_pminuq512_mask: 8511 return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); 8512 8513 // 3DNow! 8514 case X86::BI__builtin_ia32_pswapdsf: 8515 case X86::BI__builtin_ia32_pswapdsi: { 8516 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 8517 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 8518 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 8519 return Builder.CreateCall(F, Ops, "pswapd"); 8520 } 8521 case X86::BI__builtin_ia32_rdrand16_step: 8522 case X86::BI__builtin_ia32_rdrand32_step: 8523 case X86::BI__builtin_ia32_rdrand64_step: 8524 case X86::BI__builtin_ia32_rdseed16_step: 8525 case X86::BI__builtin_ia32_rdseed32_step: 8526 case X86::BI__builtin_ia32_rdseed64_step: { 8527 Intrinsic::ID ID; 8528 switch (BuiltinID) { 8529 default: llvm_unreachable("Unsupported intrinsic!"); 8530 case X86::BI__builtin_ia32_rdrand16_step: 8531 ID = Intrinsic::x86_rdrand_16; 8532 break; 8533 case X86::BI__builtin_ia32_rdrand32_step: 8534 ID = Intrinsic::x86_rdrand_32; 8535 break; 8536 case X86::BI__builtin_ia32_rdrand64_step: 8537 ID = Intrinsic::x86_rdrand_64; 8538 break; 8539 case X86::BI__builtin_ia32_rdseed16_step: 8540 ID = Intrinsic::x86_rdseed_16; 8541 break; 8542 case X86::BI__builtin_ia32_rdseed32_step: 8543 ID = Intrinsic::x86_rdseed_32; 8544 break; 8545 case 
X86::BI__builtin_ia32_rdseed64_step: 8546 ID = Intrinsic::x86_rdseed_64; 8547 break; 8548 } 8549 8550 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 8551 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 8552 Ops[0]); 8553 return Builder.CreateExtractValue(Call, 1); 8554 } 8555 8556 // SSE packed comparison intrinsics 8557 case X86::BI__builtin_ia32_cmpeqps: 8558 case X86::BI__builtin_ia32_cmpeqpd: 8559 return getVectorFCmpIR(CmpInst::FCMP_OEQ); 8560 case X86::BI__builtin_ia32_cmpltps: 8561 case X86::BI__builtin_ia32_cmpltpd: 8562 return getVectorFCmpIR(CmpInst::FCMP_OLT); 8563 case X86::BI__builtin_ia32_cmpleps: 8564 case X86::BI__builtin_ia32_cmplepd: 8565 return getVectorFCmpIR(CmpInst::FCMP_OLE); 8566 case X86::BI__builtin_ia32_cmpunordps: 8567 case X86::BI__builtin_ia32_cmpunordpd: 8568 return getVectorFCmpIR(CmpInst::FCMP_UNO); 8569 case X86::BI__builtin_ia32_cmpneqps: 8570 case X86::BI__builtin_ia32_cmpneqpd: 8571 return getVectorFCmpIR(CmpInst::FCMP_UNE); 8572 case X86::BI__builtin_ia32_cmpnltps: 8573 case X86::BI__builtin_ia32_cmpnltpd: 8574 return getVectorFCmpIR(CmpInst::FCMP_UGE); 8575 case X86::BI__builtin_ia32_cmpnleps: 8576 case X86::BI__builtin_ia32_cmpnlepd: 8577 return getVectorFCmpIR(CmpInst::FCMP_UGT); 8578 case X86::BI__builtin_ia32_cmpordps: 8579 case X86::BI__builtin_ia32_cmpordpd: 8580 return getVectorFCmpIR(CmpInst::FCMP_ORD); 8581 case X86::BI__builtin_ia32_cmpps: 8582 case X86::BI__builtin_ia32_cmpps256: 8583 case X86::BI__builtin_ia32_cmppd: 8584 case X86::BI__builtin_ia32_cmppd256: { 8585 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8586 // If this one of the SSE immediates, we can use native IR. 
8587 if (CC < 8) { 8588 FCmpInst::Predicate Pred; 8589 switch (CC) { 8590 case 0: Pred = FCmpInst::FCMP_OEQ; break; 8591 case 1: Pred = FCmpInst::FCMP_OLT; break; 8592 case 2: Pred = FCmpInst::FCMP_OLE; break; 8593 case 3: Pred = FCmpInst::FCMP_UNO; break; 8594 case 4: Pred = FCmpInst::FCMP_UNE; break; 8595 case 5: Pred = FCmpInst::FCMP_UGE; break; 8596 case 6: Pred = FCmpInst::FCMP_UGT; break; 8597 case 7: Pred = FCmpInst::FCMP_ORD; break; 8598 } 8599 return getVectorFCmpIR(Pred); 8600 } 8601 8602 // We can't handle 8-31 immediates with native IR, use the intrinsic. 8603 // Except for predicates that create constants. 8604 Intrinsic::ID ID; 8605 switch (BuiltinID) { 8606 default: llvm_unreachable("Unsupported intrinsic!"); 8607 case X86::BI__builtin_ia32_cmpps: 8608 ID = Intrinsic::x86_sse_cmp_ps; 8609 break; 8610 case X86::BI__builtin_ia32_cmpps256: 8611 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8612 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 8613 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8614 Value *Constant = (CC == 0xf || CC == 0x1f) ? 8615 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : 8616 llvm::Constant::getNullValue(Builder.getInt32Ty()); 8617 Value *Vec = Builder.CreateVectorSplat( 8618 Ops[0]->getType()->getVectorNumElements(), Constant); 8619 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8620 } 8621 ID = Intrinsic::x86_avx_cmp_ps_256; 8622 break; 8623 case X86::BI__builtin_ia32_cmppd: 8624 ID = Intrinsic::x86_sse2_cmp_pd; 8625 break; 8626 case X86::BI__builtin_ia32_cmppd256: 8627 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 8628 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 8629 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 8630 Value *Constant = (CC == 0xf || CC == 0x1f) ? 
8631 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : 8632 llvm::Constant::getNullValue(Builder.getInt64Ty()); 8633 Value *Vec = Builder.CreateVectorSplat( 8634 Ops[0]->getType()->getVectorNumElements(), Constant); 8635 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 8636 } 8637 ID = Intrinsic::x86_avx_cmp_pd_256; 8638 break; 8639 } 8640 8641 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 8642 } 8643 8644 // SSE scalar comparison intrinsics 8645 case X86::BI__builtin_ia32_cmpeqss: 8646 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 8647 case X86::BI__builtin_ia32_cmpltss: 8648 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 8649 case X86::BI__builtin_ia32_cmpless: 8650 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 8651 case X86::BI__builtin_ia32_cmpunordss: 8652 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 8653 case X86::BI__builtin_ia32_cmpneqss: 8654 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 8655 case X86::BI__builtin_ia32_cmpnltss: 8656 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 8657 case X86::BI__builtin_ia32_cmpnless: 8658 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 8659 case X86::BI__builtin_ia32_cmpordss: 8660 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 8661 case X86::BI__builtin_ia32_cmpeqsd: 8662 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 8663 case X86::BI__builtin_ia32_cmpltsd: 8664 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 8665 case X86::BI__builtin_ia32_cmplesd: 8666 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 8667 case X86::BI__builtin_ia32_cmpunordsd: 8668 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 8669 case X86::BI__builtin_ia32_cmpneqsd: 8670 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 8671 case X86::BI__builtin_ia32_cmpnltsd: 8672 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 8673 case X86::BI__builtin_ia32_cmpnlesd: 8674 return 
getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 8675 case X86::BI__builtin_ia32_cmpordsd: 8676 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 8677 8678 case X86::BI__emul: 8679 case X86::BI__emulu: { 8680 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 8681 bool isSigned = (BuiltinID == X86::BI__emul); 8682 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 8683 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 8684 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 8685 } 8686 case X86::BI__mulh: 8687 case X86::BI__umulh: 8688 case X86::BI_mul128: 8689 case X86::BI_umul128: { 8690 llvm::Type *ResType = ConvertType(E->getType()); 8691 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 8692 8693 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 8694 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 8695 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 8696 8697 Value *MulResult, *HigherBits; 8698 if (IsSigned) { 8699 MulResult = Builder.CreateNSWMul(LHS, RHS); 8700 HigherBits = Builder.CreateAShr(MulResult, 64); 8701 } else { 8702 MulResult = Builder.CreateNUWMul(LHS, RHS); 8703 HigherBits = Builder.CreateLShr(MulResult, 64); 8704 } 8705 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 8706 8707 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 8708 return HigherBits; 8709 8710 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 8711 Builder.CreateStore(HigherBits, HighBitsAddress); 8712 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 8713 } 8714 8715 case X86::BI__faststorefence: { 8716 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8717 llvm::SyncScope::System); 8718 } 8719 case X86::BI_ReadWriteBarrier: 8720 case X86::BI_ReadBarrier: 8721 case X86::BI_WriteBarrier: { 8722 return 
Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8723 llvm::SyncScope::SingleThread); 8724 } 8725 case X86::BI_BitScanForward: 8726 case X86::BI_BitScanForward64: 8727 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 8728 case X86::BI_BitScanReverse: 8729 case X86::BI_BitScanReverse64: 8730 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 8731 8732 case X86::BI_InterlockedAnd64: 8733 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 8734 case X86::BI_InterlockedExchange64: 8735 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 8736 case X86::BI_InterlockedExchangeAdd64: 8737 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 8738 case X86::BI_InterlockedExchangeSub64: 8739 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 8740 case X86::BI_InterlockedOr64: 8741 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 8742 case X86::BI_InterlockedXor64: 8743 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 8744 case X86::BI_InterlockedDecrement64: 8745 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 8746 case X86::BI_InterlockedIncrement64: 8747 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 8748 case X86::BI_InterlockedCompareExchange128: { 8749 // InterlockedCompareExchange128 doesn't directly refer to 128bit ints, 8750 // instead it takes pointers to 64bit ints for Destination and 8751 // ComparandResult, and exchange is taken as two 64bit ints (high & low). 8752 // The previous value is written to ComparandResult, and success is 8753 // returned. 
8754 8755 llvm::Type *Int128Ty = Builder.getInt128Ty(); 8756 llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); 8757 8758 Value *Destination = 8759 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy); 8760 Value *ExchangeHigh128 = 8761 Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty); 8762 Value *ExchangeLow128 = 8763 Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty); 8764 Address ComparandResult( 8765 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy), 8766 getContext().toCharUnitsFromBits(128)); 8767 8768 Value *Exchange = Builder.CreateOr( 8769 Builder.CreateShl(ExchangeHigh128, 64, "", false, false), 8770 ExchangeLow128); 8771 8772 Value *Comparand = Builder.CreateLoad(ComparandResult); 8773 8774 AtomicCmpXchgInst *CXI = 8775 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 8776 AtomicOrdering::SequentiallyConsistent, 8777 AtomicOrdering::SequentiallyConsistent); 8778 CXI->setVolatile(true); 8779 8780 // Write the result back to the inout pointer. 8781 Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult); 8782 8783 // Get the success boolean and zero extend it to i8. 8784 Value *Success = Builder.CreateExtractValue(CXI, 1); 8785 return Builder.CreateZExt(Success, ConvertType(E->getType())); 8786 } 8787 8788 case X86::BI_AddressOfReturnAddress: { 8789 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); 8790 return Builder.CreateCall(F); 8791 } 8792 case X86::BI__stosb: { 8793 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 8794 // instruction, but it will create a memset that won't be optimized away. 8795 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); 8796 } 8797 case X86::BI__ud2: 8798 // llvm.trap makes a ud2a instruction on x86. 8799 return EmitTrapCall(Intrinsic::trap); 8800 case X86::BI__int2c: { 8801 // This syscall signals a driver assertion failure in x86 NT kernels. 
8802 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 8803 llvm::InlineAsm *IA = 8804 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); 8805 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 8806 getLLVMContext(), llvm::AttributeList::FunctionIndex, 8807 llvm::Attribute::NoReturn); 8808 CallSite CS = Builder.CreateCall(IA); 8809 CS.setAttributes(NoReturnAttr); 8810 return CS.getInstruction(); 8811 } 8812 case X86::BI__readfsbyte: 8813 case X86::BI__readfsword: 8814 case X86::BI__readfsdword: 8815 case X86::BI__readfsqword: { 8816 llvm::Type *IntTy = ConvertType(E->getType()); 8817 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8818 llvm::PointerType::get(IntTy, 257)); 8819 LoadInst *Load = Builder.CreateAlignedLoad( 8820 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8821 Load->setVolatile(true); 8822 return Load; 8823 } 8824 case X86::BI__readgsbyte: 8825 case X86::BI__readgsword: 8826 case X86::BI__readgsdword: 8827 case X86::BI__readgsqword: { 8828 llvm::Type *IntTy = ConvertType(E->getType()); 8829 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8830 llvm::PointerType::get(IntTy, 256)); 8831 LoadInst *Load = Builder.CreateAlignedLoad( 8832 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8833 Load->setVolatile(true); 8834 return Load; 8835 } 8836 } 8837 } 8838 8839 8840 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 8841 const CallExpr *E) { 8842 SmallVector<Value*, 4> Ops; 8843 8844 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 8845 Ops.push_back(EmitScalarExpr(E->getArg(i))); 8846 8847 Intrinsic::ID ID = Intrinsic::not_intrinsic; 8848 8849 switch (BuiltinID) { 8850 default: return nullptr; 8851 8852 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 8853 // call __builtin_readcyclecounter. 
8854 case PPC::BI__builtin_ppc_get_timebase: 8855 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 8856 8857 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 8858 case PPC::BI__builtin_altivec_lvx: 8859 case PPC::BI__builtin_altivec_lvxl: 8860 case PPC::BI__builtin_altivec_lvebx: 8861 case PPC::BI__builtin_altivec_lvehx: 8862 case PPC::BI__builtin_altivec_lvewx: 8863 case PPC::BI__builtin_altivec_lvsl: 8864 case PPC::BI__builtin_altivec_lvsr: 8865 case PPC::BI__builtin_vsx_lxvd2x: 8866 case PPC::BI__builtin_vsx_lxvw4x: 8867 case PPC::BI__builtin_vsx_lxvd2x_be: 8868 case PPC::BI__builtin_vsx_lxvw4x_be: 8869 case PPC::BI__builtin_vsx_lxvl: 8870 case PPC::BI__builtin_vsx_lxvll: 8871 { 8872 if(BuiltinID == PPC::BI__builtin_vsx_lxvl || 8873 BuiltinID == PPC::BI__builtin_vsx_lxvll){ 8874 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); 8875 }else { 8876 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8877 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 8878 Ops.pop_back(); 8879 } 8880 8881 switch (BuiltinID) { 8882 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 8883 case PPC::BI__builtin_altivec_lvx: 8884 ID = Intrinsic::ppc_altivec_lvx; 8885 break; 8886 case PPC::BI__builtin_altivec_lvxl: 8887 ID = Intrinsic::ppc_altivec_lvxl; 8888 break; 8889 case PPC::BI__builtin_altivec_lvebx: 8890 ID = Intrinsic::ppc_altivec_lvebx; 8891 break; 8892 case PPC::BI__builtin_altivec_lvehx: 8893 ID = Intrinsic::ppc_altivec_lvehx; 8894 break; 8895 case PPC::BI__builtin_altivec_lvewx: 8896 ID = Intrinsic::ppc_altivec_lvewx; 8897 break; 8898 case PPC::BI__builtin_altivec_lvsl: 8899 ID = Intrinsic::ppc_altivec_lvsl; 8900 break; 8901 case PPC::BI__builtin_altivec_lvsr: 8902 ID = Intrinsic::ppc_altivec_lvsr; 8903 break; 8904 case PPC::BI__builtin_vsx_lxvd2x: 8905 ID = Intrinsic::ppc_vsx_lxvd2x; 8906 break; 8907 case PPC::BI__builtin_vsx_lxvw4x: 8908 ID = Intrinsic::ppc_vsx_lxvw4x; 8909 break; 8910 case PPC::BI__builtin_vsx_lxvd2x_be: 8911 ID = 
Intrinsic::ppc_vsx_lxvd2x_be; 8912 break; 8913 case PPC::BI__builtin_vsx_lxvw4x_be: 8914 ID = Intrinsic::ppc_vsx_lxvw4x_be; 8915 break; 8916 case PPC::BI__builtin_vsx_lxvl: 8917 ID = Intrinsic::ppc_vsx_lxvl; 8918 break; 8919 case PPC::BI__builtin_vsx_lxvll: 8920 ID = Intrinsic::ppc_vsx_lxvll; 8921 break; 8922 } 8923 llvm::Function *F = CGM.getIntrinsic(ID); 8924 return Builder.CreateCall(F, Ops, ""); 8925 } 8926 8927 // vec_st, vec_xst_be 8928 case PPC::BI__builtin_altivec_stvx: 8929 case PPC::BI__builtin_altivec_stvxl: 8930 case PPC::BI__builtin_altivec_stvebx: 8931 case PPC::BI__builtin_altivec_stvehx: 8932 case PPC::BI__builtin_altivec_stvewx: 8933 case PPC::BI__builtin_vsx_stxvd2x: 8934 case PPC::BI__builtin_vsx_stxvw4x: 8935 case PPC::BI__builtin_vsx_stxvd2x_be: 8936 case PPC::BI__builtin_vsx_stxvw4x_be: 8937 case PPC::BI__builtin_vsx_stxvl: 8938 case PPC::BI__builtin_vsx_stxvll: 8939 { 8940 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 8941 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 8942 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8943 }else { 8944 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 8945 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 8946 Ops.pop_back(); 8947 } 8948 8949 switch (BuiltinID) { 8950 default: llvm_unreachable("Unsupported st intrinsic!"); 8951 case PPC::BI__builtin_altivec_stvx: 8952 ID = Intrinsic::ppc_altivec_stvx; 8953 break; 8954 case PPC::BI__builtin_altivec_stvxl: 8955 ID = Intrinsic::ppc_altivec_stvxl; 8956 break; 8957 case PPC::BI__builtin_altivec_stvebx: 8958 ID = Intrinsic::ppc_altivec_stvebx; 8959 break; 8960 case PPC::BI__builtin_altivec_stvehx: 8961 ID = Intrinsic::ppc_altivec_stvehx; 8962 break; 8963 case PPC::BI__builtin_altivec_stvewx: 8964 ID = Intrinsic::ppc_altivec_stvewx; 8965 break; 8966 case PPC::BI__builtin_vsx_stxvd2x: 8967 ID = Intrinsic::ppc_vsx_stxvd2x; 8968 break; 8969 case PPC::BI__builtin_vsx_stxvw4x: 8970 ID = Intrinsic::ppc_vsx_stxvw4x; 8971 break; 8972 case PPC::BI__builtin_vsx_stxvd2x_be: 
8973 ID = Intrinsic::ppc_vsx_stxvd2x_be; 8974 break; 8975 case PPC::BI__builtin_vsx_stxvw4x_be: 8976 ID = Intrinsic::ppc_vsx_stxvw4x_be; 8977 break; 8978 case PPC::BI__builtin_vsx_stxvl: 8979 ID = Intrinsic::ppc_vsx_stxvl; 8980 break; 8981 case PPC::BI__builtin_vsx_stxvll: 8982 ID = Intrinsic::ppc_vsx_stxvll; 8983 break; 8984 } 8985 llvm::Function *F = CGM.getIntrinsic(ID); 8986 return Builder.CreateCall(F, Ops, ""); 8987 } 8988 // Square root 8989 case PPC::BI__builtin_vsx_xvsqrtsp: 8990 case PPC::BI__builtin_vsx_xvsqrtdp: { 8991 llvm::Type *ResultType = ConvertType(E->getType()); 8992 Value *X = EmitScalarExpr(E->getArg(0)); 8993 ID = Intrinsic::sqrt; 8994 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8995 return Builder.CreateCall(F, X); 8996 } 8997 // Count leading zeros 8998 case PPC::BI__builtin_altivec_vclzb: 8999 case PPC::BI__builtin_altivec_vclzh: 9000 case PPC::BI__builtin_altivec_vclzw: 9001 case PPC::BI__builtin_altivec_vclzd: { 9002 llvm::Type *ResultType = ConvertType(E->getType()); 9003 Value *X = EmitScalarExpr(E->getArg(0)); 9004 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9005 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 9006 return Builder.CreateCall(F, {X, Undef}); 9007 } 9008 case PPC::BI__builtin_altivec_vctzb: 9009 case PPC::BI__builtin_altivec_vctzh: 9010 case PPC::BI__builtin_altivec_vctzw: 9011 case PPC::BI__builtin_altivec_vctzd: { 9012 llvm::Type *ResultType = ConvertType(E->getType()); 9013 Value *X = EmitScalarExpr(E->getArg(0)); 9014 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9015 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 9016 return Builder.CreateCall(F, {X, Undef}); 9017 } 9018 case PPC::BI__builtin_altivec_vpopcntb: 9019 case PPC::BI__builtin_altivec_vpopcnth: 9020 case PPC::BI__builtin_altivec_vpopcntw: 9021 case PPC::BI__builtin_altivec_vpopcntd: { 9022 llvm::Type *ResultType = ConvertType(E->getType()); 9023 Value *X = EmitScalarExpr(E->getArg(0)); 
9024 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 9025 return Builder.CreateCall(F, X); 9026 } 9027 // Copy sign 9028 case PPC::BI__builtin_vsx_xvcpsgnsp: 9029 case PPC::BI__builtin_vsx_xvcpsgndp: { 9030 llvm::Type *ResultType = ConvertType(E->getType()); 9031 Value *X = EmitScalarExpr(E->getArg(0)); 9032 Value *Y = EmitScalarExpr(E->getArg(1)); 9033 ID = Intrinsic::copysign; 9034 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 9035 return Builder.CreateCall(F, {X, Y}); 9036 } 9037 // Rounding/truncation 9038 case PPC::BI__builtin_vsx_xvrspip: 9039 case PPC::BI__builtin_vsx_xvrdpip: 9040 case PPC::BI__builtin_vsx_xvrdpim: 9041 case PPC::BI__builtin_vsx_xvrspim: 9042 case PPC::BI__builtin_vsx_xvrdpi: 9043 case PPC::BI__builtin_vsx_xvrspi: 9044 case PPC::BI__builtin_vsx_xvrdpic: 9045 case PPC::BI__builtin_vsx_xvrspic: 9046 case PPC::BI__builtin_vsx_xvrdpiz: 9047 case PPC::BI__builtin_vsx_xvrspiz: { 9048 llvm::Type *ResultType = ConvertType(E->getType()); 9049 Value *X = EmitScalarExpr(E->getArg(0)); 9050 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 9051 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 9052 ID = Intrinsic::floor; 9053 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 9054 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 9055 ID = Intrinsic::round; 9056 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 9057 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 9058 ID = Intrinsic::nearbyint; 9059 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 9060 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 9061 ID = Intrinsic::ceil; 9062 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 9063 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 9064 ID = Intrinsic::trunc; 9065 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 9066 return Builder.CreateCall(F, X); 9067 } 9068 9069 // Absolute value 9070 case PPC::BI__builtin_vsx_xvabsdp: 9071 case PPC::BI__builtin_vsx_xvabssp: { 9072 llvm::Type *ResultType = ConvertType(E->getType()); 9073 Value *X = 
EmitScalarExpr(E->getArg(0)); 9074 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9075 return Builder.CreateCall(F, X); 9076 } 9077 9078 // FMA variations 9079 case PPC::BI__builtin_vsx_xvmaddadp: 9080 case PPC::BI__builtin_vsx_xvmaddasp: 9081 case PPC::BI__builtin_vsx_xvnmaddadp: 9082 case PPC::BI__builtin_vsx_xvnmaddasp: 9083 case PPC::BI__builtin_vsx_xvmsubadp: 9084 case PPC::BI__builtin_vsx_xvmsubasp: 9085 case PPC::BI__builtin_vsx_xvnmsubadp: 9086 case PPC::BI__builtin_vsx_xvnmsubasp: { 9087 llvm::Type *ResultType = ConvertType(E->getType()); 9088 Value *X = EmitScalarExpr(E->getArg(0)); 9089 Value *Y = EmitScalarExpr(E->getArg(1)); 9090 Value *Z = EmitScalarExpr(E->getArg(2)); 9091 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9092 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9093 switch (BuiltinID) { 9094 case PPC::BI__builtin_vsx_xvmaddadp: 9095 case PPC::BI__builtin_vsx_xvmaddasp: 9096 return Builder.CreateCall(F, {X, Y, Z}); 9097 case PPC::BI__builtin_vsx_xvnmaddadp: 9098 case PPC::BI__builtin_vsx_xvnmaddasp: 9099 return Builder.CreateFSub(Zero, 9100 Builder.CreateCall(F, {X, Y, Z}), "sub"); 9101 case PPC::BI__builtin_vsx_xvmsubadp: 9102 case PPC::BI__builtin_vsx_xvmsubasp: 9103 return Builder.CreateCall(F, 9104 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 9105 case PPC::BI__builtin_vsx_xvnmsubadp: 9106 case PPC::BI__builtin_vsx_xvnmsubasp: 9107 Value *FsubRes = 9108 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 9109 return Builder.CreateFSub(Zero, FsubRes, "sub"); 9110 } 9111 llvm_unreachable("Unknown FMA operation"); 9112 return nullptr; // Suppress no-return warning 9113 } 9114 9115 case PPC::BI__builtin_vsx_insertword: { 9116 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 9117 9118 // Third argument is a compile time constant int. It must be clamped to 9119 // to the range [0, 12]. 
9120 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 9121 assert(ArgCI && 9122 "Third arg to xxinsertw intrinsic must be constant integer"); 9123 const int64_t MaxIndex = 12; 9124 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 9125 9126 // The builtin semantics don't exactly match the xxinsertw instructions 9127 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 9128 // word from the first argument, and inserts it in the second argument. The 9129 // instruction extracts the word from its second input register and inserts 9130 // it into its first input register, so swap the first and second arguments. 9131 std::swap(Ops[0], Ops[1]); 9132 9133 // Need to cast the second argument from a vector of unsigned int to a 9134 // vector of long long. 9135 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 9136 9137 if (getTarget().isLittleEndian()) { 9138 // Create a shuffle mask of (1, 0) 9139 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 9140 ConstantInt::get(Int32Ty, 0) 9141 }; 9142 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9143 9144 // Reverse the double words in the vector we will extract from. 9145 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 9146 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 9147 9148 // Reverse the index. 9149 Index = MaxIndex - Index; 9150 } 9151 9152 // Intrinsic expects the first arg to be a vector of int. 9153 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 9154 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 9155 return Builder.CreateCall(F, Ops); 9156 } 9157 9158 case PPC::BI__builtin_vsx_extractuword: { 9159 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 9160 9161 // Intrinsic expects the first argument to be a vector of doublewords. 
9162 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 9163 9164 // The second argument is a compile time constant int that needs to 9165 // be clamped to the range [0, 12]. 9166 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 9167 assert(ArgCI && 9168 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 9169 const int64_t MaxIndex = 12; 9170 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 9171 9172 if (getTarget().isLittleEndian()) { 9173 // Reverse the index. 9174 Index = MaxIndex - Index; 9175 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 9176 9177 // Emit the call, then reverse the double words of the results vector. 9178 Value *Call = Builder.CreateCall(F, Ops); 9179 9180 // Create a shuffle mask of (1, 0) 9181 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 9182 ConstantInt::get(Int32Ty, 0) 9183 }; 9184 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9185 9186 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 9187 return ShuffleCall; 9188 } else { 9189 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 9190 return Builder.CreateCall(F, Ops); 9191 } 9192 } 9193 9194 case PPC::BI__builtin_vsx_xxpermdi: { 9195 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 9196 assert(ArgCI && "Third arg must be constant integer!"); 9197 9198 unsigned Index = ArgCI->getZExtValue(); 9199 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 9200 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 9201 9202 // Element zero comes from the first input vector and element one comes from 9203 // the second. The element indices within each vector are numbered in big 9204 // endian order so the shuffle mask must be adjusted for this on little 9205 // endian platforms (i.e. index is complemented and source vector reversed). 
9206 unsigned ElemIdx0; 9207 unsigned ElemIdx1; 9208 if (getTarget().isLittleEndian()) { 9209 ElemIdx0 = (~Index & 1) + 2; 9210 ElemIdx1 = (~Index & 2) >> 1; 9211 } else { // BigEndian 9212 ElemIdx0 = (Index & 2) >> 1; 9213 ElemIdx1 = 2 + (Index & 1); 9214 } 9215 9216 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 9217 ConstantInt::get(Int32Ty, ElemIdx1)}; 9218 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9219 9220 Value *ShuffleCall = 9221 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 9222 QualType BIRetType = E->getType(); 9223 auto RetTy = ConvertType(BIRetType); 9224 return Builder.CreateBitCast(ShuffleCall, RetTy); 9225 } 9226 9227 case PPC::BI__builtin_vsx_xxsldwi: { 9228 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 9229 assert(ArgCI && "Third argument must be a compile time constant"); 9230 unsigned Index = ArgCI->getZExtValue() & 0x3; 9231 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 9232 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 9233 9234 // Create a shuffle mask 9235 unsigned ElemIdx0; 9236 unsigned ElemIdx1; 9237 unsigned ElemIdx2; 9238 unsigned ElemIdx3; 9239 if (getTarget().isLittleEndian()) { 9240 // Little endian element N comes from element 8+N-Index of the 9241 // concatenated wide vector (of course, using modulo arithmetic on 9242 // the total number of elements). 
9243 ElemIdx0 = (8 - Index) % 8; 9244 ElemIdx1 = (9 - Index) % 8; 9245 ElemIdx2 = (10 - Index) % 8; 9246 ElemIdx3 = (11 - Index) % 8; 9247 } else { 9248 // Big endian ElemIdx<N> = Index + N 9249 ElemIdx0 = Index; 9250 ElemIdx1 = Index + 1; 9251 ElemIdx2 = Index + 2; 9252 ElemIdx3 = Index + 3; 9253 } 9254 9255 Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), 9256 ConstantInt::get(Int32Ty, ElemIdx1), 9257 ConstantInt::get(Int32Ty, ElemIdx2), 9258 ConstantInt::get(Int32Ty, ElemIdx3)}; 9259 9260 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9261 Value *ShuffleCall = 9262 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 9263 QualType BIRetType = E->getType(); 9264 auto RetTy = ConvertType(BIRetType); 9265 return Builder.CreateBitCast(ShuffleCall, RetTy); 9266 } 9267 } 9268 } 9269 9270 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 9271 const CallExpr *E) { 9272 switch (BuiltinID) { 9273 case AMDGPU::BI__builtin_amdgcn_div_scale: 9274 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 9275 // Translate from the intrinsics's struct return to the builtin's out 9276 // argument. 
9277 9278 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 9279 9280 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 9281 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 9282 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 9283 9284 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 9285 X->getType()); 9286 9287 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 9288 9289 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 9290 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 9291 9292 llvm::Type *RealFlagType 9293 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 9294 9295 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 9296 Builder.CreateStore(FlagExt, FlagOutPtr); 9297 return Result; 9298 } 9299 case AMDGPU::BI__builtin_amdgcn_div_fmas: 9300 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 9301 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 9302 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 9303 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 9304 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 9305 9306 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 9307 Src0->getType()); 9308 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 9309 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 9310 } 9311 9312 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 9313 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 9314 case AMDGPU::BI__builtin_amdgcn_mov_dpp: { 9315 llvm::SmallVector<llvm::Value *, 5> Args; 9316 for (unsigned I = 0; I != 5; ++I) 9317 Args.push_back(EmitScalarExpr(E->getArg(I))); 9318 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, 9319 Args[0]->getType()); 9320 return Builder.CreateCall(F, Args); 9321 } 9322 case AMDGPU::BI__builtin_amdgcn_div_fixup: 9323 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 9324 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 9325 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 9326 case 
AMDGPU::BI__builtin_amdgcn_trig_preop: 9327 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 9328 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 9329 case AMDGPU::BI__builtin_amdgcn_rcp: 9330 case AMDGPU::BI__builtin_amdgcn_rcpf: 9331 case AMDGPU::BI__builtin_amdgcn_rcph: 9332 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 9333 case AMDGPU::BI__builtin_amdgcn_rsq: 9334 case AMDGPU::BI__builtin_amdgcn_rsqf: 9335 case AMDGPU::BI__builtin_amdgcn_rsqh: 9336 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 9337 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 9338 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 9339 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 9340 case AMDGPU::BI__builtin_amdgcn_sinf: 9341 case AMDGPU::BI__builtin_amdgcn_sinh: 9342 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 9343 case AMDGPU::BI__builtin_amdgcn_cosf: 9344 case AMDGPU::BI__builtin_amdgcn_cosh: 9345 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 9346 case AMDGPU::BI__builtin_amdgcn_log_clampf: 9347 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 9348 case AMDGPU::BI__builtin_amdgcn_ldexp: 9349 case AMDGPU::BI__builtin_amdgcn_ldexpf: 9350 case AMDGPU::BI__builtin_amdgcn_ldexph: 9351 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 9352 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 9353 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 9354 case AMDGPU::BI__builtin_amdgcn_frexp_manth: 9355 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 9356 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 9357 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 9358 Value *Src0 = EmitScalarExpr(E->getArg(0)); 9359 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 9360 { Builder.getInt32Ty(), Src0->getType() }); 9361 return Builder.CreateCall(F, Src0); 9362 } 9363 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 9364 Value *Src0 = EmitScalarExpr(E->getArg(0)); 9365 Value *F = 
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 9366 { Builder.getInt16Ty(), Src0->getType() }); 9367 return Builder.CreateCall(F, Src0); 9368 } 9369 case AMDGPU::BI__builtin_amdgcn_fract: 9370 case AMDGPU::BI__builtin_amdgcn_fractf: 9371 case AMDGPU::BI__builtin_amdgcn_fracth: 9372 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 9373 case AMDGPU::BI__builtin_amdgcn_lerp: 9374 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 9375 case AMDGPU::BI__builtin_amdgcn_uicmp: 9376 case AMDGPU::BI__builtin_amdgcn_uicmpl: 9377 case AMDGPU::BI__builtin_amdgcn_sicmp: 9378 case AMDGPU::BI__builtin_amdgcn_sicmpl: 9379 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 9380 case AMDGPU::BI__builtin_amdgcn_fcmp: 9381 case AMDGPU::BI__builtin_amdgcn_fcmpf: 9382 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 9383 case AMDGPU::BI__builtin_amdgcn_class: 9384 case AMDGPU::BI__builtin_amdgcn_classf: 9385 case AMDGPU::BI__builtin_amdgcn_classh: 9386 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 9387 case AMDGPU::BI__builtin_amdgcn_fmed3f: 9388 case AMDGPU::BI__builtin_amdgcn_fmed3h: 9389 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); 9390 case AMDGPU::BI__builtin_amdgcn_read_exec: { 9391 CallInst *CI = cast<CallInst>( 9392 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 9393 CI->setConvergent(); 9394 return CI; 9395 } 9396 case AMDGPU::BI__builtin_amdgcn_read_exec_lo: 9397 case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { 9398 StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? 
9399 "exec_lo" : "exec_hi"; 9400 CallInst *CI = cast<CallInst>( 9401 EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName)); 9402 CI->setConvergent(); 9403 return CI; 9404 } 9405 9406 // amdgcn workitem 9407 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 9408 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 9409 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 9410 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 9411 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 9412 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 9413 9414 // r600 intrinsics 9415 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 9416 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 9417 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 9418 case AMDGPU::BI__builtin_r600_read_tidig_x: 9419 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 9420 case AMDGPU::BI__builtin_r600_read_tidig_y: 9421 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 9422 case AMDGPU::BI__builtin_r600_read_tidig_z: 9423 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 9424 default: 9425 return nullptr; 9426 } 9427 } 9428 9429 /// Handle a SystemZ function in which the final argument is a pointer 9430 /// to an int that receives the post-instruction CC value. At the LLVM level 9431 /// this is represented as a function that returns a {result, cc} pair. 
9432 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 9433 unsigned IntrinsicID, 9434 const CallExpr *E) { 9435 unsigned NumArgs = E->getNumArgs() - 1; 9436 SmallVector<Value *, 8> Args(NumArgs); 9437 for (unsigned I = 0; I < NumArgs; ++I) 9438 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 9439 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 9440 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 9441 Value *Call = CGF.Builder.CreateCall(F, Args); 9442 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 9443 CGF.Builder.CreateStore(CC, CCPtr); 9444 return CGF.Builder.CreateExtractValue(Call, 0); 9445 } 9446 9447 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 9448 const CallExpr *E) { 9449 switch (BuiltinID) { 9450 case SystemZ::BI__builtin_tbegin: { 9451 Value *TDB = EmitScalarExpr(E->getArg(0)); 9452 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 9453 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 9454 return Builder.CreateCall(F, {TDB, Control}); 9455 } 9456 case SystemZ::BI__builtin_tbegin_nofloat: { 9457 Value *TDB = EmitScalarExpr(E->getArg(0)); 9458 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 9459 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 9460 return Builder.CreateCall(F, {TDB, Control}); 9461 } 9462 case SystemZ::BI__builtin_tbeginc: { 9463 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 9464 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 9465 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 9466 return Builder.CreateCall(F, {TDB, Control}); 9467 } 9468 case SystemZ::BI__builtin_tabort: { 9469 Value *Data = EmitScalarExpr(E->getArg(0)); 9470 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 9471 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 9472 } 9473 case SystemZ::BI__builtin_non_tx_store: { 9474 Value *Address = EmitScalarExpr(E->getArg(0)); 9475 Value *Data = EmitScalarExpr(E->getArg(1)); 9476 Value *F = 
CGM.getIntrinsic(Intrinsic::s390_ntstg); 9477 return Builder.CreateCall(F, {Data, Address}); 9478 } 9479 9480 // Vector builtins. Note that most vector builtins are mapped automatically 9481 // to target-specific LLVM intrinsics. The ones handled specially here can 9482 // be represented via standard LLVM IR, which is preferable to enable common 9483 // LLVM optimizations. 9484 9485 case SystemZ::BI__builtin_s390_vpopctb: 9486 case SystemZ::BI__builtin_s390_vpopcth: 9487 case SystemZ::BI__builtin_s390_vpopctf: 9488 case SystemZ::BI__builtin_s390_vpopctg: { 9489 llvm::Type *ResultType = ConvertType(E->getType()); 9490 Value *X = EmitScalarExpr(E->getArg(0)); 9491 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 9492 return Builder.CreateCall(F, X); 9493 } 9494 9495 case SystemZ::BI__builtin_s390_vclzb: 9496 case SystemZ::BI__builtin_s390_vclzh: 9497 case SystemZ::BI__builtin_s390_vclzf: 9498 case SystemZ::BI__builtin_s390_vclzg: { 9499 llvm::Type *ResultType = ConvertType(E->getType()); 9500 Value *X = EmitScalarExpr(E->getArg(0)); 9501 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9502 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 9503 return Builder.CreateCall(F, {X, Undef}); 9504 } 9505 9506 case SystemZ::BI__builtin_s390_vctzb: 9507 case SystemZ::BI__builtin_s390_vctzh: 9508 case SystemZ::BI__builtin_s390_vctzf: 9509 case SystemZ::BI__builtin_s390_vctzg: { 9510 llvm::Type *ResultType = ConvertType(E->getType()); 9511 Value *X = EmitScalarExpr(E->getArg(0)); 9512 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9513 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 9514 return Builder.CreateCall(F, {X, Undef}); 9515 } 9516 9517 case SystemZ::BI__builtin_s390_vfsqsb: 9518 case SystemZ::BI__builtin_s390_vfsqdb: { 9519 llvm::Type *ResultType = ConvertType(E->getType()); 9520 Value *X = EmitScalarExpr(E->getArg(0)); 9521 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 9522 return 
Builder.CreateCall(F, X); 9523 } 9524 case SystemZ::BI__builtin_s390_vfmasb: 9525 case SystemZ::BI__builtin_s390_vfmadb: { 9526 llvm::Type *ResultType = ConvertType(E->getType()); 9527 Value *X = EmitScalarExpr(E->getArg(0)); 9528 Value *Y = EmitScalarExpr(E->getArg(1)); 9529 Value *Z = EmitScalarExpr(E->getArg(2)); 9530 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9531 return Builder.CreateCall(F, {X, Y, Z}); 9532 } 9533 case SystemZ::BI__builtin_s390_vfmssb: 9534 case SystemZ::BI__builtin_s390_vfmsdb: { 9535 llvm::Type *ResultType = ConvertType(E->getType()); 9536 Value *X = EmitScalarExpr(E->getArg(0)); 9537 Value *Y = EmitScalarExpr(E->getArg(1)); 9538 Value *Z = EmitScalarExpr(E->getArg(2)); 9539 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9540 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9541 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 9542 } 9543 case SystemZ::BI__builtin_s390_vfnmasb: 9544 case SystemZ::BI__builtin_s390_vfnmadb: { 9545 llvm::Type *ResultType = ConvertType(E->getType()); 9546 Value *X = EmitScalarExpr(E->getArg(0)); 9547 Value *Y = EmitScalarExpr(E->getArg(1)); 9548 Value *Z = EmitScalarExpr(E->getArg(2)); 9549 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9550 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9551 return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); 9552 } 9553 case SystemZ::BI__builtin_s390_vfnmssb: 9554 case SystemZ::BI__builtin_s390_vfnmsdb: { 9555 llvm::Type *ResultType = ConvertType(E->getType()); 9556 Value *X = EmitScalarExpr(E->getArg(0)); 9557 Value *Y = EmitScalarExpr(E->getArg(1)); 9558 Value *Z = EmitScalarExpr(E->getArg(2)); 9559 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9560 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9561 Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); 9562 return Builder.CreateFSub(Zero, Builder.CreateCall(F, 
{X, Y, NegZ})); 9563 } 9564 case SystemZ::BI__builtin_s390_vflpsb: 9565 case SystemZ::BI__builtin_s390_vflpdb: { 9566 llvm::Type *ResultType = ConvertType(E->getType()); 9567 Value *X = EmitScalarExpr(E->getArg(0)); 9568 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9569 return Builder.CreateCall(F, X); 9570 } 9571 case SystemZ::BI__builtin_s390_vflnsb: 9572 case SystemZ::BI__builtin_s390_vflndb: { 9573 llvm::Type *ResultType = ConvertType(E->getType()); 9574 Value *X = EmitScalarExpr(E->getArg(0)); 9575 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9576 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9577 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 9578 } 9579 case SystemZ::BI__builtin_s390_vfisb: 9580 case SystemZ::BI__builtin_s390_vfidb: { 9581 llvm::Type *ResultType = ConvertType(E->getType()); 9582 Value *X = EmitScalarExpr(E->getArg(0)); 9583 // Constant-fold the M4 and M5 mask arguments. 9584 llvm::APSInt M4, M5; 9585 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 9586 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 9587 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 9588 (void)IsConstM4; (void)IsConstM5; 9589 // Check whether this instance can be represented via a LLVM standard 9590 // intrinsic. We only support some combinations of M4 and M5. 
9591 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9592 switch (M4.getZExtValue()) { 9593 default: break; 9594 case 0: // IEEE-inexact exception allowed 9595 switch (M5.getZExtValue()) { 9596 default: break; 9597 case 0: ID = Intrinsic::rint; break; 9598 } 9599 break; 9600 case 4: // IEEE-inexact exception suppressed 9601 switch (M5.getZExtValue()) { 9602 default: break; 9603 case 0: ID = Intrinsic::nearbyint; break; 9604 case 1: ID = Intrinsic::round; break; 9605 case 5: ID = Intrinsic::trunc; break; 9606 case 6: ID = Intrinsic::ceil; break; 9607 case 7: ID = Intrinsic::floor; break; 9608 } 9609 break; 9610 } 9611 if (ID != Intrinsic::not_intrinsic) { 9612 Function *F = CGM.getIntrinsic(ID, ResultType); 9613 return Builder.CreateCall(F, X); 9614 } 9615 switch (BuiltinID) { 9616 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; 9617 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; 9618 default: llvm_unreachable("Unknown BuiltinID"); 9619 } 9620 Function *F = CGM.getIntrinsic(ID); 9621 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9622 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 9623 return Builder.CreateCall(F, {X, M4Value, M5Value}); 9624 } 9625 case SystemZ::BI__builtin_s390_vfmaxsb: 9626 case SystemZ::BI__builtin_s390_vfmaxdb: { 9627 llvm::Type *ResultType = ConvertType(E->getType()); 9628 Value *X = EmitScalarExpr(E->getArg(0)); 9629 Value *Y = EmitScalarExpr(E->getArg(1)); 9630 // Constant-fold the M4 mask argument. 9631 llvm::APSInt M4; 9632 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 9633 assert(IsConstM4 && "Constant arg isn't actually constant?"); 9634 (void)IsConstM4; 9635 // Check whether this instance can be represented via a LLVM standard 9636 // intrinsic. We only support some values of M4. 
9637 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9638 switch (M4.getZExtValue()) { 9639 default: break; 9640 case 4: ID = Intrinsic::maxnum; break; 9641 } 9642 if (ID != Intrinsic::not_intrinsic) { 9643 Function *F = CGM.getIntrinsic(ID, ResultType); 9644 return Builder.CreateCall(F, {X, Y}); 9645 } 9646 switch (BuiltinID) { 9647 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; 9648 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; 9649 default: llvm_unreachable("Unknown BuiltinID"); 9650 } 9651 Function *F = CGM.getIntrinsic(ID); 9652 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9653 return Builder.CreateCall(F, {X, Y, M4Value}); 9654 } 9655 case SystemZ::BI__builtin_s390_vfminsb: 9656 case SystemZ::BI__builtin_s390_vfmindb: { 9657 llvm::Type *ResultType = ConvertType(E->getType()); 9658 Value *X = EmitScalarExpr(E->getArg(0)); 9659 Value *Y = EmitScalarExpr(E->getArg(1)); 9660 // Constant-fold the M4 mask argument. 9661 llvm::APSInt M4; 9662 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 9663 assert(IsConstM4 && "Constant arg isn't actually constant?"); 9664 (void)IsConstM4; 9665 // Check whether this instance can be represented via a LLVM standard 9666 // intrinsic. We only support some values of M4. 
9667 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9668 switch (M4.getZExtValue()) { 9669 default: break; 9670 case 4: ID = Intrinsic::minnum; break; 9671 } 9672 if (ID != Intrinsic::not_intrinsic) { 9673 Function *F = CGM.getIntrinsic(ID, ResultType); 9674 return Builder.CreateCall(F, {X, Y}); 9675 } 9676 switch (BuiltinID) { 9677 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; 9678 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; 9679 default: llvm_unreachable("Unknown BuiltinID"); 9680 } 9681 Function *F = CGM.getIntrinsic(ID); 9682 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 9683 return Builder.CreateCall(F, {X, Y, M4Value}); 9684 } 9685 9686 // Vector intrisincs that output the post-instruction CC value. 9687 9688 #define INTRINSIC_WITH_CC(NAME) \ 9689 case SystemZ::BI__builtin_##NAME: \ 9690 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 9691 9692 INTRINSIC_WITH_CC(s390_vpkshs); 9693 INTRINSIC_WITH_CC(s390_vpksfs); 9694 INTRINSIC_WITH_CC(s390_vpksgs); 9695 9696 INTRINSIC_WITH_CC(s390_vpklshs); 9697 INTRINSIC_WITH_CC(s390_vpklsfs); 9698 INTRINSIC_WITH_CC(s390_vpklsgs); 9699 9700 INTRINSIC_WITH_CC(s390_vceqbs); 9701 INTRINSIC_WITH_CC(s390_vceqhs); 9702 INTRINSIC_WITH_CC(s390_vceqfs); 9703 INTRINSIC_WITH_CC(s390_vceqgs); 9704 9705 INTRINSIC_WITH_CC(s390_vchbs); 9706 INTRINSIC_WITH_CC(s390_vchhs); 9707 INTRINSIC_WITH_CC(s390_vchfs); 9708 INTRINSIC_WITH_CC(s390_vchgs); 9709 9710 INTRINSIC_WITH_CC(s390_vchlbs); 9711 INTRINSIC_WITH_CC(s390_vchlhs); 9712 INTRINSIC_WITH_CC(s390_vchlfs); 9713 INTRINSIC_WITH_CC(s390_vchlgs); 9714 9715 INTRINSIC_WITH_CC(s390_vfaebs); 9716 INTRINSIC_WITH_CC(s390_vfaehs); 9717 INTRINSIC_WITH_CC(s390_vfaefs); 9718 9719 INTRINSIC_WITH_CC(s390_vfaezbs); 9720 INTRINSIC_WITH_CC(s390_vfaezhs); 9721 INTRINSIC_WITH_CC(s390_vfaezfs); 9722 9723 INTRINSIC_WITH_CC(s390_vfeebs); 9724 INTRINSIC_WITH_CC(s390_vfeehs); 9725 INTRINSIC_WITH_CC(s390_vfeefs); 9726 9727 
INTRINSIC_WITH_CC(s390_vfeezbs); 9728 INTRINSIC_WITH_CC(s390_vfeezhs); 9729 INTRINSIC_WITH_CC(s390_vfeezfs); 9730 9731 INTRINSIC_WITH_CC(s390_vfenebs); 9732 INTRINSIC_WITH_CC(s390_vfenehs); 9733 INTRINSIC_WITH_CC(s390_vfenefs); 9734 9735 INTRINSIC_WITH_CC(s390_vfenezbs); 9736 INTRINSIC_WITH_CC(s390_vfenezhs); 9737 INTRINSIC_WITH_CC(s390_vfenezfs); 9738 9739 INTRINSIC_WITH_CC(s390_vistrbs); 9740 INTRINSIC_WITH_CC(s390_vistrhs); 9741 INTRINSIC_WITH_CC(s390_vistrfs); 9742 9743 INTRINSIC_WITH_CC(s390_vstrcbs); 9744 INTRINSIC_WITH_CC(s390_vstrchs); 9745 INTRINSIC_WITH_CC(s390_vstrcfs); 9746 9747 INTRINSIC_WITH_CC(s390_vstrczbs); 9748 INTRINSIC_WITH_CC(s390_vstrczhs); 9749 INTRINSIC_WITH_CC(s390_vstrczfs); 9750 9751 INTRINSIC_WITH_CC(s390_vfcesbs); 9752 INTRINSIC_WITH_CC(s390_vfcedbs); 9753 INTRINSIC_WITH_CC(s390_vfchsbs); 9754 INTRINSIC_WITH_CC(s390_vfchdbs); 9755 INTRINSIC_WITH_CC(s390_vfchesbs); 9756 INTRINSIC_WITH_CC(s390_vfchedbs); 9757 9758 INTRINSIC_WITH_CC(s390_vftcisb); 9759 INTRINSIC_WITH_CC(s390_vftcidb); 9760 9761 #undef INTRINSIC_WITH_CC 9762 9763 default: 9764 return nullptr; 9765 } 9766 } 9767 9768 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 9769 const CallExpr *E) { 9770 auto MakeLdg = [&](unsigned IntrinsicID) { 9771 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9772 clang::CharUnits Align = 9773 getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); 9774 return Builder.CreateCall( 9775 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9776 Ptr->getType()}), 9777 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 9778 }; 9779 auto MakeScopedAtomic = [&](unsigned IntrinsicID) { 9780 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9781 return Builder.CreateCall( 9782 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 9783 Ptr->getType()}), 9784 {Ptr, EmitScalarExpr(E->getArg(1))}); 9785 }; 9786 switch (BuiltinID) { 9787 case NVPTX::BI__nvvm_atom_add_gen_i: 9788 case 
NVPTX::BI__nvvm_atom_add_gen_l: 9789 case NVPTX::BI__nvvm_atom_add_gen_ll: 9790 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 9791 9792 case NVPTX::BI__nvvm_atom_sub_gen_i: 9793 case NVPTX::BI__nvvm_atom_sub_gen_l: 9794 case NVPTX::BI__nvvm_atom_sub_gen_ll: 9795 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 9796 9797 case NVPTX::BI__nvvm_atom_and_gen_i: 9798 case NVPTX::BI__nvvm_atom_and_gen_l: 9799 case NVPTX::BI__nvvm_atom_and_gen_ll: 9800 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 9801 9802 case NVPTX::BI__nvvm_atom_or_gen_i: 9803 case NVPTX::BI__nvvm_atom_or_gen_l: 9804 case NVPTX::BI__nvvm_atom_or_gen_ll: 9805 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 9806 9807 case NVPTX::BI__nvvm_atom_xor_gen_i: 9808 case NVPTX::BI__nvvm_atom_xor_gen_l: 9809 case NVPTX::BI__nvvm_atom_xor_gen_ll: 9810 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 9811 9812 case NVPTX::BI__nvvm_atom_xchg_gen_i: 9813 case NVPTX::BI__nvvm_atom_xchg_gen_l: 9814 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 9815 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 9816 9817 case NVPTX::BI__nvvm_atom_max_gen_i: 9818 case NVPTX::BI__nvvm_atom_max_gen_l: 9819 case NVPTX::BI__nvvm_atom_max_gen_ll: 9820 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 9821 9822 case NVPTX::BI__nvvm_atom_max_gen_ui: 9823 case NVPTX::BI__nvvm_atom_max_gen_ul: 9824 case NVPTX::BI__nvvm_atom_max_gen_ull: 9825 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 9826 9827 case NVPTX::BI__nvvm_atom_min_gen_i: 9828 case NVPTX::BI__nvvm_atom_min_gen_l: 9829 case NVPTX::BI__nvvm_atom_min_gen_ll: 9830 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 9831 9832 case NVPTX::BI__nvvm_atom_min_gen_ui: 9833 case NVPTX::BI__nvvm_atom_min_gen_ul: 9834 case NVPTX::BI__nvvm_atom_min_gen_ull: 9835 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 9836 9837 case 
NVPTX::BI__nvvm_atom_cas_gen_i: 9838 case NVPTX::BI__nvvm_atom_cas_gen_l: 9839 case NVPTX::BI__nvvm_atom_cas_gen_ll: 9840 // __nvvm_atom_cas_gen_* should return the old value rather than the 9841 // success flag. 9842 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 9843 9844 case NVPTX::BI__nvvm_atom_add_gen_f: { 9845 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9846 Value *Val = EmitScalarExpr(E->getArg(1)); 9847 // atomicrmw only deals with integer arguments so we need to use 9848 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 9849 Value *FnALAF32 = 9850 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 9851 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 9852 } 9853 9854 case NVPTX::BI__nvvm_atom_add_gen_d: { 9855 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9856 Value *Val = EmitScalarExpr(E->getArg(1)); 9857 // atomicrmw only deals with integer arguments, so we need to use 9858 // LLVM's nvvm_atomic_load_add_f64 intrinsic. 9859 Value *FnALAF64 = 9860 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType()); 9861 return Builder.CreateCall(FnALAF64, {Ptr, Val}); 9862 } 9863 9864 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 9865 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9866 Value *Val = EmitScalarExpr(E->getArg(1)); 9867 Value *FnALI32 = 9868 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 9869 return Builder.CreateCall(FnALI32, {Ptr, Val}); 9870 } 9871 9872 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 9873 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9874 Value *Val = EmitScalarExpr(E->getArg(1)); 9875 Value *FnALD32 = 9876 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 9877 return Builder.CreateCall(FnALD32, {Ptr, Val}); 9878 } 9879 9880 case NVPTX::BI__nvvm_ldg_c: 9881 case NVPTX::BI__nvvm_ldg_c2: 9882 case NVPTX::BI__nvvm_ldg_c4: 9883 case NVPTX::BI__nvvm_ldg_s: 9884 case NVPTX::BI__nvvm_ldg_s2: 9885 case NVPTX::BI__nvvm_ldg_s4: 9886 case NVPTX::BI__nvvm_ldg_i: 9887 case 
NVPTX::BI__nvvm_ldg_i2: 9888 case NVPTX::BI__nvvm_ldg_i4: 9889 case NVPTX::BI__nvvm_ldg_l: 9890 case NVPTX::BI__nvvm_ldg_ll: 9891 case NVPTX::BI__nvvm_ldg_ll2: 9892 case NVPTX::BI__nvvm_ldg_uc: 9893 case NVPTX::BI__nvvm_ldg_uc2: 9894 case NVPTX::BI__nvvm_ldg_uc4: 9895 case NVPTX::BI__nvvm_ldg_us: 9896 case NVPTX::BI__nvvm_ldg_us2: 9897 case NVPTX::BI__nvvm_ldg_us4: 9898 case NVPTX::BI__nvvm_ldg_ui: 9899 case NVPTX::BI__nvvm_ldg_ui2: 9900 case NVPTX::BI__nvvm_ldg_ui4: 9901 case NVPTX::BI__nvvm_ldg_ul: 9902 case NVPTX::BI__nvvm_ldg_ull: 9903 case NVPTX::BI__nvvm_ldg_ull2: 9904 // PTX Interoperability section 2.2: "For a vector with an even number of 9905 // elements, its alignment is set to number of elements times the alignment 9906 // of its member: n*alignof(t)." 9907 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 9908 case NVPTX::BI__nvvm_ldg_f: 9909 case NVPTX::BI__nvvm_ldg_f2: 9910 case NVPTX::BI__nvvm_ldg_f4: 9911 case NVPTX::BI__nvvm_ldg_d: 9912 case NVPTX::BI__nvvm_ldg_d2: 9913 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 9914 9915 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 9916 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 9917 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 9918 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 9919 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 9920 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 9921 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 9922 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 9923 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 9924 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 9925 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 9926 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 9927 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 9928 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 9929 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 9930 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 9931 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 9932 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 9933 
case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 9934 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 9935 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 9936 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 9937 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 9938 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 9939 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 9940 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 9941 case NVPTX::BI__nvvm_atom_cta_max_gen_ll: 9942 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 9943 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 9944 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 9945 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 9946 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 9947 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 9948 case NVPTX::BI__nvvm_atom_sys_max_gen_ll: 9949 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 9950 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 9951 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 9952 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 9953 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 9954 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 9955 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 9956 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 9957 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 9958 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 9959 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 9960 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 9961 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 9962 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 9963 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 9964 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 9965 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 9966 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 9967 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 9968 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 9969 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 9970 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 9971 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 9972 
return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 9973 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 9974 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 9975 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 9976 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 9977 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 9978 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 9979 case NVPTX::BI__nvvm_atom_sys_and_gen_ll: 9980 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 9981 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 9982 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 9983 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 9984 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 9985 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 9986 case NVPTX::BI__nvvm_atom_sys_or_gen_l: 9987 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 9988 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 9989 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 9990 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 9991 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 9992 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 9993 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 9994 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 9995 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 9996 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 9997 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 9998 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 9999 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 10000 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10001 return Builder.CreateCall( 10002 CGM.getIntrinsic( 10003 Intrinsic::nvvm_atomic_cas_gen_i_cta, 10004 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 10005 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 10006 } 10007 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 10008 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 10009 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 10010 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10011 return Builder.CreateCall( 10012 CGM.getIntrinsic( 10013 
Intrinsic::nvvm_atomic_cas_gen_i_sys, 10014 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 10015 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 10016 } 10017 case NVPTX::BI__nvvm_match_all_sync_i32p: 10018 case NVPTX::BI__nvvm_match_all_sync_i64p: { 10019 Value *Mask = EmitScalarExpr(E->getArg(0)); 10020 Value *Val = EmitScalarExpr(E->getArg(1)); 10021 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); 10022 Value *ResultPair = Builder.CreateCall( 10023 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p 10024 ? Intrinsic::nvvm_match_all_sync_i32p 10025 : Intrinsic::nvvm_match_all_sync_i64p), 10026 {Mask, Val}); 10027 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), 10028 PredOutPtr.getElementType()); 10029 Builder.CreateStore(Pred, PredOutPtr); 10030 return Builder.CreateExtractValue(ResultPair, 0); 10031 } 10032 case NVPTX::BI__hmma_m16n16k16_ld_a: 10033 case NVPTX::BI__hmma_m16n16k16_ld_b: 10034 case NVPTX::BI__hmma_m16n16k16_ld_c_f16: 10035 case NVPTX::BI__hmma_m16n16k16_ld_c_f32: { 10036 Address Dst = EmitPointerWithAlignment(E->getArg(0)); 10037 Value *Src = EmitScalarExpr(E->getArg(1)); 10038 Value *Ldm = EmitScalarExpr(E->getArg(2)); 10039 llvm::APSInt isColMajorArg; 10040 if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) 10041 return nullptr; 10042 bool isColMajor = isColMajorArg.getSExtValue(); 10043 unsigned IID; 10044 unsigned NumResults; 10045 switch (BuiltinID) { 10046 case NVPTX::BI__hmma_m16n16k16_ld_a: 10047 IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride 10048 : Intrinsic::nvvm_wmma_load_a_f16_row_stride; 10049 NumResults = 8; 10050 break; 10051 case NVPTX::BI__hmma_m16n16k16_ld_b: 10052 IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride 10053 : Intrinsic::nvvm_wmma_load_b_f16_row_stride; 10054 NumResults = 8; 10055 break; 10056 case NVPTX::BI__hmma_m16n16k16_ld_c_f16: 10057 IID = isColMajor ? 
Intrinsic::nvvm_wmma_load_c_f16_col_stride 10058 : Intrinsic::nvvm_wmma_load_c_f16_row_stride; 10059 NumResults = 4; 10060 break; 10061 case NVPTX::BI__hmma_m16n16k16_ld_c_f32: 10062 IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride 10063 : Intrinsic::nvvm_wmma_load_c_f32_row_stride; 10064 NumResults = 8; 10065 break; 10066 default: 10067 llvm_unreachable("Unexpected builtin ID."); 10068 } 10069 Value *Result = 10070 Builder.CreateCall(CGM.getIntrinsic(IID), 10071 {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm}); 10072 10073 // Save returned values. 10074 for (unsigned i = 0; i < NumResults; ++i) { 10075 Builder.CreateAlignedStore( 10076 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), 10077 Dst.getElementType()), 10078 Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), 10079 CharUnits::fromQuantity(4)); 10080 } 10081 return Result; 10082 } 10083 10084 case NVPTX::BI__hmma_m16n16k16_st_c_f16: 10085 case NVPTX::BI__hmma_m16n16k16_st_c_f32: { 10086 Value *Dst = EmitScalarExpr(E->getArg(0)); 10087 Address Src = EmitPointerWithAlignment(E->getArg(1)); 10088 Value *Ldm = EmitScalarExpr(E->getArg(2)); 10089 llvm::APSInt isColMajorArg; 10090 if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) 10091 return nullptr; 10092 bool isColMajor = isColMajorArg.getSExtValue(); 10093 unsigned IID; 10094 unsigned NumResults = 8; 10095 // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet 10096 // for some reason nvcc builtins use _c_. 10097 switch (BuiltinID) { 10098 case NVPTX::BI__hmma_m16n16k16_st_c_f16: 10099 IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride 10100 : Intrinsic::nvvm_wmma_store_d_f16_row_stride; 10101 NumResults = 4; 10102 break; 10103 case NVPTX::BI__hmma_m16n16k16_st_c_f32: 10104 IID = isColMajor ? 
Intrinsic::nvvm_wmma_store_d_f32_col_stride 10105 : Intrinsic::nvvm_wmma_store_d_f32_row_stride; 10106 break; 10107 default: 10108 llvm_unreachable("Unexpected builtin ID."); 10109 } 10110 Function *Intrinsic = CGM.getIntrinsic(IID); 10111 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); 10112 SmallVector<Value *, 10> Values; 10113 Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy)); 10114 for (unsigned i = 0; i < NumResults; ++i) { 10115 Value *V = Builder.CreateAlignedLoad( 10116 Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), 10117 CharUnits::fromQuantity(4)); 10118 Values.push_back(Builder.CreateBitCast(V, ParamType)); 10119 } 10120 Values.push_back(Ldm); 10121 Value *Result = Builder.CreateCall(Intrinsic, Values); 10122 return Result; 10123 } 10124 10125 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) 10126 // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf> 10127 case NVPTX::BI__hmma_m16n16k16_mma_f16f16: 10128 case NVPTX::BI__hmma_m16n16k16_mma_f32f16: 10129 case NVPTX::BI__hmma_m16n16k16_mma_f32f32: 10130 case NVPTX::BI__hmma_m16n16k16_mma_f16f32: { 10131 Address Dst = EmitPointerWithAlignment(E->getArg(0)); 10132 Address SrcA = EmitPointerWithAlignment(E->getArg(1)); 10133 Address SrcB = EmitPointerWithAlignment(E->getArg(2)); 10134 Address SrcC = EmitPointerWithAlignment(E->getArg(3)); 10135 llvm::APSInt LayoutArg; 10136 if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) 10137 return nullptr; 10138 int Layout = LayoutArg.getSExtValue(); 10139 if (Layout < 0 || Layout > 3) 10140 return nullptr; 10141 llvm::APSInt SatfArg; 10142 if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) 10143 return nullptr; 10144 bool Satf = SatfArg.getSExtValue(); 10145 10146 // clang-format off 10147 #define MMA_VARIANTS(type) {{ \ 10148 Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \ 10149 Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \ 10150 
Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \ 10151 Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \ 10152 Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \ 10153 Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \ 10154 Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \ 10155 Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \ 10156 }} 10157 // clang-format on 10158 10159 auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { 10160 unsigned Index = Layout * 2 + Satf; 10161 assert(Index < 8); 10162 return Variants[Index]; 10163 }; 10164 unsigned IID; 10165 unsigned NumEltsC; 10166 unsigned NumEltsD; 10167 switch (BuiltinID) { 10168 case NVPTX::BI__hmma_m16n16k16_mma_f16f16: 10169 IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16)); 10170 NumEltsC = 4; 10171 NumEltsD = 4; 10172 break; 10173 case NVPTX::BI__hmma_m16n16k16_mma_f32f16: 10174 IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16)); 10175 NumEltsC = 4; 10176 NumEltsD = 8; 10177 break; 10178 case NVPTX::BI__hmma_m16n16k16_mma_f16f32: 10179 IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32)); 10180 NumEltsC = 8; 10181 NumEltsD = 4; 10182 break; 10183 case NVPTX::BI__hmma_m16n16k16_mma_f32f32: 10184 IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32)); 10185 NumEltsC = 8; 10186 NumEltsD = 8; 10187 break; 10188 default: 10189 llvm_unreachable("Unexpected builtin ID."); 10190 } 10191 #undef MMA_VARIANTS 10192 10193 SmallVector<Value *, 24> Values; 10194 Function *Intrinsic = CGM.getIntrinsic(IID); 10195 llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); 10196 // Load A 10197 for (unsigned i = 0; i < 8; ++i) { 10198 Value *V = Builder.CreateAlignedLoad( 10199 Builder.CreateGEP(SrcA.getPointer(), 10200 llvm::ConstantInt::get(IntTy, i)), 10201 CharUnits::fromQuantity(4)); 10202 Values.push_back(Builder.CreateBitCast(V, ABType)); 10203 } 10204 // Load B 10205 for (unsigned i = 0; i < 8; ++i) { 10206 Value *V = Builder.CreateAlignedLoad( 10207 Builder.CreateGEP(SrcB.getPointer(), 10208 
llvm::ConstantInt::get(IntTy, i)), 10209 CharUnits::fromQuantity(4)); 10210 Values.push_back(Builder.CreateBitCast(V, ABType)); 10211 } 10212 // Load C 10213 llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); 10214 for (unsigned i = 0; i < NumEltsC; ++i) { 10215 Value *V = Builder.CreateAlignedLoad( 10216 Builder.CreateGEP(SrcC.getPointer(), 10217 llvm::ConstantInt::get(IntTy, i)), 10218 CharUnits::fromQuantity(4)); 10219 Values.push_back(Builder.CreateBitCast(V, CType)); 10220 } 10221 Value *Result = Builder.CreateCall(Intrinsic, Values); 10222 llvm::Type *DType = Dst.getElementType(); 10223 for (unsigned i = 0; i < NumEltsD; ++i) 10224 Builder.CreateAlignedStore( 10225 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), 10226 Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), 10227 CharUnits::fromQuantity(4)); 10228 return Result; 10229 } 10230 default: 10231 return nullptr; 10232 } 10233 } 10234 10235 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 10236 const CallExpr *E) { 10237 switch (BuiltinID) { 10238 case WebAssembly::BI__builtin_wasm_current_memory: { 10239 llvm::Type *ResultType = ConvertType(E->getType()); 10240 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 10241 return Builder.CreateCall(Callee); 10242 } 10243 case WebAssembly::BI__builtin_wasm_grow_memory: { 10244 Value *X = EmitScalarExpr(E->getArg(0)); 10245 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 10246 return Builder.CreateCall(Callee, X); 10247 } 10248 case WebAssembly::BI__builtin_wasm_throw: { 10249 Value *Tag = EmitScalarExpr(E->getArg(0)); 10250 Value *Obj = EmitScalarExpr(E->getArg(1)); 10251 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); 10252 return Builder.CreateCall(Callee, {Tag, Obj}); 10253 } 10254 case WebAssembly::BI__builtin_wasm_rethrow: { 10255 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); 10256 return 
Builder.CreateCall(Callee); 10257 } 10258 10259 default: 10260 return nullptr; 10261 } 10262 } 10263 10264 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, 10265 const CallExpr *E) { 10266 SmallVector<llvm::Value *, 4> Ops; 10267 Intrinsic::ID ID = Intrinsic::not_intrinsic; 10268 10269 switch (BuiltinID) { 10270 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: 10271 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: { 10272 Address Dest = EmitPointerWithAlignment(E->getArg(2)); 10273 unsigned Size; 10274 if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) { 10275 Size = 512; 10276 ID = Intrinsic::hexagon_V6_vaddcarry; 10277 } else { 10278 Size = 1024; 10279 ID = Intrinsic::hexagon_V6_vaddcarry_128B; 10280 } 10281 Dest = Builder.CreateBitCast(Dest, 10282 llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); 10283 LoadInst *QLd = Builder.CreateLoad(Dest); 10284 Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd }; 10285 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 10286 llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); 10287 llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), 10288 Vprd->getType()->getPointerTo(0)); 10289 Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); 10290 return Builder.CreateExtractValue(Result, 0); 10291 } 10292 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry: 10293 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { 10294 Address Dest = EmitPointerWithAlignment(E->getArg(2)); 10295 unsigned Size; 10296 if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) { 10297 Size = 512; 10298 ID = Intrinsic::hexagon_V6_vsubcarry; 10299 } else { 10300 Size = 1024; 10301 ID = Intrinsic::hexagon_V6_vsubcarry_128B; 10302 } 10303 Dest = Builder.CreateBitCast(Dest, 10304 llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); 10305 LoadInst *QLd = Builder.CreateLoad(Dest); 10306 Ops = { EmitScalarExpr(E->getArg(0)), 
EmitScalarExpr(E->getArg(1)), QLd }; 10307 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 10308 llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); 10309 llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), 10310 Vprd->getType()->getPointerTo(0)); 10311 Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); 10312 return Builder.CreateExtractValue(Result, 0); 10313 } 10314 } // switch 10315 10316 return nullptr; 10317 } 10318