1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGObjCRuntime.h" 16 #include "CGOpenCLRuntime.h" 17 #include "CGRecordLayout.h" 18 #include "CodeGenFunction.h" 19 #include "CodeGenModule.h" 20 #include "ConstantEmitter.h" 21 #include "TargetInfo.h" 22 #include "clang/AST/ASTContext.h" 23 #include "clang/AST/Decl.h" 24 #include "clang/Analysis/Analyses/OSLog.h" 25 #include "clang/Basic/TargetBuiltins.h" 26 #include "clang/Basic/TargetInfo.h" 27 #include "clang/CodeGen/CGFunctionInfo.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/IR/CallSite.h" 30 #include "llvm/IR/DataLayout.h" 31 #include "llvm/IR/InlineAsm.h" 32 #include "llvm/IR/Intrinsics.h" 33 #include "llvm/IR/MDBuilder.h" 34 #include "llvm/Support/ConvertUTF.h" 35 #include "llvm/Support/ScopedPrinter.h" 36 #include "llvm/Support/TargetParser.h" 37 #include <sstream> 38 39 using namespace clang; 40 using namespace CodeGen; 41 using namespace llvm; 42 43 static 44 int64_t clamp(int64_t Value, int64_t Low, int64_t High) { 45 return std::min(High, std::max(Low, Value)); 46 } 47 48 /// getBuiltinLibFunction - Given a builtin id for a function like 49 /// "__builtin_fabsf", return a Function* for "fabsf". 50 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 51 unsigned BuiltinID) { 52 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 53 54 // Get the name, skip over the __builtin_ prefix (if necessary). 
55 StringRef Name; 56 GlobalDecl D(FD); 57 58 // If the builtin has been declared explicitly with an assembler label, 59 // use the mangled name. This differs from the plain label on platforms 60 // that prefix labels. 61 if (FD->hasAttr<AsmLabelAttr>()) 62 Name = getMangledName(D); 63 else 64 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 65 66 llvm::FunctionType *Ty = 67 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 68 69 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 70 } 71 72 /// Emit the conversions required to turn the given value into an 73 /// integer of the given size. 74 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 75 QualType T, llvm::IntegerType *IntType) { 76 V = CGF.EmitToMemory(V, T); 77 78 if (V->getType()->isPointerTy()) 79 return CGF.Builder.CreatePtrToInt(V, IntType); 80 81 assert(V->getType() == IntType); 82 return V; 83 } 84 85 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 86 QualType T, llvm::Type *ResultType) { 87 V = CGF.EmitFromMemory(V, T); 88 89 if (ResultType->isPointerTy()) 90 return CGF.Builder.CreateIntToPtr(V, ResultType); 91 92 assert(V->getType() == ResultType); 93 return V; 94 } 95 96 /// Utility to insert an atomic instruction based on Instrinsic::ID 97 /// and the expression node. 
98 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 99 llvm::AtomicRMWInst::BinOp Kind, 100 const CallExpr *E) { 101 QualType T = E->getType(); 102 assert(E->getArg(0)->getType()->isPointerType()); 103 assert(CGF.getContext().hasSameUnqualifiedType(T, 104 E->getArg(0)->getType()->getPointeeType())); 105 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 106 107 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 108 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 109 110 llvm::IntegerType *IntType = 111 llvm::IntegerType::get(CGF.getLLVMContext(), 112 CGF.getContext().getTypeSize(T)); 113 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 114 115 llvm::Value *Args[2]; 116 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 117 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 118 llvm::Type *ValueType = Args[1]->getType(); 119 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 120 121 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 122 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 123 return EmitFromInt(CGF, Result, T, ValueType); 124 } 125 126 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 127 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 128 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 129 130 // Convert the type of the pointer to a pointer to the stored type. 
131 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 132 Value *BC = CGF.Builder.CreateBitCast( 133 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 134 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 135 LV.setNontemporal(true); 136 CGF.EmitStoreOfScalar(Val, LV, false); 137 return nullptr; 138 } 139 140 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 141 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 142 143 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 144 LV.setNontemporal(true); 145 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 146 } 147 148 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 149 llvm::AtomicRMWInst::BinOp Kind, 150 const CallExpr *E) { 151 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 152 } 153 154 /// Utility to insert an atomic instruction based Instrinsic::ID and 155 /// the expression node, where the return value is the result of the 156 /// operation. 
157 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 158 llvm::AtomicRMWInst::BinOp Kind, 159 const CallExpr *E, 160 Instruction::BinaryOps Op, 161 bool Invert = false) { 162 QualType T = E->getType(); 163 assert(E->getArg(0)->getType()->isPointerType()); 164 assert(CGF.getContext().hasSameUnqualifiedType(T, 165 E->getArg(0)->getType()->getPointeeType())); 166 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 167 168 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 169 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 170 171 llvm::IntegerType *IntType = 172 llvm::IntegerType::get(CGF.getLLVMContext(), 173 CGF.getContext().getTypeSize(T)); 174 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 175 176 llvm::Value *Args[2]; 177 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 178 llvm::Type *ValueType = Args[1]->getType(); 179 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 180 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 181 182 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 183 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 184 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 185 if (Invert) 186 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 187 llvm::ConstantInt::get(IntType, -1)); 188 Result = EmitFromInt(CGF, Result, T, ValueType); 189 return RValue::get(Result); 190 } 191 192 /// @brief Utility to insert an atomic cmpxchg instruction. 193 /// 194 /// @param CGF The current codegen function. 195 /// @param E Builtin call expression to convert to cmpxchg. 196 /// arg0 - address to operate on 197 /// arg1 - value to compare with 198 /// arg2 - new value 199 /// @param ReturnBool Specifies whether to return success flag of 200 /// cmpxchg result or the old value. 
201 /// 202 /// @returns result of cmpxchg, according to ReturnBool 203 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 204 bool ReturnBool) { 205 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 206 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 207 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 208 209 llvm::IntegerType *IntType = llvm::IntegerType::get( 210 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 211 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 212 213 Value *Args[3]; 214 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 215 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 216 llvm::Type *ValueType = Args[1]->getType(); 217 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 218 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 219 220 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 221 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 222 llvm::AtomicOrdering::SequentiallyConsistent); 223 if (ReturnBool) 224 // Extract boolean success flag and zext it to int. 225 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 226 CGF.ConvertType(E->getType())); 227 else 228 // Extract old value and emit it using the same type as compare value. 229 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 230 ValueType); 231 } 232 233 // Emit a simple mangled intrinsic that has 1 argument and a return type 234 // matching the argument type. 235 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 236 const CallExpr *E, 237 unsigned IntrinsicID) { 238 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 239 240 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 241 return CGF.Builder.CreateCall(F, Src0); 242 } 243 244 // Emit an intrinsic that has 2 operands of the same type as its result. 
245 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 246 const CallExpr *E, 247 unsigned IntrinsicID) { 248 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 249 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 250 251 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 252 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 253 } 254 255 // Emit an intrinsic that has 3 operands of the same type as its result. 256 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 257 const CallExpr *E, 258 unsigned IntrinsicID) { 259 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 260 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 261 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 262 263 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 264 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 265 } 266 267 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 268 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 269 const CallExpr *E, 270 unsigned IntrinsicID) { 271 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 272 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 273 274 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 275 return CGF.Builder.CreateCall(F, {Src0, Src1}); 276 } 277 278 /// EmitFAbs - Emit a call to @llvm.fabs(). 279 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 280 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 281 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 282 Call->setDoesNotAccessMemory(); 283 return Call; 284 } 285 286 /// Emit the computation of the sign bit for a floating point value. Returns 287 /// the i1 sign bit value. 
288 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 289 LLVMContext &C = CGF.CGM.getLLVMContext(); 290 291 llvm::Type *Ty = V->getType(); 292 int Width = Ty->getPrimitiveSizeInBits(); 293 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 294 V = CGF.Builder.CreateBitCast(V, IntTy); 295 if (Ty->isPPC_FP128Ty()) { 296 // We want the sign bit of the higher-order double. The bitcast we just 297 // did works as if the double-double was stored to memory and then 298 // read as an i128. The "store" will put the higher-order double in the 299 // lower address in both little- and big-Endian modes, but the "load" 300 // will treat those bits as a different part of the i128: the low bits in 301 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 302 // we need to shift the high bits down to the low before truncating. 303 Width >>= 1; 304 if (CGF.getTarget().isBigEndian()) { 305 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 306 V = CGF.Builder.CreateLShr(V, ShiftCst); 307 } 308 // We are truncating value in order to extract the higher-order 309 // double, which we will be using to extract the sign from. 310 IntTy = llvm::IntegerType::get(C, Width); 311 V = CGF.Builder.CreateTrunc(V, IntTy); 312 } 313 Value *Zero = llvm::Constant::getNullValue(IntTy); 314 return CGF.Builder.CreateICmpSLT(V, Zero); 315 } 316 317 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 318 const CallExpr *E, llvm::Constant *calleeValue) { 319 CGCallee callee = CGCallee::forDirect(calleeValue, FD); 320 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 321 } 322 323 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 324 /// depending on IntrinsicID. 325 /// 326 /// \arg CGF The current codegen function. 327 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 328 /// \arg X The first argument to the llvm.*.with.overflow.*. 
329 /// \arg Y The second argument to the llvm.*.with.overflow.*. 330 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 331 /// \returns The result (i.e. sum/product) returned by the intrinsic. 332 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 333 const llvm::Intrinsic::ID IntrinsicID, 334 llvm::Value *X, llvm::Value *Y, 335 llvm::Value *&Carry) { 336 // Make sure we have integers of the same width. 337 assert(X->getType() == Y->getType() && 338 "Arguments must be the same type. (Did you forget to make sure both " 339 "arguments have the same integer width?)"); 340 341 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 342 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 343 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 344 return CGF.Builder.CreateExtractValue(Tmp, 0); 345 } 346 347 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 348 unsigned IntrinsicID, 349 int low, int high) { 350 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 351 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 352 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 353 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 354 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 355 return Call; 356 } 357 358 namespace { 359 struct WidthAndSignedness { 360 unsigned Width; 361 bool Signed; 362 }; 363 } 364 365 static WidthAndSignedness 366 getIntegerWidthAndSignedness(const clang::ASTContext &context, 367 const clang::QualType Type) { 368 assert(Type->isIntegerType() && "Given type is not an integer."); 369 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 370 bool Signed = Type->isSignedIntegerType(); 371 return {Width, Signed}; 372 } 373 374 // Given one or more integer types, this function produces an integer type that 375 // encompasses them: any value in one of the given types could be expressed in 376 // the encompassing type. 
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types.  Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    // The boolean expression contributes one extra bit for an unsigned input
    // when the result is signed.
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}

/// Emit a call to the Intrinsic::vastart (when \p IsStart is true) or
/// Intrinsic::vaend intrinsic on \p ArgValue.  The argument is first bitcast
/// to Int8PtrTy if it does not already have that type.  Returns the call.
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

/// Return the constant used as the fallback __builtin_object_size result:
/// 0 when bit 1 of \p Type is set (Type 2 or 3), otherwise -1, as a signed
/// constant of type \p ResType.
static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

/// Try to evaluate the object size of \p E via Expr::tryEvaluateObjectSize
/// and return it as a constant of type \p ResType.  If that evaluation
/// fails, defer to emitBuiltinObjectSize with the same arguments.
llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType,
                                                 llvm::Value *EmittedE) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
///     it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      // Look up the implicit size parameter recorded for this parameter, then
      // the local address recorded for that implicit parameter.
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      // Note: this D shadows the DeclRefExpr D declared by the enclosing if.
      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
                              getContext().getSizeType(), E->getLocStart());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  // evaluate E for side-effects. In either case, we shouldn't lower to
  // @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  // Reuse the already-emitted pointer when the caller supplied one.
  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
}

// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
// handle them here.
enum class CodeGenFunction::MSVCIntrin {
  _BitScanForward,
  _BitScanReverse,
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  __fastfail,
};

/// Emit IR for the MSVC intrinsic identified by \p BuiltinID, taking its
/// operands from the arguments of the call expression \p E.  Returns the
/// value produced for the call.
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
                                            const CallExpr *E) {
  switch (BuiltinID) {
  case MSVCIntrin::_BitScanForward:
  case MSVCIntrin::_BitScanReverse: {
    // Argument 1 is the value to scan; argument 0 is a pointer through which
    // the resulting bit index is stored.
    Value *ArgValue = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = ArgValue->getType();
    // NOTE(review): argument 0 is emitted here only to obtain its pointee
    // type and is emitted again below via EmitPointerWithAlignment -- confirm
    // that emitting the expression twice is intended.
    llvm::Type *IndexType =
      EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
    llvm::Type *ResultType = ConvertType(E->getType());

    Value *ArgZero = llvm::Constant::getNullValue(ArgType);
    Value *ResZero = llvm::Constant::getNullValue(ResultType);
    Value *ResOne = llvm::ConstantInt::get(ResultType, 1);

    // Create the join block up front so the result PHI exists before the
    // branches that feed it are emitted.
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");

    // A zero input branches straight to the end with a result of zero; no
    // index is stored on that path.
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
    BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ResZero, Begin);

    Builder.SetInsertPoint(NotZero);
    Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));

    if (BuiltinID == MSVCIntrin::_BitScanForward) {
      // Forward scan: the stored index is the cttz count.
      Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Builder.CreateStore(ZeroCount, IndexAddress, false);
    } else {
      // Reverse scan: the stored index is (bit width - 1) minus the ctlz
      // count.
      unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
      Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);

      Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
      Builder.CreateStore(Index, IndexAddress, false);
    }
    // The non-zero path contributes a result of one.
    Builder.CreateBr(End);
    Result->addIncoming(ResOne, NotZero);

    Builder.SetInsertPoint(End);
    return Result;
  }
  // The following intrinsics map directly onto a single atomicrmw operation.
  case MSVCIntrin::_InterlockedAnd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
  case MSVCIntrin::_InterlockedExchange:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
  case MSVCIntrin::_InterlockedExchangeAdd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
  case MSVCIntrin::_InterlockedExchangeSub:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
  case MSVCIntrin::_InterlockedOr:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
  case MSVCIntrin::_InterlockedXor:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);

  case MSVCIntrin::_interlockedbittestandset: {
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
    llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
    // Atomically OR the mask (1 << Bit) into the addressed value.
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
        AtomicRMWInst::Or, Addr,
        Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
        llvm::AtomicOrdering::SequentiallyConsistent);
    // Shift the relevant bit to the least significant position, truncate to
    // the result type, and test the low bit.
    llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
    llvm::Value *Truncated =
        Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
    return Builder.CreateAnd(Truncated,
                             ConstantInt::get(Truncated->getType(), 1));
  }

  case MSVCIntrin::_InterlockedDecrement: {
    llvm::Type *IntTy = ConvertType(E->getType());
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Sub,
      EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      llvm::AtomicOrdering::SequentiallyConsistent);
    // The returned value is the atomicrmw result with one subtracted again.
    return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
  }
  case MSVCIntrin::_InterlockedIncrement: {
    llvm::Type *IntTy = ConvertType(E->getType());
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Add,
      EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      llvm::AtomicOrdering::SequentiallyConsistent);
    // The returned value is the atomicrmw result with one added again.
    return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
  }

  case MSVCIntrin::__fastfail: {
    // Request immediate process termination from the kernel. The instruction
    // sequences to do this are documented on MSDN:
    // https://msdn.microsoft.com/en-us/library/dn774154.aspx
    llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
    StringRef Asm, Constraints;
    switch (ISA) {
    default:
      // Report the error; emission still continues below with empty Asm and
      // Constraints strings.
      ErrorUnsupported(E, "__fastfail call for this architecture");
      break;
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      Asm = "int $$0x29";
      Constraints = "{cx}";
      break;
    case llvm::Triple::thumb:
      Asm = "udf #251";
      Constraints = "{r0}";
      break;
    }
    // The inline asm takes the single i32 argument of the call and is marked
    // noreturn.
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
    CS.setAttributes(NoReturnAttr);
    return CS.getInstruction();
  }
  }
  llvm_unreachable("Incorrect MSVC intrinsic!");
}

namespace {
// ARC cleanup for __builtin_os_log_format
// When emitted, the cleanup calls EmitARCIntrinsicUse on the stored object.
struct CallObjCArcUse final : EHScopeStack::Cleanup {
  CallObjCArcUse(llvm::Value *object) : object(object) {}
  llvm::Value *object;

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    CGF.EmitARCIntrinsicUse(object);
  }
};
}

/// Emit \p E as a scalar and return it.  When the Builtin sanitizer is
/// enabled and the target reports isCLZForZeroUndef(), additionally emit a
/// sanitizer check that the value is not zero, tagged with \p Kind.
Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
                                                 BuiltinCheckKind Kind) {
  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
          && "Unsupported builtin check kind");

  Value *ArgValue = EmitScalarExpr(E);
  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
    return ArgValue;

  SanitizerScope SanScope(this);
  // The check passes when the argument is non-zero.
  Value *Cond = Builder.CreateICmpNE(
      ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
            SanitizerHandler::InvalidBuiltin,
            {EmitCheckSourceLocation(E->getExprLoc()),
             llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
            None);
  return ArgValue;
}

/// Get the argument type for arguments to os_log_helper.
/// This is the canonical unsigned integer type that is \p Size * 8 bits wide.
static CanQualType getOSLogArgType(ASTContext &C, int Size) {
  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
  return C.getCanonicalType(UnsignedTy);
}

/// Return the helper function that fills in an os_log buffer described by
/// \p Layout, creating and emitting it if the module does not already
/// contain a function with the same name.  The name encodes
/// \p BufferAlignment, the summary and num-args bytes, and each item's size
/// and descriptor bytes.
llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
    const analyze_os_log::OSLogBufferLayout &Layout,
    CharUnits BufferAlignment) {
  ASTContext &Ctx = getContext();

  // Build the helper's name from everything that determines its body.
  llvm::SmallString<64> Name;
  {
    raw_svector_ostream OS(Name);
    OS << "__os_log_helper";
    OS << "_" << BufferAlignment.getQuantity();
    OS << "_" << int(Layout.getSummaryByte());
    OS << "_" << int(Layout.getNumArgsByte());
    for (const auto &Item : Layout.Items)
      OS << "_" << int(Item.getSizeByte()) << "_"
         << int(Item.getDescriptorByte());
  }

  // Reuse an existing helper with this name.
  if (llvm::Function *F = CGM.getModule().getFunction(Name))
    return F;

  // Parameter 0 is the buffer pointer; it is followed by one parameter for
  // every item whose size byte is non-zero.
  llvm::SmallVector<ImplicitParamDecl, 4> Params;
  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
                      Ctx.VoidPtrTy, ImplicitParamDecl::Other);

  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
    char Size = Layout.Items[I].getSizeByte();
    if (!Size)
      continue;

    Params.emplace_back(
        Ctx, nullptr, SourceLocation(),
        &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
        getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
  }

  FunctionArgList Args;
  for (auto &P : Params)
    Args.push_back(&P);

  // The helper function has linkonce_odr linkage to enable the linker to merge
  // identical functions. To ensure the merging always happens, 'noinline' is
  // attached to the function when compiling with -Oz.
  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  llvm::Function *Fn = llvm::Function::Create(
      FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);

  // Attach 'noinline' at -Oz.
  if (CGM.getCodeGenOpts().OptimizeSize == 2)
    Fn->addFnAttr(llvm::Attribute::NoInline);

  auto NL = ApplyDebugLocation::CreateEmpty(*this);
  IdentifierInfo *II = &Ctx.Idents.get(Name);
  FunctionDecl *FD = FunctionDecl::Create(
      Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
      Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);

  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);

  // Create a scope with an artificial location for the body of this function.
  auto AL = ApplyDebugLocation::CreateArtificial(*this);

  // Write the two header bytes at byte offsets 0 and 1 of the buffer.
  CharUnits Offset;
  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
                  BufferAlignment);
  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));

  // I indexes Params; it starts at 1 because Params[0] is the buffer.
  unsigned I = 1;
  for (const auto &Item : Layout.Items) {
    // Every item writes a descriptor byte followed by a size byte.
    Builder.CreateStore(
        Builder.getInt8(Item.getDescriptorByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
    Builder.CreateStore(
        Builder.getInt8(Item.getSizeByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));

    // Items of size zero carry no data and consume no parameter.
    CharUnits Size = Item.size();
    if (!Size.getQuantity())
      continue;

    // Copy the parameter's value into the buffer at the current offset.
    Address Arg = GetAddrOfLocalVar(&Params[I]);
    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
    Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
                                 "argDataCast");
    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
    Offset += Size;
    ++I;
  }

  FinishFunction();

  return Fn;
}

/// Emit a __builtin_os_log_format call: compute the buffer layout for \p E,
/// emit every item with a non-zero size byte as an integer argument, and call
/// the helper produced by generateBuiltinOSLogHelperFunction.  Returns the
/// buffer pointer (argument 0) as the call's value.
RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
  assert(E.getNumArgs() >= 2 &&
         "__builtin_os_log_format takes at least 2 arguments");
  ASTContext &Ctx = getContext();
  analyze_os_log::OSLogBufferLayout Layout;
  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;

  // Ignore argument 1, the format string. It is not currently used.
  CallArgList Args;
  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);

  for (const auto &Item : Layout.Items) {
    // Items with a zero size byte have no corresponding helper argument.
    int Size = Item.getSizeByte();
    if (!Size)
      continue;

    llvm::Value *ArgVal;

    if (const Expr *TheExpr = Item.getExpr()) {
      ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);

      // Check if this is a retainable type.
      if (TheExpr->getType()->isObjCRetainableType()) {
        assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
               "Only scalar can be a ObjC retainable type");
        // Check if the object is constant, if not, save it in
        // RetainableOperands.
        if (!isa<Constant>(ArgVal))
          RetainableOperands.push_back(ArgVal);
      }
    } else {
      // No expression: the item carries a constant value instead.
      ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
    }

    // Reinterpret the value as an integer of its own bit size, then convert
    // it to the helper's parameter type for this item size.
    unsigned ArgValSize =
        CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
    llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
                                                     ArgValSize);
    ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
    CanQualType ArgTy = getOSLogArgType(Ctx, Size);
    // If ArgVal has type x86_fp80, zero-extend ArgVal.
    ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
    Args.add(RValue::get(ArgVal), ArgTy);
  }

  // The helper is generated with a separate, temporary CodeGenFunction.
  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
      Layout, BufAddr.getAlignment());
  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);

  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
  // cleanup will cause the use to appear after the final log call, keeping
  // the object valid while it’s held in the log buffer.  Note that if there’s
  // a release cleanup on the object, it will already be active; since
  // cleanups are emitted in reverse order, the use will occur before the
  // object is released.
  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
      CGM.getCodeGenOpts().OptimizationLevel != 0)
    for (llvm::Value *Object : RetainableOperands)
      pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);

  return RValue::get(BufAddr.getPointer());
}

/// Determine if a binop is a checked mixed-sign multiply we can specialize.
/// The builtin must be __builtin_mul_overflow, both operands must have the
/// same width, that width must be at least the result's width, and exactly
/// one operand must be signed.
static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
                                       WidthAndSignedness Op1Info,
                                       WidthAndSignedness Op2Info,
                                       WidthAndSignedness ResultInfo) {
  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
         Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
         Op1Info.Signed != Op2Info.Signed;
}

/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
/// the generic checked-binop irgen.
static RValue
EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
                             WidthAndSignedness Op1Info, const clang::Expr *Op2,
                             WidthAndSignedness Op2Info,
                             const clang::Expr *ResultArg, QualType ResultQTy,
                             WidthAndSignedness ResultInfo) {
  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
                                    Op2Info, ResultInfo) &&
         "Not a mixed-sign multipliction we can specialize");

  // Emit the signed and unsigned operands.
  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);

  llvm::Type *OpTy = Signed->getType();
  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  llvm::Type *ResTy = ResultPtr.getElementType();

  // Take the absolute value of the signed operand.
877 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); 878 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed); 879 llvm::Value *AbsSigned = 880 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed); 881 882 // Perform a checked unsigned multiplication. 883 llvm::Value *UnsignedOverflow; 884 llvm::Value *UnsignedResult = 885 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned, 886 Unsigned, UnsignedOverflow); 887 888 llvm::Value *Overflow, *Result; 889 if (ResultInfo.Signed) { 890 // Signed overflow occurs if the result is greater than INT_MAX or lesser 891 // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). 892 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width) 893 .zextOrSelf(Op1Info.Width); 894 llvm::Value *MaxResult = 895 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), 896 CGF.Builder.CreateZExt(IsNegative, OpTy)); 897 llvm::Value *SignedOverflow = 898 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult); 899 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow); 900 901 // Prepare the signed result (possibly by negating it). 902 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult); 903 llvm::Value *SignedResult = 904 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult); 905 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy); 906 } else { 907 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX. 
908 llvm::Value *Underflow = CGF.Builder.CreateAnd( 909 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); 910 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); 911 if (ResultInfo.Width < Op1Info.Width) { 912 auto IntMax = 913 llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width); 914 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( 915 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); 916 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); 917 } 918 919 // Negate the product if it would be negative in infinite precision. 920 Result = CGF.Builder.CreateSelect( 921 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult); 922 923 Result = CGF.Builder.CreateTrunc(Result, ResTy); 924 } 925 assert(Overflow && Result && "Missing overflow or result"); 926 927 bool isVolatile = 928 ResultArg->getType()->getPointeeType().isVolatileQualified(); 929 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, 930 isVolatile); 931 return RValue::get(Overflow); 932 } 933 934 static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, 935 Value *&RecordPtr, CharUnits Align, Value *Func, 936 int Lvl) { 937 const auto *RT = RType->getAs<RecordType>(); 938 ASTContext &Context = CGF.getContext(); 939 RecordDecl *RD = RT->getDecl()->getDefinition(); 940 ASTContext &Ctx = RD->getASTContext(); 941 const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD); 942 std::string Pad = std::string(Lvl * 4, ' '); 943 944 Value *GString = 945 CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n"); 946 Value *Res = CGF.Builder.CreateCall(Func, {GString}); 947 948 static llvm::DenseMap<QualType, const char *> Types; 949 if (Types.empty()) { 950 Types[Context.CharTy] = "%c"; 951 Types[Context.BoolTy] = "%d"; 952 Types[Context.SignedCharTy] = "%hhd"; 953 Types[Context.UnsignedCharTy] = "%hhu"; 954 Types[Context.IntTy] = "%d"; 955 Types[Context.UnsignedIntTy] = "%u"; 956 Types[Context.LongTy] = "%ld"; 957 
Types[Context.UnsignedLongTy] = "%lu"; 958 Types[Context.LongLongTy] = "%lld"; 959 Types[Context.UnsignedLongLongTy] = "%llu"; 960 Types[Context.ShortTy] = "%hd"; 961 Types[Context.UnsignedShortTy] = "%hu"; 962 Types[Context.VoidPtrTy] = "%p"; 963 Types[Context.FloatTy] = "%f"; 964 Types[Context.DoubleTy] = "%f"; 965 Types[Context.LongDoubleTy] = "%Lf"; 966 Types[Context.getPointerType(Context.CharTy)] = "%s"; 967 Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s"; 968 } 969 970 for (const auto *FD : RD->fields()) { 971 uint64_t Off = RL.getFieldOffset(FD->getFieldIndex()); 972 Off = Ctx.toCharUnitsFromBits(Off).getQuantity(); 973 974 Value *FieldPtr = RecordPtr; 975 if (RD->isUnion()) 976 FieldPtr = CGF.Builder.CreatePointerCast( 977 FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType()))); 978 else 979 FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr, 980 FD->getFieldIndex()); 981 982 GString = CGF.Builder.CreateGlobalStringPtr( 983 llvm::Twine(Pad) 984 .concat(FD->getType().getAsString()) 985 .concat(llvm::Twine(' ')) 986 .concat(FD->getNameAsString()) 987 .concat(" : ") 988 .str()); 989 Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); 990 Res = CGF.Builder.CreateAdd(Res, TmpRes); 991 992 QualType CanonicalType = 993 FD->getType().getUnqualifiedType().getCanonicalType(); 994 995 // We check whether we are in a recursive type 996 if (CanonicalType->isRecordType()) { 997 Value *TmpRes = 998 dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1); 999 Res = CGF.Builder.CreateAdd(TmpRes, Res); 1000 continue; 1001 } 1002 1003 // We try to determine the best format to print the current field 1004 llvm::Twine Format = Types.find(CanonicalType) == Types.end() 1005 ? 
Types[Context.VoidPtrTy] 1006 : Types[CanonicalType]; 1007 1008 Address FieldAddress = Address(FieldPtr, Align); 1009 FieldPtr = CGF.Builder.CreateLoad(FieldAddress); 1010 1011 // FIXME Need to handle bitfield here 1012 GString = CGF.Builder.CreateGlobalStringPtr( 1013 Format.concat(llvm::Twine('\n')).str()); 1014 TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr}); 1015 Res = CGF.Builder.CreateAdd(Res, TmpRes); 1016 } 1017 1018 GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n"); 1019 Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); 1020 Res = CGF.Builder.CreateAdd(Res, TmpRes); 1021 return Res; 1022 } 1023 1024 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 1025 unsigned BuiltinID, const CallExpr *E, 1026 ReturnValueSlot ReturnValue) { 1027 // See if we can constant fold this builtin. If so, don't emit it at all. 1028 Expr::EvalResult Result; 1029 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 1030 !Result.hasSideEffects()) { 1031 if (Result.Val.isInt()) 1032 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 1033 Result.Val.getInt())); 1034 if (Result.Val.isFloat()) 1035 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 1036 Result.Val.getFloat())); 1037 } 1038 1039 // There are LLVM math intrinsics/instructions corresponding to math library 1040 // functions except the LLVM op will never set errno while the math library 1041 // might. Also, math builtins have the same semantics as their math library 1042 // twins. Thus, we can transform math library and builtin calls to their 1043 // LLVM counterparts if the call is marked 'const' (known to never set errno). 
1044 if (FD->hasAttr<ConstAttr>()) { 1045 switch (BuiltinID) { 1046 case Builtin::BIceil: 1047 case Builtin::BIceilf: 1048 case Builtin::BIceill: 1049 case Builtin::BI__builtin_ceil: 1050 case Builtin::BI__builtin_ceilf: 1051 case Builtin::BI__builtin_ceill: 1052 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 1053 1054 case Builtin::BIcopysign: 1055 case Builtin::BIcopysignf: 1056 case Builtin::BIcopysignl: 1057 case Builtin::BI__builtin_copysign: 1058 case Builtin::BI__builtin_copysignf: 1059 case Builtin::BI__builtin_copysignl: 1060 case Builtin::BI__builtin_copysignf128: 1061 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 1062 1063 case Builtin::BIcos: 1064 case Builtin::BIcosf: 1065 case Builtin::BIcosl: 1066 case Builtin::BI__builtin_cos: 1067 case Builtin::BI__builtin_cosf: 1068 case Builtin::BI__builtin_cosl: 1069 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos)); 1070 1071 case Builtin::BIexp: 1072 case Builtin::BIexpf: 1073 case Builtin::BIexpl: 1074 case Builtin::BI__builtin_exp: 1075 case Builtin::BI__builtin_expf: 1076 case Builtin::BI__builtin_expl: 1077 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp)); 1078 1079 case Builtin::BIexp2: 1080 case Builtin::BIexp2f: 1081 case Builtin::BIexp2l: 1082 case Builtin::BI__builtin_exp2: 1083 case Builtin::BI__builtin_exp2f: 1084 case Builtin::BI__builtin_exp2l: 1085 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2)); 1086 1087 case Builtin::BIfabs: 1088 case Builtin::BIfabsf: 1089 case Builtin::BIfabsl: 1090 case Builtin::BI__builtin_fabs: 1091 case Builtin::BI__builtin_fabsf: 1092 case Builtin::BI__builtin_fabsl: 1093 case Builtin::BI__builtin_fabsf128: 1094 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 1095 1096 case Builtin::BIfloor: 1097 case Builtin::BIfloorf: 1098 case Builtin::BIfloorl: 1099 case Builtin::BI__builtin_floor: 1100 case Builtin::BI__builtin_floorf: 1101 case Builtin::BI__builtin_floorl: 1102 
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 1103 1104 case Builtin::BIfma: 1105 case Builtin::BIfmaf: 1106 case Builtin::BIfmal: 1107 case Builtin::BI__builtin_fma: 1108 case Builtin::BI__builtin_fmaf: 1109 case Builtin::BI__builtin_fmal: 1110 return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma)); 1111 1112 case Builtin::BIfmax: 1113 case Builtin::BIfmaxf: 1114 case Builtin::BIfmaxl: 1115 case Builtin::BI__builtin_fmax: 1116 case Builtin::BI__builtin_fmaxf: 1117 case Builtin::BI__builtin_fmaxl: 1118 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 1119 1120 case Builtin::BIfmin: 1121 case Builtin::BIfminf: 1122 case Builtin::BIfminl: 1123 case Builtin::BI__builtin_fmin: 1124 case Builtin::BI__builtin_fminf: 1125 case Builtin::BI__builtin_fminl: 1126 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 1127 1128 // fmod() is a special-case. It maps to the frem instruction rather than an 1129 // LLVM intrinsic. 1130 case Builtin::BIfmod: 1131 case Builtin::BIfmodf: 1132 case Builtin::BIfmodl: 1133 case Builtin::BI__builtin_fmod: 1134 case Builtin::BI__builtin_fmodf: 1135 case Builtin::BI__builtin_fmodl: { 1136 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 1137 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 1138 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); 1139 } 1140 1141 case Builtin::BIlog: 1142 case Builtin::BIlogf: 1143 case Builtin::BIlogl: 1144 case Builtin::BI__builtin_log: 1145 case Builtin::BI__builtin_logf: 1146 case Builtin::BI__builtin_logl: 1147 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log)); 1148 1149 case Builtin::BIlog10: 1150 case Builtin::BIlog10f: 1151 case Builtin::BIlog10l: 1152 case Builtin::BI__builtin_log10: 1153 case Builtin::BI__builtin_log10f: 1154 case Builtin::BI__builtin_log10l: 1155 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10)); 1156 1157 case Builtin::BIlog2: 1158 case Builtin::BIlog2f: 1159 case Builtin::BIlog2l: 1160 case 
Builtin::BI__builtin_log2: 1161 case Builtin::BI__builtin_log2f: 1162 case Builtin::BI__builtin_log2l: 1163 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2)); 1164 1165 case Builtin::BInearbyint: 1166 case Builtin::BInearbyintf: 1167 case Builtin::BInearbyintl: 1168 case Builtin::BI__builtin_nearbyint: 1169 case Builtin::BI__builtin_nearbyintf: 1170 case Builtin::BI__builtin_nearbyintl: 1171 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 1172 1173 case Builtin::BIpow: 1174 case Builtin::BIpowf: 1175 case Builtin::BIpowl: 1176 case Builtin::BI__builtin_pow: 1177 case Builtin::BI__builtin_powf: 1178 case Builtin::BI__builtin_powl: 1179 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow)); 1180 1181 case Builtin::BIrint: 1182 case Builtin::BIrintf: 1183 case Builtin::BIrintl: 1184 case Builtin::BI__builtin_rint: 1185 case Builtin::BI__builtin_rintf: 1186 case Builtin::BI__builtin_rintl: 1187 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 1188 1189 case Builtin::BIround: 1190 case Builtin::BIroundf: 1191 case Builtin::BIroundl: 1192 case Builtin::BI__builtin_round: 1193 case Builtin::BI__builtin_roundf: 1194 case Builtin::BI__builtin_roundl: 1195 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 1196 1197 case Builtin::BIsin: 1198 case Builtin::BIsinf: 1199 case Builtin::BIsinl: 1200 case Builtin::BI__builtin_sin: 1201 case Builtin::BI__builtin_sinf: 1202 case Builtin::BI__builtin_sinl: 1203 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin)); 1204 1205 case Builtin::BIsqrt: 1206 case Builtin::BIsqrtf: 1207 case Builtin::BIsqrtl: 1208 case Builtin::BI__builtin_sqrt: 1209 case Builtin::BI__builtin_sqrtf: 1210 case Builtin::BI__builtin_sqrtl: 1211 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt)); 1212 1213 case Builtin::BItrunc: 1214 case Builtin::BItruncf: 1215 case Builtin::BItruncl: 1216 case Builtin::BI__builtin_trunc: 1217 case Builtin::BI__builtin_truncf: 
1218 case Builtin::BI__builtin_truncl: 1219 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 1220 1221 default: 1222 break; 1223 } 1224 } 1225 1226 switch (BuiltinID) { 1227 default: break; 1228 case Builtin::BI__builtin___CFStringMakeConstantString: 1229 case Builtin::BI__builtin___NSStringMakeConstantString: 1230 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); 1231 case Builtin::BI__builtin_stdarg_start: 1232 case Builtin::BI__builtin_va_start: 1233 case Builtin::BI__va_start: 1234 case Builtin::BI__builtin_va_end: 1235 return RValue::get( 1236 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 1237 ? EmitScalarExpr(E->getArg(0)) 1238 : EmitVAListRef(E->getArg(0)).getPointer(), 1239 BuiltinID != Builtin::BI__builtin_va_end)); 1240 case Builtin::BI__builtin_va_copy: { 1241 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 1242 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 1243 1244 llvm::Type *Type = Int8PtrTy; 1245 1246 DstPtr = Builder.CreateBitCast(DstPtr, Type); 1247 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 1248 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 1249 {DstPtr, SrcPtr})); 1250 } 1251 case Builtin::BI__builtin_abs: 1252 case Builtin::BI__builtin_labs: 1253 case Builtin::BI__builtin_llabs: { 1254 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1255 1256 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 1257 Value *CmpResult = 1258 Builder.CreateICmpSGE(ArgValue, 1259 llvm::Constant::getNullValue(ArgValue->getType()), 1260 "abscond"); 1261 Value *Result = 1262 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 1263 1264 return RValue::get(Result); 1265 } 1266 case Builtin::BI__builtin_conj: 1267 case Builtin::BI__builtin_conjf: 1268 case Builtin::BI__builtin_conjl: { 1269 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1270 Value *Real = ComplexVal.first; 1271 Value *Imag = ComplexVal.second; 1272 Value *Zero = 1273 
Imag->getType()->isFPOrFPVectorTy() 1274 ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 1275 : llvm::Constant::getNullValue(Imag->getType()); 1276 1277 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 1278 return RValue::getComplex(std::make_pair(Real, Imag)); 1279 } 1280 case Builtin::BI__builtin_creal: 1281 case Builtin::BI__builtin_crealf: 1282 case Builtin::BI__builtin_creall: 1283 case Builtin::BIcreal: 1284 case Builtin::BIcrealf: 1285 case Builtin::BIcreall: { 1286 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1287 return RValue::get(ComplexVal.first); 1288 } 1289 1290 case Builtin::BI__builtin_dump_struct: { 1291 Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts()); 1292 CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment(); 1293 1294 const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts(); 1295 QualType Arg0Type = Arg0->getType()->getPointeeType(); 1296 1297 Value *RecordPtr = EmitScalarExpr(Arg0); 1298 Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0); 1299 return RValue::get(Res); 1300 } 1301 1302 case Builtin::BI__builtin_cimag: 1303 case Builtin::BI__builtin_cimagf: 1304 case Builtin::BI__builtin_cimagl: 1305 case Builtin::BIcimag: 1306 case Builtin::BIcimagf: 1307 case Builtin::BIcimagl: { 1308 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1309 return RValue::get(ComplexVal.second); 1310 } 1311 1312 case Builtin::BI__builtin_ctzs: 1313 case Builtin::BI__builtin_ctz: 1314 case Builtin::BI__builtin_ctzl: 1315 case Builtin::BI__builtin_ctzll: { 1316 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); 1317 1318 llvm::Type *ArgType = ArgValue->getType(); 1319 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 1320 1321 llvm::Type *ResultType = ConvertType(E->getType()); 1322 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 1323 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 1324 if (Result->getType() != 
ResultType) 1325 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1326 "cast"); 1327 return RValue::get(Result); 1328 } 1329 case Builtin::BI__builtin_clzs: 1330 case Builtin::BI__builtin_clz: 1331 case Builtin::BI__builtin_clzl: 1332 case Builtin::BI__builtin_clzll: { 1333 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); 1334 1335 llvm::Type *ArgType = ArgValue->getType(); 1336 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 1337 1338 llvm::Type *ResultType = ConvertType(E->getType()); 1339 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 1340 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 1341 if (Result->getType() != ResultType) 1342 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1343 "cast"); 1344 return RValue::get(Result); 1345 } 1346 case Builtin::BI__builtin_ffs: 1347 case Builtin::BI__builtin_ffsl: 1348 case Builtin::BI__builtin_ffsll: { 1349 // ffs(x) -> x ? cttz(x) + 1 : 0 1350 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1351 1352 llvm::Type *ArgType = ArgValue->getType(); 1353 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 1354 1355 llvm::Type *ResultType = ConvertType(E->getType()); 1356 Value *Tmp = 1357 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 1358 llvm::ConstantInt::get(ArgType, 1)); 1359 Value *Zero = llvm::Constant::getNullValue(ArgType); 1360 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 1361 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 1362 if (Result->getType() != ResultType) 1363 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1364 "cast"); 1365 return RValue::get(Result); 1366 } 1367 case Builtin::BI__builtin_parity: 1368 case Builtin::BI__builtin_parityl: 1369 case Builtin::BI__builtin_parityll: { 1370 // parity(x) -> ctpop(x) & 1 1371 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1372 1373 llvm::Type *ArgType = 
ArgValue->getType(); 1374 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 1375 1376 llvm::Type *ResultType = ConvertType(E->getType()); 1377 Value *Tmp = Builder.CreateCall(F, ArgValue); 1378 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 1379 if (Result->getType() != ResultType) 1380 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1381 "cast"); 1382 return RValue::get(Result); 1383 } 1384 case Builtin::BI__popcnt16: 1385 case Builtin::BI__popcnt: 1386 case Builtin::BI__popcnt64: 1387 case Builtin::BI__builtin_popcount: 1388 case Builtin::BI__builtin_popcountl: 1389 case Builtin::BI__builtin_popcountll: { 1390 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1391 1392 llvm::Type *ArgType = ArgValue->getType(); 1393 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 1394 1395 llvm::Type *ResultType = ConvertType(E->getType()); 1396 Value *Result = Builder.CreateCall(F, ArgValue); 1397 if (Result->getType() != ResultType) 1398 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 1399 "cast"); 1400 return RValue::get(Result); 1401 } 1402 case Builtin::BI_rotr8: 1403 case Builtin::BI_rotr16: 1404 case Builtin::BI_rotr: 1405 case Builtin::BI_lrotr: 1406 case Builtin::BI_rotr64: { 1407 Value *Val = EmitScalarExpr(E->getArg(0)); 1408 Value *Shift = EmitScalarExpr(E->getArg(1)); 1409 1410 llvm::Type *ArgType = Val->getType(); 1411 Shift = Builder.CreateIntCast(Shift, ArgType, false); 1412 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1413 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 1414 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 1415 1416 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 1417 Shift = Builder.CreateAnd(Shift, Mask); 1418 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 1419 1420 Value *RightShifted = Builder.CreateLShr(Val, Shift); 1421 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 1422 Value 
*Rotated = Builder.CreateOr(LeftShifted, RightShifted); 1423 1424 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 1425 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 1426 return RValue::get(Result); 1427 } 1428 case Builtin::BI_rotl8: 1429 case Builtin::BI_rotl16: 1430 case Builtin::BI_rotl: 1431 case Builtin::BI_lrotl: 1432 case Builtin::BI_rotl64: { 1433 Value *Val = EmitScalarExpr(E->getArg(0)); 1434 Value *Shift = EmitScalarExpr(E->getArg(1)); 1435 1436 llvm::Type *ArgType = Val->getType(); 1437 Shift = Builder.CreateIntCast(Shift, ArgType, false); 1438 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1439 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 1440 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 1441 1442 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 1443 Shift = Builder.CreateAnd(Shift, Mask); 1444 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 1445 1446 Value *LeftShifted = Builder.CreateShl(Val, Shift); 1447 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 1448 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 1449 1450 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 1451 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 1452 return RValue::get(Result); 1453 } 1454 case Builtin::BI__builtin_unpredictable: { 1455 // Always return the argument of __builtin_unpredictable. LLVM does not 1456 // handle this builtin. Metadata for this builtin should be added directly 1457 // to instructions such as branches or switches that use it. 1458 return RValue::get(EmitScalarExpr(E->getArg(0))); 1459 } 1460 case Builtin::BI__builtin_expect: { 1461 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1462 llvm::Type *ArgType = ArgValue->getType(); 1463 1464 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 1465 // Don't generate llvm.expect on -O0 as the backend won't use it for 1466 // anything. 
1467 // Note, we still IRGen ExpectedValue because it could have side-effects. 1468 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 1469 return RValue::get(ArgValue); 1470 1471 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 1472 Value *Result = 1473 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 1474 return RValue::get(Result); 1475 } 1476 case Builtin::BI__builtin_assume_aligned: { 1477 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 1478 Value *OffsetValue = 1479 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; 1480 1481 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 1482 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 1483 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 1484 1485 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 1486 return RValue::get(PtrValue); 1487 } 1488 case Builtin::BI__assume: 1489 case Builtin::BI__builtin_assume: { 1490 if (E->getArg(0)->HasSideEffects(getContext())) 1491 return RValue::get(nullptr); 1492 1493 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1494 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 1495 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 1496 } 1497 case Builtin::BI__builtin_bswap16: 1498 case Builtin::BI__builtin_bswap32: 1499 case Builtin::BI__builtin_bswap64: { 1500 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 1501 } 1502 case Builtin::BI__builtin_bitreverse8: 1503 case Builtin::BI__builtin_bitreverse16: 1504 case Builtin::BI__builtin_bitreverse32: 1505 case Builtin::BI__builtin_bitreverse64: { 1506 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 1507 } 1508 case Builtin::BI__builtin_object_size: { 1509 unsigned Type = 1510 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 1511 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 1512 1513 // We pass this builtin onto the optimizer so that it can figure out the 1514 // 
object size in more complex cases. 1515 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 1516 /*EmittedE=*/nullptr)); 1517 } 1518 case Builtin::BI__builtin_prefetch: { 1519 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 1520 // FIXME: Technically these constants should of type 'int', yes? 1521 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 1522 llvm::ConstantInt::get(Int32Ty, 0); 1523 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 1524 llvm::ConstantInt::get(Int32Ty, 3); 1525 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 1526 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 1527 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 1528 } 1529 case Builtin::BI__builtin_readcyclecounter: { 1530 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 1531 return RValue::get(Builder.CreateCall(F)); 1532 } 1533 case Builtin::BI__builtin___clear_cache: { 1534 Value *Begin = EmitScalarExpr(E->getArg(0)); 1535 Value *End = EmitScalarExpr(E->getArg(1)); 1536 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 1537 return RValue::get(Builder.CreateCall(F, {Begin, End})); 1538 } 1539 case Builtin::BI__builtin_trap: 1540 return RValue::get(EmitTrapCall(Intrinsic::trap)); 1541 case Builtin::BI__debugbreak: 1542 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 1543 case Builtin::BI__builtin_unreachable: { 1544 EmitUnreachable(E->getExprLoc()); 1545 1546 // We do need to preserve an insertion point. 
1547 EmitBlock(createBasicBlock("unreachable.cont")); 1548 1549 return RValue::get(nullptr); 1550 } 1551 1552 case Builtin::BI__builtin_powi: 1553 case Builtin::BI__builtin_powif: 1554 case Builtin::BI__builtin_powil: { 1555 Value *Base = EmitScalarExpr(E->getArg(0)); 1556 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1557 llvm::Type *ArgType = Base->getType(); 1558 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 1559 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1560 } 1561 1562 case Builtin::BI__builtin_isgreater: 1563 case Builtin::BI__builtin_isgreaterequal: 1564 case Builtin::BI__builtin_isless: 1565 case Builtin::BI__builtin_islessequal: 1566 case Builtin::BI__builtin_islessgreater: 1567 case Builtin::BI__builtin_isunordered: { 1568 // Ordered comparisons: we know the arguments to these are matching scalar 1569 // floating point values. 1570 Value *LHS = EmitScalarExpr(E->getArg(0)); 1571 Value *RHS = EmitScalarExpr(E->getArg(1)); 1572 1573 switch (BuiltinID) { 1574 default: llvm_unreachable("Unknown ordered comparison"); 1575 case Builtin::BI__builtin_isgreater: 1576 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 1577 break; 1578 case Builtin::BI__builtin_isgreaterequal: 1579 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 1580 break; 1581 case Builtin::BI__builtin_isless: 1582 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 1583 break; 1584 case Builtin::BI__builtin_islessequal: 1585 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 1586 break; 1587 case Builtin::BI__builtin_islessgreater: 1588 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 1589 break; 1590 case Builtin::BI__builtin_isunordered: 1591 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1592 break; 1593 } 1594 // ZExt bool to int type. 
1595 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1596 } 1597 case Builtin::BI__builtin_isnan: { 1598 Value *V = EmitScalarExpr(E->getArg(0)); 1599 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1600 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1601 } 1602 1603 case Builtin::BIfinite: 1604 case Builtin::BI__finite: 1605 case Builtin::BIfinitef: 1606 case Builtin::BI__finitef: 1607 case Builtin::BIfinitel: 1608 case Builtin::BI__finitel: 1609 case Builtin::BI__builtin_isinf: 1610 case Builtin::BI__builtin_isfinite: { 1611 // isinf(x) --> fabs(x) == infinity 1612 // isfinite(x) --> fabs(x) != infinity 1613 // x != NaN via the ordered compare in either case. 1614 Value *V = EmitScalarExpr(E->getArg(0)); 1615 Value *Fabs = EmitFAbs(*this, V); 1616 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1617 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1618 ? CmpInst::FCMP_OEQ 1619 : CmpInst::FCMP_ONE; 1620 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1621 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1622 } 1623 1624 case Builtin::BI__builtin_isinf_sign: { 1625 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 1626 Value *Arg = EmitScalarExpr(E->getArg(0)); 1627 Value *AbsArg = EmitFAbs(*this, Arg); 1628 Value *IsInf = Builder.CreateFCmpOEQ( 1629 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1630 Value *IsNeg = EmitSignBit(*this, Arg); 1631 1632 llvm::Type *IntTy = ConvertType(E->getType()); 1633 Value *Zero = Constant::getNullValue(IntTy); 1634 Value *One = ConstantInt::get(IntTy, 1); 1635 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1636 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1637 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1638 return RValue::get(Result); 1639 } 1640 1641 case Builtin::BI__builtin_isnormal: { 1642 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1643 Value *V = EmitScalarExpr(E->getArg(0)); 1644 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1645 1646 Value *Abs = EmitFAbs(*this, V); 1647 Value *IsLessThanInf = 1648 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1649 APFloat Smallest = APFloat::getSmallestNormalized( 1650 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1651 Value *IsNormal = 1652 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1653 "isnormal"); 1654 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1655 V = Builder.CreateAnd(V, IsNormal, "and"); 1656 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1657 } 1658 1659 case Builtin::BI__builtin_fpclassify: { 1660 Value *V = EmitScalarExpr(E->getArg(5)); 1661 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1662 1663 // Create Result 1664 BasicBlock *Begin = Builder.GetInsertBlock(); 1665 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1666 Builder.SetInsertPoint(End); 1667 PHINode *Result = 1668 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1669 "fpclassify_result"); 1670 1671 // if (V==0) return FP_ZERO 1672 Builder.SetInsertPoint(Begin); 1673 Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1674 "iszero"); 1675 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1676 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1677 Builder.CreateCondBr(IsZero, End, NotZero); 1678 Result->addIncoming(ZeroLiteral, Begin); 1679 1680 // if (V != V) return FP_NAN 1681 Builder.SetInsertPoint(NotZero); 1682 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1683 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1684 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1685 Builder.CreateCondBr(IsNan, End, NotNan); 1686 Result->addIncoming(NanLiteral, NotZero); 1687 1688 // if (fabs(V) == infinity) return FP_INFINITY 1689 Builder.SetInsertPoint(NotNan); 1690 Value *VAbs = EmitFAbs(*this, V); 1691 Value *IsInf = 1692 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1693 "isinf"); 1694 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1695 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1696 Builder.CreateCondBr(IsInf, End, NotInf); 1697 Result->addIncoming(InfLiteral, NotNan); 1698 1699 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1700 Builder.SetInsertPoint(NotInf); 1701 APFloat Smallest = APFloat::getSmallestNormalized( 1702 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1703 Value *IsNormal = 1704 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1705 "isnormal"); 1706 Value *NormalResult = 1707 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1708 EmitScalarExpr(E->getArg(3))); 1709 Builder.CreateBr(End); 1710 Result->addIncoming(NormalResult, NotInf); 1711 1712 // return Result 1713 Builder.SetInsertPoint(End); 1714 return RValue::get(Result); 1715 } 1716 1717 case Builtin::BIalloca: 1718 case Builtin::BI_alloca: 1719 case Builtin::BI__builtin_alloca: { 1720 Value *Size = EmitScalarExpr(E->getArg(0)); 1721 const TargetInfo &TI = getContext().getTargetInfo(); 
1722 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 1723 unsigned SuitableAlignmentInBytes = 1724 CGM.getContext() 1725 .toCharUnitsFromBits(TI.getSuitableAlign()) 1726 .getQuantity(); 1727 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1728 AI->setAlignment(SuitableAlignmentInBytes); 1729 return RValue::get(AI); 1730 } 1731 1732 case Builtin::BI__builtin_alloca_with_align: { 1733 Value *Size = EmitScalarExpr(E->getArg(0)); 1734 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 1735 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 1736 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 1737 unsigned AlignmentInBytes = 1738 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); 1739 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1740 AI->setAlignment(AlignmentInBytes); 1741 return RValue::get(AI); 1742 } 1743 1744 case Builtin::BIbzero: 1745 case Builtin::BI__builtin_bzero: { 1746 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1747 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1748 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1749 E->getArg(0)->getExprLoc(), FD, 0); 1750 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1751 return RValue::get(nullptr); 1752 } 1753 case Builtin::BImemcpy: 1754 case Builtin::BI__builtin_memcpy: { 1755 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1756 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1757 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1758 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1759 E->getArg(0)->getExprLoc(), FD, 0); 1760 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1761 E->getArg(1)->getExprLoc(), FD, 1); 1762 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1763 return RValue::get(Dest.getPointer()); 1764 } 1765 1766 case Builtin::BI__builtin_char_memchr: 1767 
BuiltinID = Builtin::BI__builtin_memchr; 1768 break; 1769 1770 case Builtin::BI__builtin___memcpy_chk: { 1771 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 1772 llvm::APSInt Size, DstSize; 1773 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1774 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1775 break; 1776 if (Size.ugt(DstSize)) 1777 break; 1778 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1779 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1780 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1781 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1782 return RValue::get(Dest.getPointer()); 1783 } 1784 1785 case Builtin::BI__builtin_objc_memmove_collectable: { 1786 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1787 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1788 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1789 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1790 DestAddr, SrcAddr, SizeVal); 1791 return RValue::get(DestAddr.getPointer()); 1792 } 1793 1794 case Builtin::BI__builtin___memmove_chk: { 1795 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1796 llvm::APSInt Size, DstSize; 1797 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1798 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1799 break; 1800 if (Size.ugt(DstSize)) 1801 break; 1802 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1803 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1804 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1805 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1806 return RValue::get(Dest.getPointer()); 1807 } 1808 1809 case Builtin::BImemmove: 1810 case Builtin::BI__builtin_memmove: { 1811 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1812 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1813 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1814 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1815 E->getArg(0)->getExprLoc(), FD, 0); 1816 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1817 E->getArg(1)->getExprLoc(), FD, 1); 1818 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1819 return RValue::get(Dest.getPointer()); 1820 } 1821 case Builtin::BImemset: 1822 case Builtin::BI__builtin_memset: { 1823 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1824 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1825 Builder.getInt8Ty()); 1826 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1827 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1828 E->getArg(0)->getExprLoc(), FD, 0); 1829 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1830 return RValue::get(Dest.getPointer()); 1831 } 1832 case Builtin::BI__builtin___memset_chk: { 1833 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1834 llvm::APSInt Size, DstSize; 1835 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1836 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1837 break; 1838 if (Size.ugt(DstSize)) 1839 break; 1840 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1841 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1842 Builder.getInt8Ty()); 1843 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1844 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1845 return RValue::get(Dest.getPointer()); 1846 } 1847 case Builtin::BI__builtin_wmemcmp: { 1848 // The MSVC runtime library does not provide a definition of wmemcmp, so we 1849 // need an inline implementation. 1850 if (!getTarget().getTriple().isOSMSVCRT()) 1851 break; 1852 1853 llvm::Type *WCharTy = ConvertType(getContext().WCharTy); 1854 1855 Value *Dst = EmitScalarExpr(E->getArg(0)); 1856 Value *Src = EmitScalarExpr(E->getArg(1)); 1857 Value *Size = EmitScalarExpr(E->getArg(2)); 1858 1859 BasicBlock *Entry = Builder.GetInsertBlock(); 1860 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt"); 1861 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt"); 1862 BasicBlock *Next = createBasicBlock("wmemcmp.next"); 1863 BasicBlock *Exit = createBasicBlock("wmemcmp.exit"); 1864 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); 1865 Builder.CreateCondBr(SizeEq0, Exit, CmpGT); 1866 1867 EmitBlock(CmpGT); 1868 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2); 1869 DstPhi->addIncoming(Dst, Entry); 1870 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2); 1871 SrcPhi->addIncoming(Src, Entry); 1872 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); 1873 SizePhi->addIncoming(Size, Entry); 1874 CharUnits WCharAlign = 1875 getContext().getTypeAlignInChars(getContext().WCharTy); 1876 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign); 1877 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign); 1878 Value *DstGtSrc = 
Builder.CreateICmpUGT(DstCh, SrcCh); 1879 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT); 1880 1881 EmitBlock(CmpLT); 1882 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh); 1883 Builder.CreateCondBr(DstLtSrc, Exit, Next); 1884 1885 EmitBlock(Next); 1886 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1); 1887 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1); 1888 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); 1889 Value *NextSizeEq0 = 1890 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); 1891 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT); 1892 DstPhi->addIncoming(NextDst, Next); 1893 SrcPhi->addIncoming(NextSrc, Next); 1894 SizePhi->addIncoming(NextSize, Next); 1895 1896 EmitBlock(Exit); 1897 PHINode *Ret = Builder.CreatePHI(IntTy, 4); 1898 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry); 1899 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT); 1900 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT); 1901 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next); 1902 return RValue::get(Ret); 1903 } 1904 case Builtin::BI__builtin_dwarf_cfa: { 1905 // The offset in bytes from the first argument to the CFA. 1906 // 1907 // Why on earth is this in the frontend? Is there any reason at 1908 // all that the backend can't reasonably determine this while 1909 // lowering llvm.eh.dwarf.cfa()? 1910 // 1911 // TODO: If there's a satisfactory reason, add a target hook for 1912 // this instead of hard-coding 0, which is correct for most targets. 
1913 int32_t Offset = 0; 1914 1915 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1916 return RValue::get(Builder.CreateCall(F, 1917 llvm::ConstantInt::get(Int32Ty, Offset))); 1918 } 1919 case Builtin::BI__builtin_return_address: { 1920 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1921 getContext().UnsignedIntTy); 1922 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1923 return RValue::get(Builder.CreateCall(F, Depth)); 1924 } 1925 case Builtin::BI_ReturnAddress: { 1926 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1927 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1928 } 1929 case Builtin::BI__builtin_frame_address: { 1930 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 1931 getContext().UnsignedIntTy); 1932 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1933 return RValue::get(Builder.CreateCall(F, Depth)); 1934 } 1935 case Builtin::BI__builtin_extract_return_addr: { 1936 Value *Address = EmitScalarExpr(E->getArg(0)); 1937 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1938 return RValue::get(Result); 1939 } 1940 case Builtin::BI__builtin_frob_return_addr: { 1941 Value *Address = EmitScalarExpr(E->getArg(0)); 1942 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1943 return RValue::get(Result); 1944 } 1945 case Builtin::BI__builtin_dwarf_sp_column: { 1946 llvm::IntegerType *Ty 1947 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1948 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1949 if (Column == -1) { 1950 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1951 return RValue::get(llvm::UndefValue::get(Ty)); 1952 } 1953 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1954 } 1955 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1956 Value *Address = EmitScalarExpr(E->getArg(0)); 1957 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1958 CGM.ErrorUnsupported(E, 
"__builtin_init_dwarf_reg_size_table"); 1959 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1960 } 1961 case Builtin::BI__builtin_eh_return: { 1962 Value *Int = EmitScalarExpr(E->getArg(0)); 1963 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1964 1965 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1966 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1967 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1968 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1969 ? Intrinsic::eh_return_i32 1970 : Intrinsic::eh_return_i64); 1971 Builder.CreateCall(F, {Int, Ptr}); 1972 Builder.CreateUnreachable(); 1973 1974 // We do need to preserve an insertion point. 1975 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1976 1977 return RValue::get(nullptr); 1978 } 1979 case Builtin::BI__builtin_unwind_init: { 1980 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1981 return RValue::get(Builder.CreateCall(F)); 1982 } 1983 case Builtin::BI__builtin_extend_pointer: { 1984 // Extends a pointer to the size of an _Unwind_Word, which is 1985 // uint64_t on all platforms. Generally this gets poked into a 1986 // register and eventually used as an address, so if the 1987 // addressing registers are wider than pointers and the platform 1988 // doesn't implicitly ignore high-order bits when doing 1989 // addressing, we need to make sure we zext / sext based on 1990 // the platform's expectations. 1991 // 1992 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1993 1994 // Cast the pointer to intptr_t. 1995 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1996 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1997 1998 // If that's 64 bits, we're done. 1999 if (IntPtrTy->getBitWidth() == 64) 2000 return RValue::get(Result); 2001 2002 // Otherwise, ask the codegen data what to do. 
2003 if (getTargetHooks().extendPointerWithSExt()) 2004 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 2005 else 2006 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 2007 } 2008 case Builtin::BI__builtin_setjmp: { 2009 // Buffer is a void**. 2010 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 2011 2012 // Store the frame pointer to the setjmp buffer. 2013 Value *FrameAddr = 2014 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2015 ConstantInt::get(Int32Ty, 0)); 2016 Builder.CreateStore(FrameAddr, Buf); 2017 2018 // Store the stack pointer to the setjmp buffer. 2019 Value *StackAddr = 2020 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 2021 Address StackSaveSlot = 2022 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 2023 Builder.CreateStore(StackAddr, StackSaveSlot); 2024 2025 // Call LLVM's EH setjmp, which is lightweight. 2026 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 2027 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 2028 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 2029 } 2030 case Builtin::BI__builtin_longjmp: { 2031 Value *Buf = EmitScalarExpr(E->getArg(0)); 2032 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 2033 2034 // Call LLVM's EH longjmp, which is lightweight. 2035 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 2036 2037 // longjmp doesn't return; mark this as unreachable. 2038 Builder.CreateUnreachable(); 2039 2040 // We do need to preserve an insertion point. 
2041 EmitBlock(createBasicBlock("longjmp.cont")); 2042 2043 return RValue::get(nullptr); 2044 } 2045 case Builtin::BI__sync_fetch_and_add: 2046 case Builtin::BI__sync_fetch_and_sub: 2047 case Builtin::BI__sync_fetch_and_or: 2048 case Builtin::BI__sync_fetch_and_and: 2049 case Builtin::BI__sync_fetch_and_xor: 2050 case Builtin::BI__sync_fetch_and_nand: 2051 case Builtin::BI__sync_add_and_fetch: 2052 case Builtin::BI__sync_sub_and_fetch: 2053 case Builtin::BI__sync_and_and_fetch: 2054 case Builtin::BI__sync_or_and_fetch: 2055 case Builtin::BI__sync_xor_and_fetch: 2056 case Builtin::BI__sync_nand_and_fetch: 2057 case Builtin::BI__sync_val_compare_and_swap: 2058 case Builtin::BI__sync_bool_compare_and_swap: 2059 case Builtin::BI__sync_lock_test_and_set: 2060 case Builtin::BI__sync_lock_release: 2061 case Builtin::BI__sync_swap: 2062 llvm_unreachable("Shouldn't make it through sema"); 2063 case Builtin::BI__sync_fetch_and_add_1: 2064 case Builtin::BI__sync_fetch_and_add_2: 2065 case Builtin::BI__sync_fetch_and_add_4: 2066 case Builtin::BI__sync_fetch_and_add_8: 2067 case Builtin::BI__sync_fetch_and_add_16: 2068 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 2069 case Builtin::BI__sync_fetch_and_sub_1: 2070 case Builtin::BI__sync_fetch_and_sub_2: 2071 case Builtin::BI__sync_fetch_and_sub_4: 2072 case Builtin::BI__sync_fetch_and_sub_8: 2073 case Builtin::BI__sync_fetch_and_sub_16: 2074 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 2075 case Builtin::BI__sync_fetch_and_or_1: 2076 case Builtin::BI__sync_fetch_and_or_2: 2077 case Builtin::BI__sync_fetch_and_or_4: 2078 case Builtin::BI__sync_fetch_and_or_8: 2079 case Builtin::BI__sync_fetch_and_or_16: 2080 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 2081 case Builtin::BI__sync_fetch_and_and_1: 2082 case Builtin::BI__sync_fetch_and_and_2: 2083 case Builtin::BI__sync_fetch_and_and_4: 2084 case Builtin::BI__sync_fetch_and_and_8: 2085 case Builtin::BI__sync_fetch_and_and_16: 2086 
return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 2087 case Builtin::BI__sync_fetch_and_xor_1: 2088 case Builtin::BI__sync_fetch_and_xor_2: 2089 case Builtin::BI__sync_fetch_and_xor_4: 2090 case Builtin::BI__sync_fetch_and_xor_8: 2091 case Builtin::BI__sync_fetch_and_xor_16: 2092 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 2093 case Builtin::BI__sync_fetch_and_nand_1: 2094 case Builtin::BI__sync_fetch_and_nand_2: 2095 case Builtin::BI__sync_fetch_and_nand_4: 2096 case Builtin::BI__sync_fetch_and_nand_8: 2097 case Builtin::BI__sync_fetch_and_nand_16: 2098 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 2099 2100 // Clang extensions: not overloaded yet. 2101 case Builtin::BI__sync_fetch_and_min: 2102 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 2103 case Builtin::BI__sync_fetch_and_max: 2104 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 2105 case Builtin::BI__sync_fetch_and_umin: 2106 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 2107 case Builtin::BI__sync_fetch_and_umax: 2108 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 2109 2110 case Builtin::BI__sync_add_and_fetch_1: 2111 case Builtin::BI__sync_add_and_fetch_2: 2112 case Builtin::BI__sync_add_and_fetch_4: 2113 case Builtin::BI__sync_add_and_fetch_8: 2114 case Builtin::BI__sync_add_and_fetch_16: 2115 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 2116 llvm::Instruction::Add); 2117 case Builtin::BI__sync_sub_and_fetch_1: 2118 case Builtin::BI__sync_sub_and_fetch_2: 2119 case Builtin::BI__sync_sub_and_fetch_4: 2120 case Builtin::BI__sync_sub_and_fetch_8: 2121 case Builtin::BI__sync_sub_and_fetch_16: 2122 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 2123 llvm::Instruction::Sub); 2124 case Builtin::BI__sync_and_and_fetch_1: 2125 case Builtin::BI__sync_and_and_fetch_2: 2126 case Builtin::BI__sync_and_and_fetch_4: 2127 case Builtin::BI__sync_and_and_fetch_8: 2128 case 
Builtin::BI__sync_and_and_fetch_16: 2129 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 2130 llvm::Instruction::And); 2131 case Builtin::BI__sync_or_and_fetch_1: 2132 case Builtin::BI__sync_or_and_fetch_2: 2133 case Builtin::BI__sync_or_and_fetch_4: 2134 case Builtin::BI__sync_or_and_fetch_8: 2135 case Builtin::BI__sync_or_and_fetch_16: 2136 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 2137 llvm::Instruction::Or); 2138 case Builtin::BI__sync_xor_and_fetch_1: 2139 case Builtin::BI__sync_xor_and_fetch_2: 2140 case Builtin::BI__sync_xor_and_fetch_4: 2141 case Builtin::BI__sync_xor_and_fetch_8: 2142 case Builtin::BI__sync_xor_and_fetch_16: 2143 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 2144 llvm::Instruction::Xor); 2145 case Builtin::BI__sync_nand_and_fetch_1: 2146 case Builtin::BI__sync_nand_and_fetch_2: 2147 case Builtin::BI__sync_nand_and_fetch_4: 2148 case Builtin::BI__sync_nand_and_fetch_8: 2149 case Builtin::BI__sync_nand_and_fetch_16: 2150 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 2151 llvm::Instruction::And, true); 2152 2153 case Builtin::BI__sync_val_compare_and_swap_1: 2154 case Builtin::BI__sync_val_compare_and_swap_2: 2155 case Builtin::BI__sync_val_compare_and_swap_4: 2156 case Builtin::BI__sync_val_compare_and_swap_8: 2157 case Builtin::BI__sync_val_compare_and_swap_16: 2158 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 2159 2160 case Builtin::BI__sync_bool_compare_and_swap_1: 2161 case Builtin::BI__sync_bool_compare_and_swap_2: 2162 case Builtin::BI__sync_bool_compare_and_swap_4: 2163 case Builtin::BI__sync_bool_compare_and_swap_8: 2164 case Builtin::BI__sync_bool_compare_and_swap_16: 2165 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 2166 2167 case Builtin::BI__sync_swap_1: 2168 case Builtin::BI__sync_swap_2: 2169 case Builtin::BI__sync_swap_4: 2170 case Builtin::BI__sync_swap_8: 2171 case Builtin::BI__sync_swap_16: 2172 return 
EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 2173 2174 case Builtin::BI__sync_lock_test_and_set_1: 2175 case Builtin::BI__sync_lock_test_and_set_2: 2176 case Builtin::BI__sync_lock_test_and_set_4: 2177 case Builtin::BI__sync_lock_test_and_set_8: 2178 case Builtin::BI__sync_lock_test_and_set_16: 2179 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 2180 2181 case Builtin::BI__sync_lock_release_1: 2182 case Builtin::BI__sync_lock_release_2: 2183 case Builtin::BI__sync_lock_release_4: 2184 case Builtin::BI__sync_lock_release_8: 2185 case Builtin::BI__sync_lock_release_16: { 2186 Value *Ptr = EmitScalarExpr(E->getArg(0)); 2187 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 2188 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 2189 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 2190 StoreSize.getQuantity() * 8); 2191 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 2192 llvm::StoreInst *Store = 2193 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 2194 StoreSize); 2195 Store->setAtomic(llvm::AtomicOrdering::Release); 2196 return RValue::get(nullptr); 2197 } 2198 2199 case Builtin::BI__sync_synchronize: { 2200 // We assume this is supposed to correspond to a C++0x-style 2201 // sequentially-consistent fence (i.e. this is only usable for 2202 // synchronization, not device I/O or anything like that). This intrinsic 2203 // is really badly designed in the sense that in theory, there isn't 2204 // any way to safely use it... but in practice, it mostly works 2205 // to use it with non-atomic loads and stores to get acquire/release 2206 // semantics. 
2207 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 2208 return RValue::get(nullptr); 2209 } 2210 2211 case Builtin::BI__builtin_nontemporal_load: 2212 return RValue::get(EmitNontemporalLoad(*this, E)); 2213 case Builtin::BI__builtin_nontemporal_store: 2214 return RValue::get(EmitNontemporalStore(*this, E)); 2215 case Builtin::BI__c11_atomic_is_lock_free: 2216 case Builtin::BI__atomic_is_lock_free: { 2217 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 2218 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 2219 // _Atomic(T) is always properly-aligned. 2220 const char *LibCallName = "__atomic_is_lock_free"; 2221 CallArgList Args; 2222 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 2223 getContext().getSizeType()); 2224 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 2225 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 2226 getContext().VoidPtrTy); 2227 else 2228 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 2229 getContext().VoidPtrTy); 2230 const CGFunctionInfo &FuncInfo = 2231 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 2232 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 2233 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 2234 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 2235 ReturnValueSlot(), Args); 2236 } 2237 2238 case Builtin::BI__atomic_test_and_set: { 2239 // Look at the argument type to determine whether this is a volatile 2240 // operation. The parameter type is always volatile. 
2241 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 2242 bool Volatile = 2243 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 2244 2245 Value *Ptr = EmitScalarExpr(E->getArg(0)); 2246 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 2247 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 2248 Value *NewVal = Builder.getInt8(1); 2249 Value *Order = EmitScalarExpr(E->getArg(1)); 2250 if (isa<llvm::ConstantInt>(Order)) { 2251 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 2252 AtomicRMWInst *Result = nullptr; 2253 switch (ord) { 2254 case 0: // memory_order_relaxed 2255 default: // invalid order 2256 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2257 llvm::AtomicOrdering::Monotonic); 2258 break; 2259 case 1: // memory_order_consume 2260 case 2: // memory_order_acquire 2261 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2262 llvm::AtomicOrdering::Acquire); 2263 break; 2264 case 3: // memory_order_release 2265 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2266 llvm::AtomicOrdering::Release); 2267 break; 2268 case 4: // memory_order_acq_rel 2269 2270 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2271 llvm::AtomicOrdering::AcquireRelease); 2272 break; 2273 case 5: // memory_order_seq_cst 2274 Result = Builder.CreateAtomicRMW( 2275 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 2276 llvm::AtomicOrdering::SequentiallyConsistent); 2277 break; 2278 } 2279 Result->setVolatile(Volatile); 2280 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 2281 } 2282 2283 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 2284 2285 llvm::BasicBlock *BBs[5] = { 2286 createBasicBlock("monotonic", CurFn), 2287 createBasicBlock("acquire", CurFn), 2288 createBasicBlock("release", CurFn), 2289 createBasicBlock("acqrel", CurFn), 2290 createBasicBlock("seqcst", CurFn) 2291 }; 2292 
llvm::AtomicOrdering Orders[5] = { 2293 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 2294 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 2295 llvm::AtomicOrdering::SequentiallyConsistent}; 2296 2297 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 2298 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 2299 2300 Builder.SetInsertPoint(ContBB); 2301 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 2302 2303 for (unsigned i = 0; i < 5; ++i) { 2304 Builder.SetInsertPoint(BBs[i]); 2305 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 2306 Ptr, NewVal, Orders[i]); 2307 RMW->setVolatile(Volatile); 2308 Result->addIncoming(RMW, BBs[i]); 2309 Builder.CreateBr(ContBB); 2310 } 2311 2312 SI->addCase(Builder.getInt32(0), BBs[0]); 2313 SI->addCase(Builder.getInt32(1), BBs[1]); 2314 SI->addCase(Builder.getInt32(2), BBs[1]); 2315 SI->addCase(Builder.getInt32(3), BBs[2]); 2316 SI->addCase(Builder.getInt32(4), BBs[3]); 2317 SI->addCase(Builder.getInt32(5), BBs[4]); 2318 2319 Builder.SetInsertPoint(ContBB); 2320 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 2321 } 2322 2323 case Builtin::BI__atomic_clear: { 2324 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 2325 bool Volatile = 2326 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 2327 2328 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 2329 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 2330 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 2331 Value *NewVal = Builder.getInt8(0); 2332 Value *Order = EmitScalarExpr(E->getArg(1)); 2333 if (isa<llvm::ConstantInt>(Order)) { 2334 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 2335 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 2336 switch (ord) { 2337 case 0: // memory_order_relaxed 2338 default: // invalid order 2339 
Store->setOrdering(llvm::AtomicOrdering::Monotonic); 2340 break; 2341 case 3: // memory_order_release 2342 Store->setOrdering(llvm::AtomicOrdering::Release); 2343 break; 2344 case 5: // memory_order_seq_cst 2345 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 2346 break; 2347 } 2348 return RValue::get(nullptr); 2349 } 2350 2351 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 2352 2353 llvm::BasicBlock *BBs[3] = { 2354 createBasicBlock("monotonic", CurFn), 2355 createBasicBlock("release", CurFn), 2356 createBasicBlock("seqcst", CurFn) 2357 }; 2358 llvm::AtomicOrdering Orders[3] = { 2359 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 2360 llvm::AtomicOrdering::SequentiallyConsistent}; 2361 2362 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 2363 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 2364 2365 for (unsigned i = 0; i < 3; ++i) { 2366 Builder.SetInsertPoint(BBs[i]); 2367 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 2368 Store->setOrdering(Orders[i]); 2369 Builder.CreateBr(ContBB); 2370 } 2371 2372 SI->addCase(Builder.getInt32(0), BBs[0]); 2373 SI->addCase(Builder.getInt32(3), BBs[1]); 2374 SI->addCase(Builder.getInt32(5), BBs[2]); 2375 2376 Builder.SetInsertPoint(ContBB); 2377 return RValue::get(nullptr); 2378 } 2379 2380 case Builtin::BI__atomic_thread_fence: 2381 case Builtin::BI__atomic_signal_fence: 2382 case Builtin::BI__c11_atomic_thread_fence: 2383 case Builtin::BI__c11_atomic_signal_fence: { 2384 llvm::SyncScope::ID SSID; 2385 if (BuiltinID == Builtin::BI__atomic_signal_fence || 2386 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 2387 SSID = llvm::SyncScope::SingleThread; 2388 else 2389 SSID = llvm::SyncScope::System; 2390 Value *Order = EmitScalarExpr(E->getArg(0)); 2391 if (isa<llvm::ConstantInt>(Order)) { 2392 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 2393 switch (ord) { 2394 case 0: // memory_order_relaxed 2395 default: // 
invalid order 2396 break; 2397 case 1: // memory_order_consume 2398 case 2: // memory_order_acquire 2399 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 2400 break; 2401 case 3: // memory_order_release 2402 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 2403 break; 2404 case 4: // memory_order_acq_rel 2405 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 2406 break; 2407 case 5: // memory_order_seq_cst 2408 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 2409 break; 2410 } 2411 return RValue::get(nullptr); 2412 } 2413 2414 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 2415 AcquireBB = createBasicBlock("acquire", CurFn); 2416 ReleaseBB = createBasicBlock("release", CurFn); 2417 AcqRelBB = createBasicBlock("acqrel", CurFn); 2418 SeqCstBB = createBasicBlock("seqcst", CurFn); 2419 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 2420 2421 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 2422 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 2423 2424 Builder.SetInsertPoint(AcquireBB); 2425 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 2426 Builder.CreateBr(ContBB); 2427 SI->addCase(Builder.getInt32(1), AcquireBB); 2428 SI->addCase(Builder.getInt32(2), AcquireBB); 2429 2430 Builder.SetInsertPoint(ReleaseBB); 2431 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 2432 Builder.CreateBr(ContBB); 2433 SI->addCase(Builder.getInt32(3), ReleaseBB); 2434 2435 Builder.SetInsertPoint(AcqRelBB); 2436 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 2437 Builder.CreateBr(ContBB); 2438 SI->addCase(Builder.getInt32(4), AcqRelBB); 2439 2440 Builder.SetInsertPoint(SeqCstBB); 2441 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 2442 Builder.CreateBr(ContBB); 2443 SI->addCase(Builder.getInt32(5), SeqCstBB); 2444 2445 Builder.SetInsertPoint(ContBB); 2446 return RValue::get(nullptr); 2447 } 2448 2449 
case Builtin::BI__builtin_signbit:
case Builtin::BI__builtin_signbitf:
case Builtin::BI__builtin_signbitl: {
  // Extract the sign bit of the operand and zero-extend it to the IR type of
  // the call expression.
  llvm::Value *SignBit = EmitSignBit(*this, EmitScalarExpr(E->getArg(0)));
  return RValue::get(Builder.CreateZExt(SignBit, ConvertType(E->getType())));
}
case Builtin::BI__annotation: {
  // Every argument is a string literal with 2-byte characters. Transcode each
  // one to UTF-8 and wrap it in an MDString.
  SmallVector<Metadata *, 1> AnnotationStrs;
  for (const Expr *Arg : E->arguments()) {
    const auto *Literal = cast<StringLiteral>(Arg->IgnoreParenCasts());
    assert(Literal->getCharByteWidth() == 2);
    StringRef Utf16Bytes = Literal->getBytes();
    std::string Utf8;
    const bool Converted = convertUTF16ToUTF8String(
        makeArrayRef(Utf16Bytes.data(), Utf16Bytes.size()), Utf8);
    if (!Converted) {
      // Report the bad literal and move on to the next argument.
      CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
      continue;
    }
    AnnotationStrs.push_back(llvm::MDString::get(getLLVMContext(), Utf8));
  }

  // Build an MDTuple of the MDStrings and hand it to the codeview annotation
  // intrinsic. The call's result is not used.
  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), AnnotationStrs);
  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
  return RValue::getIgnored();
}
case Builtin::BI__builtin_annotation: {
  // The annotation intrinsic is overloaded on the type of the annotated value.
  llvm::Value *Annotated = EmitScalarExpr(E->getArg(0));
  llvm::Value *F =
      CGM.getIntrinsic(llvm::Intrinsic::annotation, Annotated->getType());

  // Look through parens and casts to reach the annotation string. Sema
  // requires a non-wide string literal here (possibly casted), which is what
  // makes the cast<> below safe.
  const Expr *StrExpr = E->getArg(1)->IgnoreParenCasts();
  StringRef AnnotationStr = cast<StringLiteral>(StrExpr)->getString();
  return RValue::get(
      EmitAnnotationCall(F, Annotated, AnnotationStr, E->getExprLoc()));
}
case Builtin::BI__builtin_addcb:
case Builtin::BI__builtin_addcs:
case Builtin::BI__builtin_addc:
case Builtin::BI__builtin_addcl:
case Builtin::BI__builtin_addcll:
case Builtin::BI__builtin_subcb:
case Builtin::BI__builtin_subcs:
case Builtin::BI__builtin_subc:
case Builtin::BI__builtin_subcl:
case Builtin::BI__builtin_subcll: {

  // Multiprecision add/subtract with carry. Source of the form
  //   int x = ..., y = ..., carryin = ..., carryout, result;
  //   result = __builtin_addc(x, y, carryin, &carryout);
  //
  // is lowered to IR of the form
  //
  //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
  //   %carry1 = extractvalue {i32, i1} %tmp1, 1
  //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
  //                                                       i32 %carryin)
  //   %result = extractvalue {i32, i1} %tmp2, 0
  //   %carry2 = extractvalue {i32, i1} %tmp2, 1
  //   %tmp3 = or i1 %carry1, %carry2
  //   %tmp4 = zext i1 %tmp3 to i32
  //   store i32 %tmp4, i32* %carryout

  // Emit the three scalar operands, then the carry-out destination.
  llvm::Value *LHS = EmitScalarExpr(E->getArg(0));
  llvm::Value *RHS = EmitScalarExpr(E->getArg(1));
  llvm::Value *CarryIn = EmitScalarExpr(E->getArg(2));
  Address CarryOutAddr = EmitPointerWithAlignment(E->getArg(3));

  // The add variants use uadd.with.overflow, the subtract variants use
  // usub.with.overflow.
  llvm::Intrinsic::ID OverflowOp;
  switch (BuiltinID) {
  default:
    llvm_unreachable("Unknown multiprecision builtin id.");
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
    OverflowOp = llvm::Intrinsic::uadd_with_overflow;
    break;
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll:
    OverflowOp = llvm::Intrinsic::usub_with_overflow;
    break;
  }

  // First combine the two operands, then fold in the incoming carry. The
  // outgoing carry is set if either step overflowed.
  llvm::Value *FirstCarry;
  llvm::Value *Partial =
      EmitOverflowIntrinsic(*this, OverflowOp, LHS, RHS, FirstCarry);
  llvm::Value *SecondCarry;
  llvm::Value *Final =
      EmitOverflowIntrinsic(*this, OverflowOp, Partial, CarryIn, SecondCarry);
  llvm::Value *AnyCarry = Builder.CreateOr(FirstCarry, SecondCarry);
  llvm::Value *CarryOut = Builder.CreateZExt(AnyCarry, LHS->getType());
  Builder.CreateStore(CarryOut, CarryOutAddr);
  return RValue::get(Final);
}

case Builtin::BI__builtin_add_overflow:
case Builtin::BI__builtin_sub_overflow:
case Builtin::BI__builtin_mul_overflow: {
  const clang::Expr *LeftArg = E->getArg(0);
  const clang::Expr *RightArg = E->getArg(1);
  const clang::Expr *ResultArg = E->getArg(2);

  // The third argument is a pointer; the result is stored through it.
  clang::QualType ResultQTy =
      ResultArg->getType()->castAs<PointerType>()->getPointeeType();

  WidthAndSignedness LeftInfo =
      getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
  WidthAndSignedness RightInfo =
      getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
  WidthAndSignedness ResultInfo =
      getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);

  // Mixed-sign multiplication gets its own lowering, because supporting it
  // through the generic path below would need expensive runtime or backend
  // support.
  if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
    return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
                                        RightInfo, ResultArg, ResultQTy,
                                        ResultInfo);

  // Generic path: do the arithmetic in one integer type chosen by
  // EncompassingIntegerType from both operands and the result.
  WidthAndSignedness EncompassingInfo =
      EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});

  llvm::Type *EncompassingLLVMTy =
      llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);

  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);

  // Choose the signed or unsigned flavour of the overflow intrinsic.
  const bool UseSigned = EncompassingInfo.Signed;
  llvm::Intrinsic::ID IntrinsicId;
  switch (BuiltinID) {
  default:
    llvm_unreachable("Unknown overflow builtin id.");
  case Builtin::BI__builtin_add_overflow:
    IntrinsicId = UseSigned ? llvm::Intrinsic::sadd_with_overflow
                            : llvm::Intrinsic::uadd_with_overflow;
    break;
  case Builtin::BI__builtin_sub_overflow:
    IntrinsicId = UseSigned ? llvm::Intrinsic::ssub_with_overflow
                            : llvm::Intrinsic::usub_with_overflow;
    break;
  case Builtin::BI__builtin_mul_overflow:
    IntrinsicId = UseSigned ? llvm::Intrinsic::smul_with_overflow
                            : llvm::Intrinsic::umul_with_overflow;
    break;
  }

  llvm::Value *Left = EmitScalarExpr(LeftArg);
  llvm::Value *Right = EmitScalarExpr(RightArg);
  Address ResultPtr = EmitPointerWithAlignment(ResultArg);

  // Widen both operands to the encompassing type, honouring each operand's
  // own signedness.
  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);

  // Run the checked operation on the widened values.
  llvm::Value *Overflow, *Result;
  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);

  if (EncompassingInfo.Width > ResultInfo.Width) {
    // The result type is narrower than the type we computed in, so truncate.
    llvm::Value *Narrowed = Builder.CreateTrunc(Result, ResultLLVMTy);

    // Detect information lost by the truncation: extend the narrowed value
    // back and compare it against the untruncated result.
    llvm::Value *RoundTrip = Builder.CreateIntCast(
        Narrowed, EncompassingLLVMTy, ResultInfo.Signed);
    llvm::Value *TruncationOverflow =
        Builder.CreateICmpNE(Result, RoundTrip);

    Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
    Result = Narrowed;
  }

  // Store the result through the pointer; the store is volatile if the
  // pointee type is volatile-qualified.
  bool isVolatile =
      ResultArg->getType()->getPointeeType().isVolatileQualified();
  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);

  return RValue::get(Overflow);
}

case Builtin::BI__builtin_uadd_overflow:
case Builtin::BI__builtin_uaddl_overflow:
case Builtin::BI__builtin_uaddll_overflow:
case Builtin::BI__builtin_usub_overflow:
case Builtin::BI__builtin_usubl_overflow:
case Builtin::BI__builtin_usubll_overflow:
case Builtin::BI__builtin_umul_overflow:
case Builtin::BI__builtin_umull_overflow:
case Builtin::BI__builtin_umulll_overflow:
case Builtin::BI__builtin_sadd_overflow:
case Builtin::BI__builtin_saddl_overflow:
case Builtin::BI__builtin_saddll_overflow:
case Builtin::BI__builtin_ssub_overflow:
case Builtin::BI__builtin_ssubl_overflow:
case Builtin::BI__builtin_ssubll_overflow:
case Builtin::BI__builtin_smul_overflow:
case Builtin::BI__builtin_smull_overflow:
case Builtin::BI__builtin_smulll_overflow: {

  // Each of these builtins maps directly onto one *.with.overflow intrinsic.

  // Emit the two operands and the address the result is written to.
  llvm::Value *LHS = EmitScalarExpr(E->getArg(0));
  llvm::Value *RHS = EmitScalarExpr(E->getArg(1));
  Address ResultAddr = EmitPointerWithAlignment(E->getArg(2));

  // Pick the intrinsic matching the builtin's operation and signedness.
  llvm::Intrinsic::ID OverflowOp;
  switch (BuiltinID) {
  default:
    llvm_unreachable("Unknown overflow builtin id.");
  case Builtin::BI__builtin_uadd_overflow:
  case Builtin::BI__builtin_uaddl_overflow:
  case Builtin::BI__builtin_uaddll_overflow:
    OverflowOp = llvm::Intrinsic::uadd_with_overflow;
    break;
  case Builtin::BI__builtin_usub_overflow:
  case Builtin::BI__builtin_usubl_overflow:
  case Builtin::BI__builtin_usubll_overflow:
    OverflowOp = llvm::Intrinsic::usub_with_overflow;
    break;
  case Builtin::BI__builtin_umul_overflow:
  case Builtin::BI__builtin_umull_overflow:
  case Builtin::BI__builtin_umulll_overflow:
    OverflowOp = llvm::Intrinsic::umul_with_overflow;
    break;
  case Builtin::BI__builtin_sadd_overflow:
  case Builtin::BI__builtin_saddl_overflow:
  case Builtin::BI__builtin_saddll_overflow:
    OverflowOp = llvm::Intrinsic::sadd_with_overflow;
    break;
  case Builtin::BI__builtin_ssub_overflow:
  case Builtin::BI__builtin_ssubl_overflow:
  case Builtin::BI__builtin_ssubll_overflow:
    OverflowOp = llvm::Intrinsic::ssub_with_overflow;
    break;
  case Builtin::BI__builtin_smul_overflow:
  case Builtin::BI__builtin_smull_overflow:
  case Builtin::BI__builtin_smulll_overflow:
    OverflowOp = llvm::Intrinsic::smul_with_overflow;
    break;
  }

  // Store the arithmetic result and return the overflow flag.
  llvm::Value *OverflowFlag;
  llvm::Value *Res =
      EmitOverflowIntrinsic(*this, OverflowOp, LHS, RHS, OverflowFlag);
  Builder.CreateStore(Res, ResultAddr);

  return RValue::get(OverflowFlag);
}
case Builtin::BI__builtin_addressof:
  // Emit the operand as an lvalue and return its pointer.
  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
case Builtin::BI__builtin_operator_new:
  // The trailing bool distinguishes delete (true) from new (false).
  return EmitBuiltinNewDeleteCall(
      E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
case Builtin::BI__builtin_operator_delete:
  return EmitBuiltinNewDeleteCall(
      E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);

case Builtin::BI__noop:
  // __noop always evaluates to an integer literal zero.
  return RValue::get(ConstantInt::get(IntTy, 0));
case Builtin::BI__builtin_call_with_static_chain: {
  // Argument 0 is the call to perform, argument 1 the chain value passed
  // along with it.
  const CallExpr *InnerCall = cast<CallExpr>(E->getArg(0));
  const Expr *ChainExpr = E->getArg(1);
  return EmitCall(InnerCall->getCallee()->getType(),
                  EmitCallee(InnerCall->getCallee()), InnerCall, ReturnValue,
                  EmitScalarExpr(ChainExpr));
}
case Builtin::BI_InterlockedExchange8:
case Builtin::BI_InterlockedExchange16:
case Builtin::BI_InterlockedExchange:
case Builtin::BI_InterlockedExchangePointer:
  return RValue::get(
      EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
case Builtin::BI_InterlockedCompareExchangePointer: {
  // The cmpxchg is done on an integer as wide as the builtin's result type;
  // the pointer operands are converted to that integer type first.
  llvm::IntegerType *IntType = IntegerType::get(
      getLLVMContext(), getContext().getTypeSize(E->getType()));
  llvm::Type *IntPtrType = IntType->getPointerTo();

  llvm::Value *Destination =
      Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);

  // Remember the exchange operand's original type so the result can be
  // converted back to it.
  llvm::Value *ExchangePtr = EmitScalarExpr(E->getArg(1));
  llvm::Type *RTy = ExchangePtr->getType();
  llvm::Value *Exchange = Builder.CreatePtrToInt(ExchangePtr, IntType);

  llvm::Value *Comparand =
      Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);

  // Sequentially consistent on both success and failure, and marked volatile.
  AtomicCmpXchgInst *CmpXchg = Builder.CreateAtomicCmpXchg(
      Destination, Comparand, Exchange,
      AtomicOrdering::SequentiallyConsistent,
      AtomicOrdering::SequentiallyConsistent);
  CmpXchg->setVolatile(true);

  // Element 0 of the cmpxchg result is returned, converted back to a pointer.
  llvm::Value *OldInt = Builder.CreateExtractValue(CmpXchg, 0);
  return RValue::get(Builder.CreateIntToPtr(OldInt, RTy));
}
case Builtin::BI_InterlockedCompareExchange8:
case Builtin::BI_InterlockedCompareExchange16:
case Builtin::BI_InterlockedCompareExchange:
case Builtin::BI_InterlockedCompareExchange64: {
  // The builtin's argument order is (destination, exchange, comparand), while
  // CreateAtomicCmpXchg is called with (pointer, arg 2, arg 1).
  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
      EmitScalarExpr(E->getArg(0)),
      EmitScalarExpr(E->getArg(2)),
      EmitScalarExpr(E->getArg(1)),
      AtomicOrdering::SequentiallyConsistent,
      AtomicOrdering::SequentiallyConsistent);
  CXI->setVolatile(true);
  return RValue::get(Builder.CreateExtractValue(CXI, 0));
}
// The remaining Interlocked* builtins are all delegated to
// EmitMSVCBuiltinExpr with the matching MSVCIntrin id.
case Builtin::BI_InterlockedIncrement16:
case Builtin::BI_InterlockedIncrement:
  return RValue::get(
      EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
case Builtin::BI_InterlockedDecrement16:
case Builtin::BI_InterlockedDecrement:
  return RValue::get(
      EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
case Builtin::BI_InterlockedAnd8:
case Builtin::BI_InterlockedAnd16:
case Builtin::BI_InterlockedAnd:
  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
case Builtin::BI_InterlockedExchangeAdd8:
case Builtin::BI_InterlockedExchangeAdd16:
case Builtin::BI_InterlockedExchangeAdd:
  return RValue::get(
      EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
case Builtin::BI_InterlockedExchangeSub8:
case Builtin::BI_InterlockedExchangeSub16:
case Builtin::BI_InterlockedExchangeSub:
  return RValue::get(
      EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
case Builtin::BI_InterlockedOr8:
case Builtin::BI_InterlockedOr16:
case Builtin::BI_InterlockedOr:
  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
case Builtin::BI_InterlockedXor8:
case Builtin::BI_InterlockedXor16:
case Builtin::BI_InterlockedXor:
  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
case Builtin::BI_interlockedbittestandset:
  return RValue::get(
      EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));

// SEH helpers: each builtin forwards to the corresponding EmitSEH* routine.
case Builtin::BI__exception_code:
case Builtin::BI_exception_code:
  return RValue::get(EmitSEHExceptionCode());
case Builtin::BI__exception_info:
case Builtin::BI_exception_info:
  return RValue::get(EmitSEHExceptionInfo());
case Builtin::BI__abnormal_termination:
case Builtin::BI_abnormal_termination:
  return RValue::get(EmitSEHAbnormalTermination());
case Builtin::BI_setjmpex: {
  // Only handled here when targeting the MSVC runtime; otherwise leave the
  // switch.
  if (!getTarget().getTriple().isOSMSVCRT())
    break;

  // Declare "_setjmpex"(i8*, i8*) with the returns_twice attribute.
  llvm::Type *ParamTys[] = {Int8PtrTy, Int8PtrTy};
  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
      getLLVMContext(), llvm::AttributeList::FunctionIndex,
      llvm::Attribute::ReturnsTwice);
  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(IntTy, ParamTys, /*isVarArg=*/false),
      "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);

  // Pass the jump buffer as i8* together with the result of
  // llvm.frameaddress(0).
  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
      EmitScalarExpr(E->getArg(0)), Int8PtrTy);
  llvm::Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
  llvm::Value *CallArgs[] = {Buf, FrameAddr};
  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, CallArgs);
  CS.setAttributes(ReturnsTwiceAttr);
  return RValue::get(CS.getInstruction());
}
case Builtin::BI_setjmp: {
  // Only handled here when targeting the MSVC runtime; otherwise leave the
  // switch.
  if (!getTarget().getTriple().isOSMSVCRT())
    break;

  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
      getLLVMContext(), llvm::AttributeList::FunctionIndex,
      llvm::Attribute::ReturnsTwice);
  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
      EmitScalarExpr(E->getArg(0)), Int8PtrTy);
  llvm::CallSite CS;
  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
    // On x86 the callee is the variadic "_setjmp3"(i8*, int, ...), called
    // with a count of zero.
    llvm::Type *ParamTys[] = {Int8PtrTy, IntTy};
    llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(IntTy, ParamTys, /*isVarArg=*/true),
        "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
    llvm::Value *Count = ConstantInt::get(IntTy, 0);
    llvm::Value *CallArgs[] = {Buf, Count};
    CS = EmitRuntimeCallOrInvoke(SetJmp3, CallArgs);
  } else {
    // Elsewhere the callee is "_setjmp"(i8*, i8*), which also receives the
    // result of llvm.frameaddress(0).
    llvm::Type *ParamTys[] = {Int8PtrTy, Int8PtrTy};
    llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(IntTy, ParamTys, /*isVarArg=*/false),
        "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
    llvm::Value *FrameAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                           ConstantInt::get(Int32Ty, 0));
    llvm::Value *CallArgs[] = {Buf, FrameAddr};
    CS = EmitRuntimeCallOrInvoke(SetJmp, CallArgs);
  }
  CS.setAttributes(ReturnsTwiceAttr);
  return RValue::get(CS.getInstruction());
}

case Builtin::BI__GetExceptionInfo: {
  // If the C++ ABI provides a throw-info global for the parameter type,
  // return it as an i8*; otherwise leave the switch.
  if (llvm::GlobalVariable *ThrowInfo =
          CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
    return RValue::get(
        llvm::ConstantExpr::getBitCast(ThrowInfo, CGM.Int8PtrTy));
  break;
}

case Builtin::BI__fastfail:
  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));

case Builtin::BI__builtin_coro_size: {
  // llvm.coro.size is overloaded on an integer type as wide as size_t.
  ASTContext &Ctx = getContext();
  QualType SizeQTy = Ctx.getSizeType();
  llvm::IntegerType *SizeIntTy = Builder.getIntNTy(Ctx.getTypeSize(SizeQTy));
  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, SizeIntTy);
  return RValue::get(Builder.CreateCall(F));
}

// The remaining coroutine builtins forward to EmitCoroutineIntrinsic with the
// matching intrinsic id.
case Builtin::BI__builtin_coro_id:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
case Builtin::BI__builtin_coro_promise:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
case Builtin::BI__builtin_coro_resume:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
case Builtin::BI__builtin_coro_frame:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
case Builtin::BI__builtin_coro_noop:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
case Builtin::BI__builtin_coro_free:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
case Builtin::BI__builtin_coro_destroy:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
case Builtin::BI__builtin_coro_done:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
case Builtin::BI__builtin_coro_alloc:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
case Builtin::BI__builtin_coro_begin:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
case Builtin::BI__builtin_coro_end:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
case Builtin::BI__builtin_coro_suspend:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
case Builtin::BI__builtin_coro_param:
  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);

// OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
case Builtin::BIread_pipe:
case Builtin::BIwrite_pipe: {
  Value *Pipe = EmitScalarExpr(E->getArg(0));
  Value *SecondArg = EmitScalarExpr(E->getArg(1));
  CGOpenCLRuntime OpenCLRT(CGM);
  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));

  // The packet is passed to the runtime as an i8* in the generic address
  // space.
  unsigned GenericAS =
      getContext().getTargetAddressSpace(LangAS::opencl_generic);
  llvm::Type *I8PTy = llvm::PointerType::get(
      llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);

  const bool IsRead = BuiltinID == Builtin::BIread_pipe;

  // The argument count selects the overload.
  if (2U == E->getNumArgs()) {
    // Two-argument form: (pipe, packet pointer).
    const char *Name = IsRead ? "__read_pipe_2" : "__write_pipe_2";
    // Use a generic function type so the call works for any builtin or
    // user-defined packet type.
    llvm::Type *ArgTys[] = {Pipe->getType(), I8PTy, Int32Ty, Int32Ty};
    llvm::FunctionType *FTy = llvm::FunctionType::get(
        Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
    Value *PacketPtr = Builder.CreatePointerCast(SecondArg, I8PTy);
    return RValue::get(
        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                           {Pipe, PacketPtr, PacketSize, PacketAlign}));
  }

  // Four-argument form: (pipe, argument 1, integer argument 2, packet
  // pointer).
  assert(4 == E->getNumArgs() &&
         "Illegal number of parameters to pipe function");
  const char *Name = IsRead ? "__read_pipe_4" : "__write_pipe_4";

  llvm::Type *ArgTys[] = {Pipe->getType(), SecondArg->getType(), Int32Ty,
                          I8PTy, Int32Ty, Int32Ty};
  Value *Index = EmitScalarExpr(E->getArg(2));
  Value *Packet = EmitScalarExpr(E->getArg(3));
  llvm::FunctionType *FTy = llvm::FunctionType::get(
      Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  Value *PacketPtr = Builder.CreatePointerCast(Packet, I8PTy);
  // The third argument is known to be an integer, but it may not be i32 yet.
  if (Index->getType() != Int32Ty)
    Index = Builder.CreateZExtOrTrunc(Index, Int32Ty);
  return RValue::get(Builder.CreateCall(
      CGM.CreateRuntimeFunction(FTy, Name),
      {Pipe, SecondArg, Index, PacketPtr, PacketSize, PacketAlign}));
}
// OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
// functions
case Builtin::BIreserve_read_pipe:
case Builtin::BIreserve_write_pipe:
case Builtin::BIwork_group_reserve_read_pipe:
case Builtin::BIwork_group_reserve_write_pipe:
case Builtin::BIsub_group_reserve_read_pipe:
case Builtin::BIsub_group_reserve_write_pipe: {
  // Map the builtin onto the name of the runtime function to call.
  const char *Name;
  switch (BuiltinID) {
  case Builtin::BIreserve_read_pipe:
    Name = "__reserve_read_pipe";
    break;
  case Builtin::BIreserve_write_pipe:
    Name = "__reserve_write_pipe";
    break;
  case Builtin::BIwork_group_reserve_read_pipe:
    Name = "__work_group_reserve_read_pipe";
    break;
  case Builtin::BIwork_group_reserve_write_pipe:
    Name = "__work_group_reserve_write_pipe";
    break;
  case Builtin::BIsub_group_reserve_read_pipe:
    Name = "__sub_group_reserve_read_pipe";
    break;
  default:
    Name = "__sub_group_reserve_write_pipe";
    break;
  }

  Value *Pipe = EmitScalarExpr(E->getArg(0));
  Value *NumPackets = EmitScalarExpr(E->getArg(1));
  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
  CGOpenCLRuntime OpenCLRT(CGM);
  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));

  // Generic prototype: (pipe, i32, i32, i32) returning the reserve-id type.
  llvm::Type *ArgTys[] = {Pipe->getType(), Int32Ty, Int32Ty, Int32Ty};
  llvm::FunctionType *FTy = llvm::FunctionType::get(
      ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  // The second argument is known to be an integer, but it may not be i32 yet.
  if (NumPackets->getType() != Int32Ty)
    NumPackets = Builder.CreateZExtOrTrunc(NumPackets, Int32Ty);
  return RValue::get(
      Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                         {Pipe, NumPackets, PacketSize, PacketAlign}));
}
// OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
// functions
case Builtin::BIcommit_read_pipe:
case Builtin::BIcommit_write_pipe:
case Builtin::BIwork_group_commit_read_pipe:
case Builtin::BIwork_group_commit_write_pipe:
case Builtin::BIsub_group_commit_read_pipe:
case Builtin::BIsub_group_commit_write_pipe: {
  // Map the builtin onto the name of the runtime function to call.
  const char *Name;
  switch (BuiltinID) {
  case Builtin::BIcommit_read_pipe:
    Name = "__commit_read_pipe";
    break;
  case Builtin::BIcommit_write_pipe:
    Name = "__commit_write_pipe";
    break;
  case Builtin::BIwork_group_commit_read_pipe:
    Name = "__work_group_commit_read_pipe";
    break;
  case Builtin::BIwork_group_commit_write_pipe:
    Name = "__work_group_commit_write_pipe";
    break;
  case Builtin::BIsub_group_commit_read_pipe:
    Name = "__sub_group_commit_read_pipe";
    break;
  default:
    Name = "__sub_group_commit_write_pipe";
    break;
  }

  Value *Pipe = EmitScalarExpr(E->getArg(0));
  Value *ReserveId = EmitScalarExpr(E->getArg(1));
  CGOpenCLRuntime OpenCLRT(CGM);
  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));

  // Generic prototype: (pipe, second argument, i32, i32) returning void.
  llvm::Type *ArgTys[] = {Pipe->getType(), ReserveId->getType(), Int32Ty,
                          Int32Ty};
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
                              llvm::ArrayRef<llvm::Type *>(ArgTys), false);

  return RValue::get(
      Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                         {Pipe, ReserveId, PacketSize, PacketAlign}));
}
// OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
case Builtin::BIget_pipe_num_packets:
case Builtin::BIget_pipe_max_packets: {
  // The runtime function name is the base name plus "_ro" for a read-only
  // pipe and "_wo" otherwise.
  const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
  const char *BaseName = (BuiltinID == Builtin::BIget_pipe_num_packets)
                             ? "__get_pipe_num_packets"
                             : "__get_pipe_max_packets";
  auto Name = std::string(BaseName) +
              std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");

  // Generic prototype: (pipe, i32, i32) returning i32.
  Value *Pipe = EmitScalarExpr(E->getArg(0));
  CGOpenCLRuntime OpenCLRT(CGM);
  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  llvm::Type *ArgTys[] = {Pipe->getType(), Int32Ty, Int32Ty};
  llvm::FunctionType *FTy = llvm::FunctionType::get(
      Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);

  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                                        {Pipe, PacketSize, PacketAlign}));
}

// OpenCL v2.0 s6.13.9 - Address space qualifier functions.
3077 case Builtin::BIto_global: 3078 case Builtin::BIto_local: 3079 case Builtin::BIto_private: { 3080 auto Arg0 = EmitScalarExpr(E->getArg(0)); 3081 auto NewArgT = llvm::PointerType::get(Int8Ty, 3082 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3083 auto NewRetT = llvm::PointerType::get(Int8Ty, 3084 CGM.getContext().getTargetAddressSpace( 3085 E->getType()->getPointeeType().getAddressSpace())); 3086 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 3087 llvm::Value *NewArg; 3088 if (Arg0->getType()->getPointerAddressSpace() != 3089 NewArgT->getPointerAddressSpace()) 3090 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 3091 else 3092 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 3093 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 3094 auto NewCall = 3095 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 3096 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 3097 ConvertType(E->getType()))); 3098 } 3099 3100 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 3101 // It contains four different overload formats specified in Table 6.13.17.1. 
3102 case Builtin::BIenqueue_kernel: { 3103 StringRef Name; // Generated function call name 3104 unsigned NumArgs = E->getNumArgs(); 3105 3106 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 3107 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3108 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3109 3110 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 3111 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 3112 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 3113 llvm::Value *Range = NDRangeL.getAddress().getPointer(); 3114 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 3115 3116 if (NumArgs == 4) { 3117 // The most basic form of the call with parameters: 3118 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 3119 Name = "__enqueue_kernel_basic"; 3120 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, 3121 GenericVoidPtrTy}; 3122 llvm::FunctionType *FTy = llvm::FunctionType::get( 3123 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3124 3125 auto Info = 3126 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 3127 llvm::Value *Kernel = 3128 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3129 llvm::Value *Block = 3130 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3131 3132 AttrBuilder B; 3133 B.addAttribute(Attribute::ByVal); 3134 llvm::AttributeList ByValAttrSet = 3135 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 3136 3137 auto RTCall = 3138 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 3139 {Queue, Flags, Range, Kernel, Block}); 3140 RTCall->setAttributes(ByValAttrSet); 3141 return RValue::get(RTCall); 3142 } 3143 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 3144 3145 // Create a temporary array to hold the sizes of local pointer arguments 3146 // for the block. \p First is the position of the first size argument. 
3147 auto CreateArrayForSizeVar = [=](unsigned First) { 3148 auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); 3149 auto *Arr = Builder.CreateAlloca(AT); 3150 llvm::Value *Ptr; 3151 // Each of the following arguments specifies the size of the corresponding 3152 // argument passed to the enqueued block. 3153 auto *Zero = llvm::ConstantInt::get(IntTy, 0); 3154 for (unsigned I = First; I < NumArgs; ++I) { 3155 auto *Index = llvm::ConstantInt::get(IntTy, I - First); 3156 auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); 3157 if (I == First) 3158 Ptr = GEP; 3159 auto *V = 3160 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); 3161 Builder.CreateAlignedStore( 3162 V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); 3163 } 3164 return Ptr; 3165 }; 3166 3167 // Could have events and/or vaargs. 3168 if (E->getArg(3)->getType()->isBlockPointerType()) { 3169 // No events passed, but has variadic arguments. 3170 Name = "__enqueue_kernel_vaargs"; 3171 auto Info = 3172 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 3173 llvm::Value *Kernel = 3174 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3175 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3176 auto *PtrToSizeArray = CreateArrayForSizeVar(4); 3177 3178 // Create a vector of the arguments, as well as a constant value to 3179 // express to the runtime the number of variadic arguments. 
3180 std::vector<llvm::Value *> Args = { 3181 Queue, Flags, Range, 3182 Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), 3183 PtrToSizeArray}; 3184 std::vector<llvm::Type *> ArgTys = { 3185 QueueTy, IntTy, RangeTy, 3186 GenericVoidPtrTy, GenericVoidPtrTy, IntTy, 3187 PtrToSizeArray->getType()}; 3188 3189 llvm::FunctionType *FTy = llvm::FunctionType::get( 3190 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3191 return RValue::get( 3192 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3193 llvm::ArrayRef<llvm::Value *>(Args))); 3194 } 3195 // Any calls now have event arguments passed. 3196 if (NumArgs >= 7) { 3197 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 3198 llvm::Type *EventPtrTy = EventTy->getPointerTo( 3199 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3200 3201 llvm::Value *NumEvents = 3202 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 3203 llvm::Value *EventList = 3204 E->getArg(4)->getType()->isArrayType() 3205 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 3206 : EmitScalarExpr(E->getArg(4)); 3207 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 3208 // Convert to generic address space. 3209 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 3210 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 3211 auto Info = 3212 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); 3213 llvm::Value *Kernel = 3214 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3215 llvm::Value *Block = 3216 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3217 3218 std::vector<llvm::Type *> ArgTys = { 3219 QueueTy, Int32Ty, RangeTy, Int32Ty, 3220 EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; 3221 3222 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 3223 EventList, ClkEvent, Kernel, Block}; 3224 3225 if (NumArgs == 7) { 3226 // Has events but no variadics. 
3227 Name = "__enqueue_kernel_basic_events"; 3228 llvm::FunctionType *FTy = llvm::FunctionType::get( 3229 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3230 return RValue::get( 3231 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3232 llvm::ArrayRef<llvm::Value *>(Args))); 3233 } 3234 // Has event info and variadics 3235 // Pass the number of variadics to the runtime function too. 3236 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 3237 ArgTys.push_back(Int32Ty); 3238 Name = "__enqueue_kernel_events_vaargs"; 3239 3240 auto *PtrToSizeArray = CreateArrayForSizeVar(7); 3241 Args.push_back(PtrToSizeArray); 3242 ArgTys.push_back(PtrToSizeArray->getType()); 3243 3244 llvm::FunctionType *FTy = llvm::FunctionType::get( 3245 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3246 return RValue::get( 3247 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3248 llvm::ArrayRef<llvm::Value *>(Args))); 3249 } 3250 LLVM_FALLTHROUGH; 3251 } 3252 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 3253 // parameter. 
3254 case Builtin::BIget_kernel_work_group_size: { 3255 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3256 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3257 auto Info = 3258 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 3259 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3260 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3261 return RValue::get(Builder.CreateCall( 3262 CGM.CreateRuntimeFunction( 3263 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 3264 false), 3265 "__get_kernel_work_group_size_impl"), 3266 {Kernel, Arg})); 3267 } 3268 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 3269 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3270 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3271 auto Info = 3272 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 3273 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3274 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3275 return RValue::get(Builder.CreateCall( 3276 CGM.CreateRuntimeFunction( 3277 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 3278 false), 3279 "__get_kernel_preferred_work_group_multiple_impl"), 3280 {Kernel, Arg})); 3281 } 3282 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: 3283 case Builtin::BIget_kernel_sub_group_count_for_ndrange: { 3284 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3285 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3286 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); 3287 llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); 3288 auto Info = 3289 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); 3290 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3291 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3292 const char *Name = 
3293 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange 3294 ? "__get_kernel_max_sub_group_size_for_ndrange_impl" 3295 : "__get_kernel_sub_group_count_for_ndrange_impl"; 3296 return RValue::get(Builder.CreateCall( 3297 CGM.CreateRuntimeFunction( 3298 llvm::FunctionType::get( 3299 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, 3300 false), 3301 Name), 3302 {NDRange, Kernel, Block})); 3303 } 3304 3305 case Builtin::BI__builtin_store_half: 3306 case Builtin::BI__builtin_store_halff: { 3307 Value *Val = EmitScalarExpr(E->getArg(0)); 3308 Address Address = EmitPointerWithAlignment(E->getArg(1)); 3309 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); 3310 return RValue::get(Builder.CreateStore(HalfVal, Address)); 3311 } 3312 case Builtin::BI__builtin_load_half: { 3313 Address Address = EmitPointerWithAlignment(E->getArg(0)); 3314 Value *HalfVal = Builder.CreateLoad(Address); 3315 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); 3316 } 3317 case Builtin::BI__builtin_load_halff: { 3318 Address Address = EmitPointerWithAlignment(E->getArg(0)); 3319 Value *HalfVal = Builder.CreateLoad(Address); 3320 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); 3321 } 3322 case Builtin::BIprintf: 3323 if (getTarget().getTriple().isNVPTX()) 3324 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 3325 break; 3326 case Builtin::BI__builtin_canonicalize: 3327 case Builtin::BI__builtin_canonicalizef: 3328 case Builtin::BI__builtin_canonicalizel: 3329 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 3330 3331 case Builtin::BI__builtin_thread_pointer: { 3332 if (!getContext().getTargetInfo().isTLSSupported()) 3333 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 3334 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 
3335 break; 3336 } 3337 case Builtin::BI__builtin_os_log_format: 3338 return emitBuiltinOSLogFormat(*E); 3339 3340 case Builtin::BI__builtin_os_log_format_buffer_size: { 3341 analyze_os_log::OSLogBufferLayout Layout; 3342 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 3343 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 3344 Layout.size().getQuantity())); 3345 } 3346 3347 case Builtin::BI__xray_customevent: { 3348 if (!ShouldXRayInstrumentFunction()) 3349 return RValue::getIgnored(); 3350 3351 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( 3352 XRayInstrKind::Custom)) 3353 return RValue::getIgnored(); 3354 3355 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) 3356 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents()) 3357 return RValue::getIgnored(); 3358 3359 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 3360 auto FTy = F->getFunctionType(); 3361 auto Arg0 = E->getArg(0); 3362 auto Arg0Val = EmitScalarExpr(Arg0); 3363 auto Arg0Ty = Arg0->getType(); 3364 auto PTy0 = FTy->getParamType(0); 3365 if (PTy0 != Arg0Val->getType()) { 3366 if (Arg0Ty->isArrayType()) 3367 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 3368 else 3369 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 3370 } 3371 auto Arg1 = EmitScalarExpr(E->getArg(1)); 3372 auto PTy1 = FTy->getParamType(1); 3373 if (PTy1 != Arg1->getType()) 3374 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 3375 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 3376 } 3377 3378 case Builtin::BI__xray_typedevent: { 3379 // TODO: There should be a way to always emit events even if the current 3380 // function is not instrumented. Losing events in a stream can cripple 3381 // a trace. 
3382 if (!ShouldXRayInstrumentFunction()) 3383 return RValue::getIgnored(); 3384 3385 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( 3386 XRayInstrKind::Typed)) 3387 return RValue::getIgnored(); 3388 3389 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) 3390 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents()) 3391 return RValue::getIgnored(); 3392 3393 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent); 3394 auto FTy = F->getFunctionType(); 3395 auto Arg0 = EmitScalarExpr(E->getArg(0)); 3396 auto PTy0 = FTy->getParamType(0); 3397 if (PTy0 != Arg0->getType()) 3398 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0); 3399 auto Arg1 = E->getArg(1); 3400 auto Arg1Val = EmitScalarExpr(Arg1); 3401 auto Arg1Ty = Arg1->getType(); 3402 auto PTy1 = FTy->getParamType(1); 3403 if (PTy1 != Arg1Val->getType()) { 3404 if (Arg1Ty->isArrayType()) 3405 Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer(); 3406 else 3407 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1); 3408 } 3409 auto Arg2 = EmitScalarExpr(E->getArg(2)); 3410 auto PTy2 = FTy->getParamType(2); 3411 if (PTy2 != Arg2->getType()) 3412 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2); 3413 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2})); 3414 } 3415 3416 case Builtin::BI__builtin_ms_va_start: 3417 case Builtin::BI__builtin_ms_va_end: 3418 return RValue::get( 3419 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 3420 BuiltinID == Builtin::BI__builtin_ms_va_start)); 3421 3422 case Builtin::BI__builtin_ms_va_copy: { 3423 // Lower this manually. We can't reliably determine whether or not any 3424 // given va_copy() is for a Win64 va_list from the calling convention 3425 // alone, because it's legal to do this from a System V ABI function. 3426 // With opaque pointer types, we won't have enough information in LLVM 3427 // IR to determine this from the argument types, either. Best to do it 3428 // now, while we have enough information. 
3429 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 3430 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 3431 3432 llvm::Type *BPP = Int8PtrPtrTy; 3433 3434 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 3435 DestAddr.getAlignment()); 3436 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 3437 SrcAddr.getAlignment()); 3438 3439 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 3440 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); 3441 } 3442 } 3443 3444 // If this is an alias for a lib function (e.g. __builtin_sin), emit 3445 // the call using the normal call path, but using the unmangled 3446 // version of the function name. 3447 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 3448 return emitLibraryCall(*this, FD, E, 3449 CGM.getBuiltinLibFunction(FD, BuiltinID)); 3450 3451 // If this is a predefined lib function (e.g. malloc), emit the call 3452 // using exactly the normal call path. 3453 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 3454 return emitLibraryCall(*this, FD, E, 3455 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 3456 3457 // Check that a call to a target specific builtin has the correct target 3458 // features. 3459 // This is down here to avoid non-target specific builtins, however, if 3460 // generic builtins start to require generic target features then we 3461 // can move this up to the beginning of the function. 3462 checkTargetFeatures(E, FD); 3463 3464 // See if we have a target specific intrinsic. 
3465 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 3466 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 3467 StringRef Prefix = 3468 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 3469 if (!Prefix.empty()) { 3470 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 3471 // NOTE we don't need to perform a compatibility flag check here since the 3472 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 3473 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 3474 if (IntrinsicID == Intrinsic::not_intrinsic) 3475 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 3476 } 3477 3478 if (IntrinsicID != Intrinsic::not_intrinsic) { 3479 SmallVector<Value*, 16> Args; 3480 3481 // Find out if any arguments are required to be integer constant 3482 // expressions. 3483 unsigned ICEArguments = 0; 3484 ASTContext::GetBuiltinTypeError Error; 3485 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 3486 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 3487 3488 Function *F = CGM.getIntrinsic(IntrinsicID); 3489 llvm::FunctionType *FTy = F->getFunctionType(); 3490 3491 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 3492 Value *ArgValue; 3493 // If this is a normal argument, just emit it as a scalar. 3494 if ((ICEArguments & (1 << i)) == 0) { 3495 ArgValue = EmitScalarExpr(E->getArg(i)); 3496 } else { 3497 // If this is required to be a constant, constant fold it so that we 3498 // know that the generated intrinsic gets a ConstantInt. 3499 llvm::APSInt Result; 3500 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 3501 assert(IsConst && "Constant arg isn't actually constant?"); 3502 (void)IsConst; 3503 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 3504 } 3505 3506 // If the intrinsic arg type is different from the builtin arg type 3507 // we need to do a bit cast. 
3508 llvm::Type *PTy = FTy->getParamType(i); 3509 if (PTy != ArgValue->getType()) { 3510 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 3511 "Must be able to losslessly bit cast to param"); 3512 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 3513 } 3514 3515 Args.push_back(ArgValue); 3516 } 3517 3518 Value *V = Builder.CreateCall(F, Args); 3519 QualType BuiltinRetType = E->getType(); 3520 3521 llvm::Type *RetTy = VoidTy; 3522 if (!BuiltinRetType->isVoidType()) 3523 RetTy = ConvertType(BuiltinRetType); 3524 3525 if (RetTy != V->getType()) { 3526 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 3527 "Must be able to losslessly bit cast result type"); 3528 V = Builder.CreateBitCast(V, RetTy); 3529 } 3530 3531 return RValue::get(V); 3532 } 3533 3534 // See if we have a target specific builtin that needs to be lowered. 3535 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 3536 return RValue::get(V); 3537 3538 ErrorUnsupported(E, "builtin function"); 3539 3540 // Unknown builtin, for now just dump it out and return undef. 
3541 return GetUndefRValue(E->getType()); 3542 } 3543 3544 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 3545 unsigned BuiltinID, const CallExpr *E, 3546 llvm::Triple::ArchType Arch) { 3547 switch (Arch) { 3548 case llvm::Triple::arm: 3549 case llvm::Triple::armeb: 3550 case llvm::Triple::thumb: 3551 case llvm::Triple::thumbeb: 3552 return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch); 3553 case llvm::Triple::aarch64: 3554 case llvm::Triple::aarch64_be: 3555 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); 3556 case llvm::Triple::x86: 3557 case llvm::Triple::x86_64: 3558 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 3559 case llvm::Triple::ppc: 3560 case llvm::Triple::ppc64: 3561 case llvm::Triple::ppc64le: 3562 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 3563 case llvm::Triple::r600: 3564 case llvm::Triple::amdgcn: 3565 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 3566 case llvm::Triple::systemz: 3567 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 3568 case llvm::Triple::nvptx: 3569 case llvm::Triple::nvptx64: 3570 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 3571 case llvm::Triple::wasm32: 3572 case llvm::Triple::wasm64: 3573 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 3574 case llvm::Triple::hexagon: 3575 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E); 3576 default: 3577 return nullptr; 3578 } 3579 } 3580 3581 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 3582 const CallExpr *E) { 3583 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 3584 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 3585 return EmitTargetArchBuiltinExpr( 3586 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 3587 getContext().getAuxTargetInfo()->getTriple().getArch()); 3588 } 3589 3590 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 3591 getTarget().getTriple().getArch()); 3592 } 3593 3594 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 3595 NeonTypeFlags TypeFlags, 3596 
bool HasLegalHalfType=true, 3597 bool V1Ty=false) { 3598 int IsQuad = TypeFlags.isQuad(); 3599 switch (TypeFlags.getEltType()) { 3600 case NeonTypeFlags::Int8: 3601 case NeonTypeFlags::Poly8: 3602 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 3603 case NeonTypeFlags::Int16: 3604 case NeonTypeFlags::Poly16: 3605 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 3606 case NeonTypeFlags::Float16: 3607 if (HasLegalHalfType) 3608 return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); 3609 else 3610 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 3611 case NeonTypeFlags::Int32: 3612 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 3613 case NeonTypeFlags::Int64: 3614 case NeonTypeFlags::Poly64: 3615 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 3616 case NeonTypeFlags::Poly128: 3617 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 3618 // There is a lot of i128 and f128 API missing. 3619 // so we use v16i8 to represent poly128 and get pattern matched. 3620 return llvm::VectorType::get(CGF->Int8Ty, 16); 3621 case NeonTypeFlags::Float32: 3622 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 3623 case NeonTypeFlags::Float64: 3624 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 3625 } 3626 llvm_unreachable("Unknown vector element type!"); 3627 } 3628 3629 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 3630 NeonTypeFlags IntTypeFlags) { 3631 int IsQuad = IntTypeFlags.isQuad(); 3632 switch (IntTypeFlags.getEltType()) { 3633 case NeonTypeFlags::Int16: 3634 return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad)); 3635 case NeonTypeFlags::Int32: 3636 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 3637 case NeonTypeFlags::Int64: 3638 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 3639 default: 3640 llvm_unreachable("Type can't be converted to floating-point!"); 3641 } 3642 } 3643 3644 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 3645 unsigned nElts = V->getType()->getVectorNumElements(); 3646 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 3647 return Builder.CreateShuffleVector(V, V, SV, "lane"); 3648 } 3649 3650 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 3651 const char *name, 3652 unsigned shift, bool rightshift) { 3653 unsigned j = 0; 3654 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3655 ai != ae; ++ai, ++j) 3656 if (shift > 0 && shift == j) 3657 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 3658 else 3659 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 3660 3661 return Builder.CreateCall(F, Ops, name); 3662 } 3663 3664 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 3665 bool neg) { 3666 int SV = cast<ConstantInt>(V)->getSExtValue(); 3667 return ConstantInt::get(Ty, neg ? -SV : SV); 3668 } 3669 3670 // \brief Right-shift a vector by a constant. 
3671 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 3672 llvm::Type *Ty, bool usgn, 3673 const char *name) { 3674 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3675 3676 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 3677 int EltSize = VTy->getScalarSizeInBits(); 3678 3679 Vec = Builder.CreateBitCast(Vec, Ty); 3680 3681 // lshr/ashr are undefined when the shift amount is equal to the vector 3682 // element size. 3683 if (ShiftAmt == EltSize) { 3684 if (usgn) { 3685 // Right-shifting an unsigned value by its size yields 0. 3686 return llvm::ConstantAggregateZero::get(VTy); 3687 } else { 3688 // Right-shifting a signed value by its size is equivalent 3689 // to a shift of size-1. 3690 --ShiftAmt; 3691 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 3692 } 3693 } 3694 3695 Shift = EmitNeonShiftVector(Shift, Ty, false); 3696 if (usgn) 3697 return Builder.CreateLShr(Vec, Shift, name); 3698 else 3699 return Builder.CreateAShr(Vec, Shift, name); 3700 } 3701 3702 enum { 3703 AddRetType = (1 << 0), 3704 Add1ArgType = (1 << 1), 3705 Add2ArgTypes = (1 << 2), 3706 3707 VectorizeRetType = (1 << 3), 3708 VectorizeArgTypes = (1 << 4), 3709 3710 InventFloatType = (1 << 5), 3711 UnsignedAlts = (1 << 6), 3712 3713 Use64BitVectors = (1 << 7), 3714 Use128BitVectors = (1 << 8), 3715 3716 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 3717 VectorRet = AddRetType | VectorizeRetType, 3718 VectorRetGetArgs01 = 3719 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 3720 FpCmpzModifiers = 3721 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 3722 }; 3723 3724 namespace { 3725 struct NeonIntrinsicInfo { 3726 const char *NameHint; 3727 unsigned BuiltinID; 3728 unsigned LLVMIntrinsic; 3729 unsigned AltLLVMIntrinsic; 3730 unsigned TypeModifier; 3731 3732 bool operator<(unsigned RHSBuiltinID) const { 3733 return BuiltinID < RHSBuiltinID; 3734 } 3735 bool operator<(const NeonIntrinsicInfo &TE) const { 3736 return 
BuiltinID < TE.BuiltinID; 3737 } 3738 }; 3739 } // end anonymous namespace 3740 3741 #define NEONMAP0(NameBase) \ 3742 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 3743 3744 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 3745 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3746 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 3747 3748 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 3749 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3750 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 3751 TypeModifier } 3752 3753 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 3754 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3755 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3756 NEONMAP1(vabs_v, arm_neon_vabs, 0), 3757 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 3758 NEONMAP0(vaddhn_v), 3759 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 3760 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 3761 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 3762 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 3763 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 3764 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 3765 NEONMAP1(vcage_v, arm_neon_vacge, 0), 3766 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 3767 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 3768 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 3769 NEONMAP1(vcale_v, arm_neon_vacge, 0), 3770 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 3771 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 3772 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 3773 NEONMAP0(vceqz_v), 3774 NEONMAP0(vceqzq_v), 3775 NEONMAP0(vcgez_v), 3776 NEONMAP0(vcgezq_v), 3777 NEONMAP0(vcgtz_v), 3778 NEONMAP0(vcgtzq_v), 3779 NEONMAP0(vclez_v), 3780 NEONMAP0(vclezq_v), 3781 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 3782 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 3783 NEONMAP0(vcltz_v), 3784 NEONMAP0(vcltzq_v), 3785 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3786 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3787 
NEONMAP1(vcnt_v, ctpop, Add1ArgType), 3788 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3789 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 3790 NEONMAP0(vcvt_f16_v), 3791 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 3792 NEONMAP0(vcvt_f32_v), 3793 NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3794 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3795 NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0), 3796 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3797 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3798 NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0), 3799 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3800 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3801 NEONMAP0(vcvt_s16_v), 3802 NEONMAP0(vcvt_s32_v), 3803 NEONMAP0(vcvt_s64_v), 3804 NEONMAP0(vcvt_u16_v), 3805 NEONMAP0(vcvt_u32_v), 3806 NEONMAP0(vcvt_u64_v), 3807 NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), 3808 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 3809 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 3810 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 3811 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 3812 NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0), 3813 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 3814 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 3815 NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0), 3816 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 3817 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 3818 NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0), 3819 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 3820 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 3821 NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0), 3822 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 3823 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 3824 NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0), 3825 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 3826 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 3827 NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0), 3828 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 3829 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 3830 
NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0), 3831 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 3832 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 3833 NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0), 3834 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 3835 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 3836 NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0), 3837 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 3838 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 3839 NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0), 3840 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 3841 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 3842 NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0), 3843 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 3844 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 3845 NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0), 3846 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 3847 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 3848 NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0), 3849 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 3850 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 3851 NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0), 3852 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 3853 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 3854 NEONMAP0(vcvtq_f16_v), 3855 NEONMAP0(vcvtq_f32_v), 3856 NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3857 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3858 NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0), 3859 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3860 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3861 NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0), 3862 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3863 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3864 NEONMAP0(vcvtq_s16_v), 3865 NEONMAP0(vcvtq_s32_v), 3866 NEONMAP0(vcvtq_s64_v), 3867 NEONMAP0(vcvtq_u16_v), 3868 NEONMAP0(vcvtq_u32_v), 3869 NEONMAP0(vcvtq_u64_v), 3870 NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0), 3871 NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0), 3872 NEONMAP0(vext_v), 3873 
NEONMAP0(vextq_v), 3874 NEONMAP0(vfma_v), 3875 NEONMAP0(vfmaq_v), 3876 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3877 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3878 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3879 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3880 NEONMAP0(vld1_dup_v), 3881 NEONMAP1(vld1_v, arm_neon_vld1, 0), 3882 NEONMAP0(vld1q_dup_v), 3883 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 3884 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 3885 NEONMAP1(vld2_v, arm_neon_vld2, 0), 3886 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 3887 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 3888 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 3889 NEONMAP1(vld3_v, arm_neon_vld3, 0), 3890 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 3891 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 3892 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 3893 NEONMAP1(vld4_v, arm_neon_vld4, 0), 3894 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 3895 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 3896 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3897 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 3898 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 3899 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3900 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3901 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 3902 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 3903 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3904 NEONMAP0(vmovl_v), 3905 NEONMAP0(vmovn_v), 3906 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 3907 NEONMAP0(vmull_v), 3908 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 3909 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3910 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3911 
NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 3912 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3913 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3914 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 3915 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 3916 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 3917 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 3918 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 3919 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3920 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3921 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 3922 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 3923 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 3924 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 3925 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 3926 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 3927 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 3928 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 3929 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 3930 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 3931 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 3932 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3933 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3934 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3935 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3936 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3937 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3938 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 3939 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 
0), 3940 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3941 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3942 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 3943 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3944 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3945 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 3946 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 3947 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3948 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3949 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 3950 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 3951 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 3952 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 3953 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 3954 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 3955 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 3956 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 3957 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 3958 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 3959 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 3960 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 3961 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3962 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3963 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3964 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3965 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 3966 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 3967 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 3968 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 3969 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 3970 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 3971 
NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 3972 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 3973 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 3974 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 3975 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 3976 NEONMAP0(vshl_n_v), 3977 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 3978 NEONMAP0(vshll_n_v), 3979 NEONMAP0(vshlq_n_v), 3980 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 3981 NEONMAP0(vshr_n_v), 3982 NEONMAP0(vshrn_n_v), 3983 NEONMAP0(vshrq_n_v), 3984 NEONMAP1(vst1_v, arm_neon_vst1, 0), 3985 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 3986 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 3987 NEONMAP1(vst2_v, arm_neon_vst2, 0), 3988 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 3989 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 3990 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 3991 NEONMAP1(vst3_v, arm_neon_vst3, 0), 3992 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 3993 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 3994 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 3995 NEONMAP1(vst4_v, arm_neon_vst4, 0), 3996 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 3997 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 3998 NEONMAP0(vsubhn_v), 3999 NEONMAP0(vtrn_v), 4000 NEONMAP0(vtrnq_v), 4001 NEONMAP0(vtst_v), 4002 NEONMAP0(vtstq_v), 4003 NEONMAP0(vuzp_v), 4004 NEONMAP0(vuzpq_v), 4005 NEONMAP0(vzip_v), 4006 NEONMAP0(vzipq_v) 4007 }; 4008 4009 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 4010 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 4011 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 4012 NEONMAP0(vaddhn_v), 4013 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 4014 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 4015 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 4016 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 4017 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 4018 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 4019 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 4020 NEONMAP1(vcagtq_v, 
aarch64_neon_facgt, 0), 4021 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 4022 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 4023 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 4024 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 4025 NEONMAP0(vceqz_v), 4026 NEONMAP0(vceqzq_v), 4027 NEONMAP0(vcgez_v), 4028 NEONMAP0(vcgezq_v), 4029 NEONMAP0(vcgtz_v), 4030 NEONMAP0(vcgtzq_v), 4031 NEONMAP0(vclez_v), 4032 NEONMAP0(vclezq_v), 4033 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 4034 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 4035 NEONMAP0(vcltz_v), 4036 NEONMAP0(vcltzq_v), 4037 NEONMAP1(vclz_v, ctlz, Add1ArgType), 4038 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 4039 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 4040 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 4041 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 4042 NEONMAP0(vcvt_f16_v), 4043 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 4044 NEONMAP0(vcvt_f32_v), 4045 NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4046 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4047 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4048 NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), 4049 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 4050 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 4051 NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), 4052 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 4053 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 4054 NEONMAP0(vcvtq_f16_v), 4055 NEONMAP0(vcvtq_f32_v), 4056 NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4057 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4058 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4059 NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), 4060 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 4061 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 4062 
NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), 4063 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 4064 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 4065 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 4066 NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0), 4067 NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0), 4068 NEONMAP0(vext_v), 4069 NEONMAP0(vextq_v), 4070 NEONMAP0(vfma_v), 4071 NEONMAP0(vfmaq_v), 4072 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 4073 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 4074 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 4075 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 4076 NEONMAP0(vmovl_v), 4077 NEONMAP0(vmovn_v), 4078 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 4079 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 4080 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 4081 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 4082 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 4083 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 4084 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 4085 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 4086 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 4087 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 4088 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 4089 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 4090 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 4091 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 4092 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 4093 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 4094 
NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 4095 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 4096 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 4097 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 4098 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 4099 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 4100 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 4101 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 4102 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 4103 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 4104 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 4105 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 4106 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 4107 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 4108 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 4109 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 4110 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 4111 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 4112 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 4113 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 4114 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 4115 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 4116 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 4117 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 4118 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 4119 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 4120 
NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 4121 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 4122 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 4123 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 4124 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 4125 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 4126 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 4127 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 4128 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 4129 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 4130 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 4131 NEONMAP0(vshl_n_v), 4132 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 4133 NEONMAP0(vshll_n_v), 4134 NEONMAP0(vshlq_n_v), 4135 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 4136 NEONMAP0(vshr_n_v), 4137 NEONMAP0(vshrn_n_v), 4138 NEONMAP0(vshrq_n_v), 4139 NEONMAP0(vsubhn_v), 4140 NEONMAP0(vtst_v), 4141 NEONMAP0(vtstq_v), 4142 }; 4143 4144 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 4145 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 4146 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 4147 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 4148 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 4149 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 4150 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 4151 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 4152 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 4153 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 4154 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 4155 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 4156 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 4157 
NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 4158 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 4159 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 4160 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 4161 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 4162 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 4163 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 4164 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 4165 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 4166 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 4167 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 4168 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 4169 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 4170 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 4171 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 4172 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 4173 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 4174 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 4175 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 4176 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 4177 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 4178 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 4179 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 4180 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 4181 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 4182 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 4183 
NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 4184 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 4185 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 4186 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 4187 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 4188 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 4189 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 4190 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 4191 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 4192 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 4193 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 4194 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 4195 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 4196 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 4197 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 4198 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 4199 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 4200 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 4201 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 4202 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 4203 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 4204 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 4205 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 4206 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 4207 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 4208 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 4209 NEONMAP1(vminv_u32, 
aarch64_neon_uminv, AddRetType | Add1ArgType), 4210 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 4211 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 4212 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 4213 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 4214 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 4215 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 4216 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 4217 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 4218 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 4219 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 4220 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 4221 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 4222 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 4223 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 4224 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 4225 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 4226 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 4227 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 4228 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 4229 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 4230 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 4231 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 4232 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 4233 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 4234 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 4235 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 4236 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | 
Use64BitVectors), 4237 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 4238 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 4239 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 4240 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 4241 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 4242 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 4243 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 4244 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 4245 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 4246 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 4247 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 4248 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 4249 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 4250 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 4251 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 4252 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 4253 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 4254 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 4255 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 4256 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 4257 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 4258 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 4259 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 4260 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 4261 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 4262 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 4263 NEONMAP1(vqrshlh_u16, 
aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 4264 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 4265 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 4266 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 4267 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 4268 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 4269 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 4270 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 4271 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 4272 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 4273 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 4274 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 4275 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4276 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4277 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4278 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4279 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 4280 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 4281 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4282 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4283 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 4284 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 4285 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 4286 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 4287 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 4288 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 4289 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | 
Use64BitVectors), 4290 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 4291 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 4292 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 4293 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 4294 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 4295 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 4296 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 4297 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 4298 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 4299 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 4300 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 4301 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 4302 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 4303 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 4304 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 4305 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 4306 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 4307 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 4308 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 4309 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 4310 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 4311 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 4312 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 4313 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 4314 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 4315 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 4316 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 4317 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 4318 
NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 4319 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 4320 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 4321 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 4322 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 4323 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 4324 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 4325 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 4326 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 4327 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 4328 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 4329 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 4330 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 4331 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 4332 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 4333 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 4334 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 4335 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 4336 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 4337 // FP16 scalar intrinsics go here. 
4338 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), 4339 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 4340 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 4341 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 4342 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 4343 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 4344 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 4345 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 4346 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 4347 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 4348 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 4349 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 4350 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 4351 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 4352 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 4353 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 4354 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 4355 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 4356 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 4357 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 4358 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 4359 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 4360 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 4361 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 4362 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 4363 NEONMAP1(vmulxh_f16, 
aarch64_neon_fmulx, Add1ArgType), 4364 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), 4365 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), 4366 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), 4367 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), 4368 }; 4369 4370 #undef NEONMAP0 4371 #undef NEONMAP1 4372 #undef NEONMAP2 4373 4374 static bool NEONSIMDIntrinsicsProvenSorted = false; 4375 4376 static bool AArch64SIMDIntrinsicsProvenSorted = false; 4377 static bool AArch64SISDIntrinsicsProvenSorted = false; 4378 4379 4380 static const NeonIntrinsicInfo * 4381 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 4382 unsigned BuiltinID, bool &MapProvenSorted) { 4383 4384 #ifndef NDEBUG 4385 if (!MapProvenSorted) { 4386 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 4387 MapProvenSorted = true; 4388 } 4389 #endif 4390 4391 const NeonIntrinsicInfo *Builtin = 4392 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 4393 4394 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 4395 return Builtin; 4396 4397 return nullptr; 4398 } 4399 4400 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 4401 unsigned Modifier, 4402 llvm::Type *ArgType, 4403 const CallExpr *E) { 4404 int VectorSize = 0; 4405 if (Modifier & Use64BitVectors) 4406 VectorSize = 64; 4407 else if (Modifier & Use128BitVectors) 4408 VectorSize = 128; 4409 4410 // Return type. 4411 SmallVector<llvm::Type *, 3> Tys; 4412 if (Modifier & AddRetType) { 4413 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 4414 if (Modifier & VectorizeRetType) 4415 Ty = llvm::VectorType::get( 4416 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 4417 4418 Tys.push_back(Ty); 4419 } 4420 4421 // Arguments. 4422 if (Modifier & VectorizeArgTypes) { 4423 int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 4424 ArgType = llvm::VectorType::get(ArgType, Elts); 4425 } 4426 4427 if (Modifier & (Add1ArgType | Add2ArgTypes)) 4428 Tys.push_back(ArgType); 4429 4430 if (Modifier & Add2ArgTypes) 4431 Tys.push_back(ArgType); 4432 4433 if (Modifier & InventFloatType) 4434 Tys.push_back(FloatTy); 4435 4436 return CGM.getIntrinsic(IntrinsicID, Tys); 4437 } 4438 4439 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 4440 const NeonIntrinsicInfo &SISDInfo, 4441 SmallVectorImpl<Value *> &Ops, 4442 const CallExpr *E) { 4443 unsigned BuiltinID = SISDInfo.BuiltinID; 4444 unsigned int Int = SISDInfo.LLVMIntrinsic; 4445 unsigned Modifier = SISDInfo.TypeModifier; 4446 const char *s = SISDInfo.NameHint; 4447 4448 switch (BuiltinID) { 4449 case NEON::BI__builtin_neon_vcled_s64: 4450 case NEON::BI__builtin_neon_vcled_u64: 4451 case NEON::BI__builtin_neon_vcles_f32: 4452 case NEON::BI__builtin_neon_vcled_f64: 4453 case NEON::BI__builtin_neon_vcltd_s64: 4454 case NEON::BI__builtin_neon_vcltd_u64: 4455 case NEON::BI__builtin_neon_vclts_f32: 4456 case NEON::BI__builtin_neon_vcltd_f64: 4457 case NEON::BI__builtin_neon_vcales_f32: 4458 case NEON::BI__builtin_neon_vcaled_f64: 4459 case NEON::BI__builtin_neon_vcalts_f32: 4460 case NEON::BI__builtin_neon_vcaltd_f64: 4461 // Only one direction of comparisons actually exist, cmle is actually a cmge 4462 // with swapped operands. The table gives us the right intrinsic but we 4463 // still need to do the swap. 4464 std::swap(Ops[0], Ops[1]); 4465 break; 4466 } 4467 4468 assert(Int && "Generic code assumes a valid intrinsic"); 4469 4470 // Determine the type(s) of this overloaded AArch64 intrinsic. 
const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  // Walk the intrinsic's formal parameters in lock-step with Ops and widen any
  // operand whose bit width differs from the parameter's.
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
             ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
    // it before inserting.
    Ops[j] =
        CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
    // Place the scalar in lane 0 of an otherwise undef vector of the
    // parameter type.
    Ops[j] =
        CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  // If the call produced something wider than the builtin's result type, the
  // wanted value is lane 0 of the returned vector.
  if (ResultType->getPrimitiveSizeInBits() <
      Result->getType()->getPrimitiveSizeInBits())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

/// Emit IR for a NEON builtin through the lowering shared between targets.
///
/// The last argument of \p E must be an integer constant expression encoding
/// the NeonTypeFlags of the overloaded vector type. Returns nullptr when that
/// argument is not constant or when no vector type can be derived from it.
///
/// \param BuiltinID          builtin being lowered; selects the switch case.
/// \param LLVMIntrinsic      primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic   alternate intrinsic ID; which of the two is used
///                           depends on \p Modifier, signedness, or the case.
/// \param NameHint           name given to the emitted value in several cases.
/// \param Modifier           flag bits; only UnsignedAlts is tested directly
///                           here, the rest is forwarded to
///                           LookupNeonLLVMIntrinsic.
/// \param Ops                already-emitted operands; modified in place.
/// \param PtrOp0, PtrOp1     addresses whose alignment is passed to the
///                           load/store intrinsics.
/// \param Arch               not referenced in the visible body.
///
/// Builtins without a returning case leave the switch and are emitted as a
/// call to the intrinsic chosen by LookupNeonLLVMIntrinsic, bitcast to the
/// builtin's result type.
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();
  const bool HasLegalHalfType = getTarget().hasLegalHalfType();

  llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Alignment of an address as an i32 constant, as expected by the
  // load/store intrinsics below.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // When the entry is flagged UnsignedAlts, signed element types use the
  // alternate intrinsic.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the generic llvm.fabs intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // Swap the operands and reuse the vcage/vcagt lowering below.
    std::swap(Ops[0], Ops[1]);
    LLVM_FALLTHROUGH;
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // Pick the floating-point element type with the same width as VTy's
    // elements. This local Ty shadows the outer one within this case.
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  // Compare-against-zero builtins: each passes a floating-point and an
  // integer predicate; the helper chooses between them.
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
                                         ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
                                         ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
                                         ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
                                         ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
                                         ICmpInst::ICMP_SLT, "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    // The call itself is emitted by the generic path after the switch.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer -> f32: view the operand as the integer vector type, then
    // convert with a plain uitofp/sitofp.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasLegalHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_v:
  case NEON::BI__builtin_neon_vcvtq_f16_v:
    // Same as above with an f16 destination.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasLegalHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_v:
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f16_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Overloaded on { float result type, integer source type }. Unsigned
    // sources use LLVMIntrinsic, signed ones AltLLVMIntrinsic.
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_v:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    // Overloaded on { integer result type, float source type }.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_v:
  case NEON::BI__builtin_neon_vcvt_u16_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_v:
  case NEON::BI__builtin_neon_vcvtq_u16_v: {
    // Float -> integer via plain fptoui/fptosi, no target intrinsic.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  // NOTE(review): vcvta_u16_v is absent from the list below although every
  // other a/n/p/m variant lists s16/s32/s64/u16/u32/u64 -- confirm whether
  // that is intentional.
  case NEON::BI__builtin_neon_vcvta_s16_v:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // Overloaded on { integer result type, float source type }.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Shuffle mask CV, CV+1, ... : one lane per element of VTy, taken from
    // the two bitcast operands starting at the constant offset in Ops[2].
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    // The alignment of PtrOp0 is appended as the trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    // Call the load intrinsic on Ops[1], then store its result through the
    // pointer in Ops[0], retyped to point at the result type.
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load one element, insert it into lane 0 of an undef vector, then splat.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Bitcast operands 2 .. size-2 to the vector type; the last operand is
    // left as is.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Everything except Ops[0] is passed to the intrinsic; the result is then
    // stored through the pointer in Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen from the truncated-element vector type by zero/sign extension.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow from the extended-element vector type by truncation.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    // Polynomial types override the signed/unsigned choice.
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two steps: LLVMIntrinsic on the trailing operands, then
    // AltLLVMIntrinsic on Ops[0] and that intermediate result.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point vectors use LLVMIntrinsic, all others AltLLVMIntrinsic.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Immediate left shift is emitted as a plain shl by a vector shift amount.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend from the truncated-element type to VTy, then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the extended-element type, then truncate to Ty.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    // All stores share one form: append the alignment of PtrOp0 and call the
    // intrinsic overloaded on { i8*, vector type }.
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // Two shuffles of Ops[1]/Ops[2] are built and stored to elements 0 and 1
    // of the pointer in Ops[0]; the second store is the returned value.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // Compare (a & b) != 0 per lane and sign-extend the i1 result to Ty.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // Same store pattern as vtrn; the mask takes every second lane, starting
    // at lane vi.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // Same store pattern as vtrn; the mask pairs lane k of the first operand
    // with lane k of the second (index k+e), k = (i + vi*e) / 2.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vdot_v:
  case NEON::BI__builtin_neon_vdotq_v: {
    // The inputs are i8 vectors with the same total bit width as Ty.
    llvm::Type *InputTy =
        llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    Int = Usgn ?
LLVMIntrinsic : AltLLVMIntrinsic; 4987 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); 4988 } 4989 } 4990 4991 assert(Int && "Expected valid intrinsic number"); 4992 4993 // Determine the type(s) of this overloaded AArch64 intrinsic. 4994 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4995 4996 Value *Result = EmitNeonCall(F, Ops, NameHint); 4997 llvm::Type *ResultType = ConvertType(E->getType()); 4998 // AArch64 intrinsic one-element vector type cast to 4999 // scalar type expected by the builtin 5000 return Builder.CreateBitCast(Result, ResultType, NameHint); 5001 } 5002 5003 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 5004 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 5005 const CmpInst::Predicate Ip, const Twine &Name) { 5006 llvm::Type *OTy = Op->getType(); 5007 5008 // FIXME: this is utterly horrific. We should not be looking at previous 5009 // codegen context to find out what needs doing. Unfortunately TableGen 5010 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 5011 // (etc). 
5012 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 5013 OTy = BI->getOperand(0)->getType(); 5014 5015 Op = Builder.CreateBitCast(Op, OTy); 5016 if (OTy->getScalarType()->isFloatingPointTy()) { 5017 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 5018 } else { 5019 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 5020 } 5021 return Builder.CreateSExt(Op, Ty, Name); 5022 } 5023 5024 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 5025 Value *ExtOp, Value *IndexOp, 5026 llvm::Type *ResTy, unsigned IntID, 5027 const char *Name) { 5028 SmallVector<Value *, 2> TblOps; 5029 if (ExtOp) 5030 TblOps.push_back(ExtOp); 5031 5032 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 5033 SmallVector<uint32_t, 16> Indices; 5034 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 5035 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 5036 Indices.push_back(2*i); 5037 Indices.push_back(2*i+1); 5038 } 5039 5040 int PairPos = 0, End = Ops.size() - 1; 5041 while (PairPos < End) { 5042 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 5043 Ops[PairPos+1], Indices, 5044 Name)); 5045 PairPos += 2; 5046 } 5047 5048 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 5049 // of the 128-bit lookup table with zero. 
5050 if (PairPos == End) { 5051 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 5052 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 5053 ZeroTbl, Indices, Name)); 5054 } 5055 5056 Function *TblF; 5057 TblOps.push_back(IndexOp); 5058 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 5059 5060 return CGF.EmitNeonCall(TblF, TblOps, Name); 5061 } 5062 5063 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 5064 unsigned Value; 5065 switch (BuiltinID) { 5066 default: 5067 return nullptr; 5068 case ARM::BI__builtin_arm_nop: 5069 Value = 0; 5070 break; 5071 case ARM::BI__builtin_arm_yield: 5072 case ARM::BI__yield: 5073 Value = 1; 5074 break; 5075 case ARM::BI__builtin_arm_wfe: 5076 case ARM::BI__wfe: 5077 Value = 2; 5078 break; 5079 case ARM::BI__builtin_arm_wfi: 5080 case ARM::BI__wfi: 5081 Value = 3; 5082 break; 5083 case ARM::BI__builtin_arm_sev: 5084 case ARM::BI__sev: 5085 Value = 4; 5086 break; 5087 case ARM::BI__builtin_arm_sevl: 5088 case ARM::BI__sevl: 5089 Value = 5; 5090 break; 5091 } 5092 5093 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 5094 llvm::ConstantInt::get(Int32Ty, Value)); 5095 } 5096 5097 // Generates the IR for the read/write special register builtin, 5098 // ValueType is the type of the value that is to be written or read, 5099 // RegisterType is the type of the register being written to or read from. 5100 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 5101 const CallExpr *E, 5102 llvm::Type *RegisterType, 5103 llvm::Type *ValueType, 5104 bool IsRead, 5105 StringRef SysReg = "") { 5106 // write and register intrinsics only support 32 and 64 bit operations. 
5107 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 5108 && "Unsupported size for register."); 5109 5110 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5111 CodeGen::CodeGenModule &CGM = CGF.CGM; 5112 LLVMContext &Context = CGM.getLLVMContext(); 5113 5114 if (SysReg.empty()) { 5115 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 5116 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 5117 } 5118 5119 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 5120 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 5121 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 5122 5123 llvm::Type *Types[] = { RegisterType }; 5124 5125 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 5126 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 5127 && "Can't fit 64-bit value in 32-bit register"); 5128 5129 if (IsRead) { 5130 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 5131 llvm::Value *Call = Builder.CreateCall(F, Metadata); 5132 5133 if (MixedTypes) 5134 // Read into 64 bit register and then truncate result to 32 bit. 5135 return Builder.CreateTrunc(Call, ValueType); 5136 5137 if (ValueType->isPointerTy()) 5138 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 5139 return Builder.CreateIntToPtr(Call, ValueType); 5140 5141 return Call; 5142 } 5143 5144 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 5145 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 5146 if (MixedTypes) { 5147 // Extend 32 bit write value to 64 bit to pass to write. 5148 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 5149 return Builder.CreateCall(F, { Metadata, ArgValue }); 5150 } 5151 5152 if (ValueType->isPointerTy()) { 5153 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
5154 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 5155 return Builder.CreateCall(F, { Metadata, ArgValue }); 5156 } 5157 5158 return Builder.CreateCall(F, { Metadata, ArgValue }); 5159 } 5160 5161 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 5162 /// argument that specifies the vector type. 5163 static bool HasExtraNeonArgument(unsigned BuiltinID) { 5164 switch (BuiltinID) { 5165 default: break; 5166 case NEON::BI__builtin_neon_vget_lane_i8: 5167 case NEON::BI__builtin_neon_vget_lane_i16: 5168 case NEON::BI__builtin_neon_vget_lane_i32: 5169 case NEON::BI__builtin_neon_vget_lane_i64: 5170 case NEON::BI__builtin_neon_vget_lane_f32: 5171 case NEON::BI__builtin_neon_vgetq_lane_i8: 5172 case NEON::BI__builtin_neon_vgetq_lane_i16: 5173 case NEON::BI__builtin_neon_vgetq_lane_i32: 5174 case NEON::BI__builtin_neon_vgetq_lane_i64: 5175 case NEON::BI__builtin_neon_vgetq_lane_f32: 5176 case NEON::BI__builtin_neon_vset_lane_i8: 5177 case NEON::BI__builtin_neon_vset_lane_i16: 5178 case NEON::BI__builtin_neon_vset_lane_i32: 5179 case NEON::BI__builtin_neon_vset_lane_i64: 5180 case NEON::BI__builtin_neon_vset_lane_f32: 5181 case NEON::BI__builtin_neon_vsetq_lane_i8: 5182 case NEON::BI__builtin_neon_vsetq_lane_i16: 5183 case NEON::BI__builtin_neon_vsetq_lane_i32: 5184 case NEON::BI__builtin_neon_vsetq_lane_i64: 5185 case NEON::BI__builtin_neon_vsetq_lane_f32: 5186 case NEON::BI__builtin_neon_vsha1h_u32: 5187 case NEON::BI__builtin_neon_vsha1cq_u32: 5188 case NEON::BI__builtin_neon_vsha1pq_u32: 5189 case NEON::BI__builtin_neon_vsha1mq_u32: 5190 case clang::ARM::BI_MoveToCoprocessor: 5191 case clang::ARM::BI_MoveToCoprocessor2: 5192 return false; 5193 } 5194 return true; 5195 } 5196 5197 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 5198 const CallExpr *E, 5199 llvm::Triple::ArchType Arch) { 5200 if (auto Hint = GetValueForARMHint(BuiltinID)) 5201 return Hint; 5202 5203 if (BuiltinID == ARM::BI__emit) { 5204 bool IsThumb = 
getTarget().getTriple().getArch() == llvm::Triple::thumb; 5205 llvm::FunctionType *FTy = 5206 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 5207 5208 APSInt Value; 5209 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 5210 llvm_unreachable("Sema will ensure that the parameter is constant"); 5211 5212 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 5213 5214 llvm::InlineAsm *Emit = 5215 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 5216 /*SideEffects=*/true) 5217 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 5218 /*SideEffects=*/true); 5219 5220 return Builder.CreateCall(Emit); 5221 } 5222 5223 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 5224 Value *Option = EmitScalarExpr(E->getArg(0)); 5225 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 5226 } 5227 5228 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 5229 Value *Address = EmitScalarExpr(E->getArg(0)); 5230 Value *RW = EmitScalarExpr(E->getArg(1)); 5231 Value *IsData = EmitScalarExpr(E->getArg(2)); 5232 5233 // Locality is not supported on ARM target 5234 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 5235 5236 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5237 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5238 } 5239 5240 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 5241 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5242 return Builder.CreateCall( 5243 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5244 } 5245 5246 if (BuiltinID == ARM::BI__clear_cache) { 5247 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5248 const FunctionDecl *FD = E->getDirectCallee(); 5249 Value *Ops[2]; 5250 for (unsigned i = 0; i < 2; i++) 5251 Ops[i] = EmitScalarExpr(E->getArg(i)); 5252 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5253 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5254 StringRef Name = FD->getName(); 5255 return 
EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5256 } 5257 5258 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 5259 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 5260 Function *F; 5261 5262 switch (BuiltinID) { 5263 default: llvm_unreachable("unexpected builtin"); 5264 case ARM::BI__builtin_arm_mcrr: 5265 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 5266 break; 5267 case ARM::BI__builtin_arm_mcrr2: 5268 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 5269 break; 5270 } 5271 5272 // MCRR{2} instruction has 5 operands but 5273 // the intrinsic has 4 because Rt and Rt2 5274 // are represented as a single unsigned 64 5275 // bit integer in the intrinsic definition 5276 // but internally it's represented as 2 32 5277 // bit integers. 5278 5279 Value *Coproc = EmitScalarExpr(E->getArg(0)); 5280 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 5281 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 5282 Value *CRm = EmitScalarExpr(E->getArg(3)); 5283 5284 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 5285 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 5286 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 5287 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 5288 5289 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 5290 } 5291 5292 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 5293 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 5294 Function *F; 5295 5296 switch (BuiltinID) { 5297 default: llvm_unreachable("unexpected builtin"); 5298 case ARM::BI__builtin_arm_mrrc: 5299 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 5300 break; 5301 case ARM::BI__builtin_arm_mrrc2: 5302 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 5303 break; 5304 } 5305 5306 Value *Coproc = EmitScalarExpr(E->getArg(0)); 5307 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 5308 Value *CRm = EmitScalarExpr(E->getArg(2)); 5309 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 5310 5311 // Returns an unsigned 64 bit integer, represented 5312 // as two 32 bit integers. 
5313 5314 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 5315 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 5316 Rt = Builder.CreateZExt(Rt, Int64Ty); 5317 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 5318 5319 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 5320 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 5321 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 5322 5323 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 5324 } 5325 5326 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 5327 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 5328 BuiltinID == ARM::BI__builtin_arm_ldaex) && 5329 getContext().getTypeSize(E->getType()) == 64) || 5330 BuiltinID == ARM::BI__ldrexd) { 5331 Function *F; 5332 5333 switch (BuiltinID) { 5334 default: llvm_unreachable("unexpected builtin"); 5335 case ARM::BI__builtin_arm_ldaex: 5336 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 5337 break; 5338 case ARM::BI__builtin_arm_ldrexd: 5339 case ARM::BI__builtin_arm_ldrex: 5340 case ARM::BI__ldrexd: 5341 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 5342 break; 5343 } 5344 5345 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5346 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5347 "ldrexd"); 5348 5349 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5350 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5351 Val0 = Builder.CreateZExt(Val0, Int64Ty); 5352 Val1 = Builder.CreateZExt(Val1, Int64Ty); 5353 5354 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 5355 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5356 Val = Builder.CreateOr(Val, Val1); 5357 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5358 } 5359 5360 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 5361 BuiltinID == ARM::BI__builtin_arm_ldaex) { 5362 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5363 5364 QualType Ty = E->getType(); 5365 llvm::Type *RealResTy = ConvertType(Ty); 5366 llvm::Type *PtrTy = llvm::IntegerType::get( 
5367 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5368 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5369 5370 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 5371 ? Intrinsic::arm_ldaex 5372 : Intrinsic::arm_ldrex, 5373 PtrTy); 5374 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 5375 5376 if (RealResTy->isPointerTy()) 5377 return Builder.CreateIntToPtr(Val, RealResTy); 5378 else { 5379 llvm::Type *IntResTy = llvm::IntegerType::get( 5380 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5381 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5382 return Builder.CreateBitCast(Val, RealResTy); 5383 } 5384 } 5385 5386 if (BuiltinID == ARM::BI__builtin_arm_strexd || 5387 ((BuiltinID == ARM::BI__builtin_arm_stlex || 5388 BuiltinID == ARM::BI__builtin_arm_strex) && 5389 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 5390 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 5391 ? Intrinsic::arm_stlexd 5392 : Intrinsic::arm_strexd); 5393 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 5394 5395 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5396 Value *Val = EmitScalarExpr(E->getArg(0)); 5397 Builder.CreateStore(Val, Tmp); 5398 5399 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 5400 Val = Builder.CreateLoad(LdPtr); 5401 5402 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5403 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5404 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 5405 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 5406 } 5407 5408 if (BuiltinID == ARM::BI__builtin_arm_strex || 5409 BuiltinID == ARM::BI__builtin_arm_stlex) { 5410 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5411 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5412 5413 QualType Ty = E->getArg(0)->getType(); 5414 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5415 
getContext().getTypeSize(Ty)); 5416 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5417 5418 if (StoreVal->getType()->isPointerTy()) 5419 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 5420 else { 5421 llvm::Type *IntTy = llvm::IntegerType::get( 5422 getLLVMContext(), 5423 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5424 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5425 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 5426 } 5427 5428 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 5429 ? Intrinsic::arm_stlex 5430 : Intrinsic::arm_strex, 5431 StoreAddr->getType()); 5432 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 5433 } 5434 5435 switch (BuiltinID) { 5436 case ARM::BI__iso_volatile_load8: 5437 case ARM::BI__iso_volatile_load16: 5438 case ARM::BI__iso_volatile_load32: 5439 case ARM::BI__iso_volatile_load64: { 5440 Value *Ptr = EmitScalarExpr(E->getArg(0)); 5441 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 5442 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 5443 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 5444 LoadSize.getQuantity() * 8); 5445 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 5446 llvm::LoadInst *Load = 5447 Builder.CreateAlignedLoad(Ptr, LoadSize); 5448 Load->setVolatile(true); 5449 return Load; 5450 } 5451 case ARM::BI__iso_volatile_store8: 5452 case ARM::BI__iso_volatile_store16: 5453 case ARM::BI__iso_volatile_store32: 5454 case ARM::BI__iso_volatile_store64: { 5455 Value *Ptr = EmitScalarExpr(E->getArg(0)); 5456 Value *Value = EmitScalarExpr(E->getArg(1)); 5457 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 5458 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 5459 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 5460 StoreSize.getQuantity() * 8); 5461 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 5462 llvm::StoreInst *Store = 5463 
Builder.CreateAlignedStore(Value, Ptr, 5464 StoreSize); 5465 Store->setVolatile(true); 5466 return Store; 5467 } 5468 } 5469 5470 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 5471 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 5472 return Builder.CreateCall(F); 5473 } 5474 5475 // CRC32 5476 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5477 switch (BuiltinID) { 5478 case ARM::BI__builtin_arm_crc32b: 5479 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 5480 case ARM::BI__builtin_arm_crc32cb: 5481 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 5482 case ARM::BI__builtin_arm_crc32h: 5483 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 5484 case ARM::BI__builtin_arm_crc32ch: 5485 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 5486 case ARM::BI__builtin_arm_crc32w: 5487 case ARM::BI__builtin_arm_crc32d: 5488 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 5489 case ARM::BI__builtin_arm_crc32cw: 5490 case ARM::BI__builtin_arm_crc32cd: 5491 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 5492 } 5493 5494 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5495 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5496 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5497 5498 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 5499 // intrinsics, hence we need different codegen for these cases. 
5500 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 5501 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 5502 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 5503 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 5504 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 5505 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 5506 5507 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5508 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 5509 return Builder.CreateCall(F, {Res, Arg1b}); 5510 } else { 5511 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 5512 5513 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5514 return Builder.CreateCall(F, {Arg0, Arg1}); 5515 } 5516 } 5517 5518 if (BuiltinID == ARM::BI__builtin_arm_rsr || 5519 BuiltinID == ARM::BI__builtin_arm_rsr64 || 5520 BuiltinID == ARM::BI__builtin_arm_rsrp || 5521 BuiltinID == ARM::BI__builtin_arm_wsr || 5522 BuiltinID == ARM::BI__builtin_arm_wsr64 || 5523 BuiltinID == ARM::BI__builtin_arm_wsrp) { 5524 5525 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 5526 BuiltinID == ARM::BI__builtin_arm_rsr64 || 5527 BuiltinID == ARM::BI__builtin_arm_rsrp; 5528 5529 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 5530 BuiltinID == ARM::BI__builtin_arm_wsrp; 5531 5532 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 5533 BuiltinID == ARM::BI__builtin_arm_wsr64; 5534 5535 llvm::Type *ValueType; 5536 llvm::Type *RegisterType; 5537 if (IsPointerBuiltin) { 5538 ValueType = VoidPtrTy; 5539 RegisterType = Int32Ty; 5540 } else if (Is64Bit) { 5541 ValueType = RegisterType = Int64Ty; 5542 } else { 5543 ValueType = RegisterType = Int32Ty; 5544 } 5545 5546 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5547 } 5548 5549 // Find out if any arguments are required to be integer constant 5550 // expressions. 
5551 unsigned ICEArguments = 0; 5552 ASTContext::GetBuiltinTypeError Error; 5553 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5554 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5555 5556 auto getAlignmentValue32 = [&](Address addr) -> Value* { 5557 return Builder.getInt32(addr.getAlignment().getQuantity()); 5558 }; 5559 5560 Address PtrOp0 = Address::invalid(); 5561 Address PtrOp1 = Address::invalid(); 5562 SmallVector<Value*, 4> Ops; 5563 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 5564 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 5565 for (unsigned i = 0, e = NumArgs; i != e; i++) { 5566 if (i == 0) { 5567 switch (BuiltinID) { 5568 case NEON::BI__builtin_neon_vld1_v: 5569 case NEON::BI__builtin_neon_vld1q_v: 5570 case NEON::BI__builtin_neon_vld1q_lane_v: 5571 case NEON::BI__builtin_neon_vld1_lane_v: 5572 case NEON::BI__builtin_neon_vld1_dup_v: 5573 case NEON::BI__builtin_neon_vld1q_dup_v: 5574 case NEON::BI__builtin_neon_vst1_v: 5575 case NEON::BI__builtin_neon_vst1q_v: 5576 case NEON::BI__builtin_neon_vst1q_lane_v: 5577 case NEON::BI__builtin_neon_vst1_lane_v: 5578 case NEON::BI__builtin_neon_vst2_v: 5579 case NEON::BI__builtin_neon_vst2q_v: 5580 case NEON::BI__builtin_neon_vst2_lane_v: 5581 case NEON::BI__builtin_neon_vst2q_lane_v: 5582 case NEON::BI__builtin_neon_vst3_v: 5583 case NEON::BI__builtin_neon_vst3q_v: 5584 case NEON::BI__builtin_neon_vst3_lane_v: 5585 case NEON::BI__builtin_neon_vst3q_lane_v: 5586 case NEON::BI__builtin_neon_vst4_v: 5587 case NEON::BI__builtin_neon_vst4q_v: 5588 case NEON::BI__builtin_neon_vst4_lane_v: 5589 case NEON::BI__builtin_neon_vst4q_lane_v: 5590 // Get the alignment for the argument in addition to the value; 5591 // we'll use it later. 
5592 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 5593 Ops.push_back(PtrOp0.getPointer()); 5594 continue; 5595 } 5596 } 5597 if (i == 1) { 5598 switch (BuiltinID) { 5599 case NEON::BI__builtin_neon_vld2_v: 5600 case NEON::BI__builtin_neon_vld2q_v: 5601 case NEON::BI__builtin_neon_vld3_v: 5602 case NEON::BI__builtin_neon_vld3q_v: 5603 case NEON::BI__builtin_neon_vld4_v: 5604 case NEON::BI__builtin_neon_vld4q_v: 5605 case NEON::BI__builtin_neon_vld2_lane_v: 5606 case NEON::BI__builtin_neon_vld2q_lane_v: 5607 case NEON::BI__builtin_neon_vld3_lane_v: 5608 case NEON::BI__builtin_neon_vld3q_lane_v: 5609 case NEON::BI__builtin_neon_vld4_lane_v: 5610 case NEON::BI__builtin_neon_vld4q_lane_v: 5611 case NEON::BI__builtin_neon_vld2_dup_v: 5612 case NEON::BI__builtin_neon_vld3_dup_v: 5613 case NEON::BI__builtin_neon_vld4_dup_v: 5614 // Get the alignment for the argument in addition to the value; 5615 // we'll use it later. 5616 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 5617 Ops.push_back(PtrOp1.getPointer()); 5618 continue; 5619 } 5620 } 5621 5622 if ((ICEArguments & (1 << i)) == 0) { 5623 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5624 } else { 5625 // If this is required to be a constant, constant fold it so that we know 5626 // that the generated intrinsic gets a ConstantInt. 
5627 llvm::APSInt Result; 5628 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5629 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 5630 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5631 } 5632 } 5633 5634 switch (BuiltinID) { 5635 default: break; 5636 5637 case NEON::BI__builtin_neon_vget_lane_i8: 5638 case NEON::BI__builtin_neon_vget_lane_i16: 5639 case NEON::BI__builtin_neon_vget_lane_i32: 5640 case NEON::BI__builtin_neon_vget_lane_i64: 5641 case NEON::BI__builtin_neon_vget_lane_f32: 5642 case NEON::BI__builtin_neon_vgetq_lane_i8: 5643 case NEON::BI__builtin_neon_vgetq_lane_i16: 5644 case NEON::BI__builtin_neon_vgetq_lane_i32: 5645 case NEON::BI__builtin_neon_vgetq_lane_i64: 5646 case NEON::BI__builtin_neon_vgetq_lane_f32: 5647 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 5648 5649 case NEON::BI__builtin_neon_vrndns_f32: { 5650 Value *Arg = EmitScalarExpr(E->getArg(0)); 5651 llvm::Type *Tys[] = {Arg->getType()}; 5652 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys); 5653 return Builder.CreateCall(F, {Arg}, "vrndn"); } 5654 5655 case NEON::BI__builtin_neon_vset_lane_i8: 5656 case NEON::BI__builtin_neon_vset_lane_i16: 5657 case NEON::BI__builtin_neon_vset_lane_i32: 5658 case NEON::BI__builtin_neon_vset_lane_i64: 5659 case NEON::BI__builtin_neon_vset_lane_f32: 5660 case NEON::BI__builtin_neon_vsetq_lane_i8: 5661 case NEON::BI__builtin_neon_vsetq_lane_i16: 5662 case NEON::BI__builtin_neon_vsetq_lane_i32: 5663 case NEON::BI__builtin_neon_vsetq_lane_i64: 5664 case NEON::BI__builtin_neon_vsetq_lane_f32: 5665 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5666 5667 case NEON::BI__builtin_neon_vsha1h_u32: 5668 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 5669 "vsha1h"); 5670 case NEON::BI__builtin_neon_vsha1cq_u32: 5671 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 5672 "vsha1h"); 5673 case 
NEON::BI__builtin_neon_vsha1pq_u32: 5674 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 5675 "vsha1h"); 5676 case NEON::BI__builtin_neon_vsha1mq_u32: 5677 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 5678 "vsha1h"); 5679 5680 // The ARM _MoveToCoprocessor builtins put the input register value as 5681 // the first argument, but the LLVM intrinsic expects it as the third one. 5682 case ARM::BI_MoveToCoprocessor: 5683 case ARM::BI_MoveToCoprocessor2: { 5684 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 5685 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 5686 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 5687 Ops[3], Ops[4], Ops[5]}); 5688 } 5689 case ARM::BI_BitScanForward: 5690 case ARM::BI_BitScanForward64: 5691 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 5692 case ARM::BI_BitScanReverse: 5693 case ARM::BI_BitScanReverse64: 5694 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 5695 5696 case ARM::BI_InterlockedAnd64: 5697 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 5698 case ARM::BI_InterlockedExchange64: 5699 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 5700 case ARM::BI_InterlockedExchangeAdd64: 5701 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 5702 case ARM::BI_InterlockedExchangeSub64: 5703 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 5704 case ARM::BI_InterlockedOr64: 5705 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 5706 case ARM::BI_InterlockedXor64: 5707 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 5708 case ARM::BI_InterlockedDecrement64: 5709 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 5710 case ARM::BI_InterlockedIncrement64: 5711 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 5712 } 5713 5714 // Get the last argument, which specifies the vector type. 
5715 assert(HasExtraArg); 5716 llvm::APSInt Result; 5717 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5718 if (!Arg->isIntegerConstantExpr(Result, getContext())) 5719 return nullptr; 5720 5721 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 5722 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 5723 // Determine the overloaded type of this builtin. 5724 llvm::Type *Ty; 5725 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 5726 Ty = FloatTy; 5727 else 5728 Ty = DoubleTy; 5729 5730 // Determine whether this is an unsigned conversion or not. 5731 bool usgn = Result.getZExtValue() == 1; 5732 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 5733 5734 // Call the appropriate intrinsic. 5735 Function *F = CGM.getIntrinsic(Int, Ty); 5736 return Builder.CreateCall(F, Ops, "vcvtr"); 5737 } 5738 5739 // Determine the type of this overloaded NEON intrinsic. 5740 NeonTypeFlags Type(Result.getZExtValue()); 5741 bool usgn = Type.isUnsigned(); 5742 bool rightShift = false; 5743 5744 llvm::VectorType *VTy = GetNeonType(this, Type, 5745 getTarget().hasLegalHalfType()); 5746 llvm::Type *Ty = VTy; 5747 if (!Ty) 5748 return nullptr; 5749 5750 // Many NEON builtins have identical semantics and uses in ARM and 5751 // AArch64. Emit these in a single function. 5752 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 5753 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5754 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 5755 if (Builtin) 5756 return EmitCommonNeonBuiltinExpr( 5757 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5758 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch); 5759 5760 unsigned Int; 5761 switch (BuiltinID) { 5762 default: return nullptr; 5763 case NEON::BI__builtin_neon_vld1q_lane_v: 5764 // Handle 64-bit integer elements as a special case. Use shuffles of 5765 // one-element vectors to avoid poor code for i64 in the backend. 
5766 if (VTy->getElementType()->isIntegerTy(64)) { 5767 // Extract the other lane. 5768 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5769 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 5770 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 5771 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5772 // Load the value as a one-element vector. 5773 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 5774 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5775 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 5776 Value *Align = getAlignmentValue32(PtrOp0); 5777 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 5778 // Combine them. 5779 uint32_t Indices[] = {1 - Lane, Lane}; 5780 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 5781 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 5782 } 5783 LLVM_FALLTHROUGH; 5784 case NEON::BI__builtin_neon_vld1_lane_v: { 5785 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5786 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 5787 Value *Ld = Builder.CreateLoad(PtrOp0); 5788 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 5789 } 5790 case NEON::BI__builtin_neon_vld2_dup_v: 5791 case NEON::BI__builtin_neon_vld3_dup_v: 5792 case NEON::BI__builtin_neon_vld4_dup_v: { 5793 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
5794 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 5795 switch (BuiltinID) { 5796 case NEON::BI__builtin_neon_vld2_dup_v: 5797 Int = Intrinsic::arm_neon_vld2; 5798 break; 5799 case NEON::BI__builtin_neon_vld3_dup_v: 5800 Int = Intrinsic::arm_neon_vld3; 5801 break; 5802 case NEON::BI__builtin_neon_vld4_dup_v: 5803 Int = Intrinsic::arm_neon_vld4; 5804 break; 5805 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5806 } 5807 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5808 Function *F = CGM.getIntrinsic(Int, Tys); 5809 llvm::Value *Align = getAlignmentValue32(PtrOp1); 5810 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 5811 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5812 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5813 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5814 } 5815 switch (BuiltinID) { 5816 case NEON::BI__builtin_neon_vld2_dup_v: 5817 Int = Intrinsic::arm_neon_vld2lane; 5818 break; 5819 case NEON::BI__builtin_neon_vld3_dup_v: 5820 Int = Intrinsic::arm_neon_vld3lane; 5821 break; 5822 case NEON::BI__builtin_neon_vld4_dup_v: 5823 Int = Intrinsic::arm_neon_vld4lane; 5824 break; 5825 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5826 } 5827 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5828 Function *F = CGM.getIntrinsic(Int, Tys); 5829 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 5830 5831 SmallVector<Value*, 6> Args; 5832 Args.push_back(Ops[1]); 5833 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 5834 5835 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5836 Args.push_back(CI); 5837 Args.push_back(getAlignmentValue32(PtrOp1)); 5838 5839 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 5840 // splat lane 0 to all elts in each vector of the result. 
5841 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5842 Value *Val = Builder.CreateExtractValue(Ops[1], i); 5843 Value *Elt = Builder.CreateBitCast(Val, Ty); 5844 Elt = EmitNeonSplat(Elt, CI); 5845 Elt = Builder.CreateBitCast(Elt, Val->getType()); 5846 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 5847 } 5848 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5849 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5850 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5851 } 5852 case NEON::BI__builtin_neon_vqrshrn_n_v: 5853 Int = 5854 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 5855 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 5856 1, true); 5857 case NEON::BI__builtin_neon_vqrshrun_n_v: 5858 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 5859 Ops, "vqrshrun_n", 1, true); 5860 case NEON::BI__builtin_neon_vqshrn_n_v: 5861 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 5862 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 5863 1, true); 5864 case NEON::BI__builtin_neon_vqshrun_n_v: 5865 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 5866 Ops, "vqshrun_n", 1, true); 5867 case NEON::BI__builtin_neon_vrecpe_v: 5868 case NEON::BI__builtin_neon_vrecpeq_v: 5869 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 5870 Ops, "vrecpe"); 5871 case NEON::BI__builtin_neon_vrshrn_n_v: 5872 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 5873 Ops, "vrshrn_n", 1, true); 5874 case NEON::BI__builtin_neon_vrsra_n_v: 5875 case NEON::BI__builtin_neon_vrsraq_n_v: 5876 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5877 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5878 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 5879 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 5880 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 5881 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 5882 case NEON::BI__builtin_neon_vsri_n_v: 5883 case NEON::BI__builtin_neon_vsriq_n_v: 5884 rightShift = true; 5885 LLVM_FALLTHROUGH; 5886 case NEON::BI__builtin_neon_vsli_n_v: 5887 case NEON::BI__builtin_neon_vsliq_n_v: 5888 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 5889 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 5890 Ops, "vsli_n"); 5891 case NEON::BI__builtin_neon_vsra_n_v: 5892 case NEON::BI__builtin_neon_vsraq_n_v: 5893 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5894 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5895 return Builder.CreateAdd(Ops[0], Ops[1]); 5896 case NEON::BI__builtin_neon_vst1q_lane_v: 5897 // Handle 64-bit integer elements as a special case. Use a shuffle to get 5898 // a one-element vector and avoid poor code for i64 in the backend. 
5899 if (VTy->getElementType()->isIntegerTy(64)) { 5900 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5901 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 5902 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5903 Ops[2] = getAlignmentValue32(PtrOp0); 5904 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 5905 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 5906 Tys), Ops); 5907 } 5908 LLVM_FALLTHROUGH; 5909 case NEON::BI__builtin_neon_vst1_lane_v: { 5910 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5911 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5912 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5913 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 5914 return St; 5915 } 5916 case NEON::BI__builtin_neon_vtbl1_v: 5917 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 5918 Ops, "vtbl1"); 5919 case NEON::BI__builtin_neon_vtbl2_v: 5920 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 5921 Ops, "vtbl2"); 5922 case NEON::BI__builtin_neon_vtbl3_v: 5923 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 5924 Ops, "vtbl3"); 5925 case NEON::BI__builtin_neon_vtbl4_v: 5926 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 5927 Ops, "vtbl4"); 5928 case NEON::BI__builtin_neon_vtbx1_v: 5929 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 5930 Ops, "vtbx1"); 5931 case NEON::BI__builtin_neon_vtbx2_v: 5932 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 5933 Ops, "vtbx2"); 5934 case NEON::BI__builtin_neon_vtbx3_v: 5935 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 5936 Ops, "vtbx3"); 5937 case NEON::BI__builtin_neon_vtbx4_v: 5938 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 5939 Ops, "vtbx4"); 5940 } 5941 } 5942 5943 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 5944 const CallExpr *E, 5945 SmallVectorImpl<Value *> &Ops, 
5946 llvm::Triple::ArchType Arch) { 5947 unsigned int Int = 0; 5948 const char *s = nullptr; 5949 5950 switch (BuiltinID) { 5951 default: 5952 return nullptr; 5953 case NEON::BI__builtin_neon_vtbl1_v: 5954 case NEON::BI__builtin_neon_vqtbl1_v: 5955 case NEON::BI__builtin_neon_vqtbl1q_v: 5956 case NEON::BI__builtin_neon_vtbl2_v: 5957 case NEON::BI__builtin_neon_vqtbl2_v: 5958 case NEON::BI__builtin_neon_vqtbl2q_v: 5959 case NEON::BI__builtin_neon_vtbl3_v: 5960 case NEON::BI__builtin_neon_vqtbl3_v: 5961 case NEON::BI__builtin_neon_vqtbl3q_v: 5962 case NEON::BI__builtin_neon_vtbl4_v: 5963 case NEON::BI__builtin_neon_vqtbl4_v: 5964 case NEON::BI__builtin_neon_vqtbl4q_v: 5965 break; 5966 case NEON::BI__builtin_neon_vtbx1_v: 5967 case NEON::BI__builtin_neon_vqtbx1_v: 5968 case NEON::BI__builtin_neon_vqtbx1q_v: 5969 case NEON::BI__builtin_neon_vtbx2_v: 5970 case NEON::BI__builtin_neon_vqtbx2_v: 5971 case NEON::BI__builtin_neon_vqtbx2q_v: 5972 case NEON::BI__builtin_neon_vtbx3_v: 5973 case NEON::BI__builtin_neon_vqtbx3_v: 5974 case NEON::BI__builtin_neon_vqtbx3q_v: 5975 case NEON::BI__builtin_neon_vtbx4_v: 5976 case NEON::BI__builtin_neon_vqtbx4_v: 5977 case NEON::BI__builtin_neon_vqtbx4q_v: 5978 break; 5979 } 5980 5981 assert(E->getNumArgs() >= 3); 5982 5983 // Get the last argument, which specifies the vector type. 5984 llvm::APSInt Result; 5985 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5986 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5987 return nullptr; 5988 5989 // Determine the type of this overloaded NEON intrinsic. 5990 NeonTypeFlags Type(Result.getZExtValue()); 5991 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 5992 if (!Ty) 5993 return nullptr; 5994 5995 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5996 5997 // AArch64 scalar builtins are not overloaded, they do not have an extra 5998 // argument that specifies the vector type, need to handle each case. 
5999 switch (BuiltinID) { 6000 case NEON::BI__builtin_neon_vtbl1_v: { 6001 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 6002 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 6003 "vtbl1"); 6004 } 6005 case NEON::BI__builtin_neon_vtbl2_v: { 6006 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 6007 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 6008 "vtbl1"); 6009 } 6010 case NEON::BI__builtin_neon_vtbl3_v: { 6011 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 6012 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 6013 "vtbl2"); 6014 } 6015 case NEON::BI__builtin_neon_vtbl4_v: { 6016 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 6017 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 6018 "vtbl2"); 6019 } 6020 case NEON::BI__builtin_neon_vtbx1_v: { 6021 Value *TblRes = 6022 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 6023 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 6024 6025 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 6026 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 6027 CmpRes = Builder.CreateSExt(CmpRes, Ty); 6028 6029 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 6030 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 6031 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 6032 } 6033 case NEON::BI__builtin_neon_vtbx2_v: { 6034 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 6035 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 6036 "vtbx1"); 6037 } 6038 case NEON::BI__builtin_neon_vtbx3_v: { 6039 Value *TblRes = 6040 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 6041 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 6042 6043 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 6044 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 6045 TwentyFourV); 6046 CmpRes = Builder.CreateSExt(CmpRes, Ty); 6047 6048 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 6049 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 6050 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 6051 } 6052 case NEON::BI__builtin_neon_vtbx4_v: { 6053 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 6054 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 6055 "vtbx2"); 6056 } 6057 case NEON::BI__builtin_neon_vqtbl1_v: 6058 case NEON::BI__builtin_neon_vqtbl1q_v: 6059 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 6060 case NEON::BI__builtin_neon_vqtbl2_v: 6061 case NEON::BI__builtin_neon_vqtbl2q_v: { 6062 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 6063 case NEON::BI__builtin_neon_vqtbl3_v: 6064 case NEON::BI__builtin_neon_vqtbl3q_v: 6065 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 6066 case NEON::BI__builtin_neon_vqtbl4_v: 6067 case NEON::BI__builtin_neon_vqtbl4q_v: 6068 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 6069 case NEON::BI__builtin_neon_vqtbx1_v: 6070 case NEON::BI__builtin_neon_vqtbx1q_v: 6071 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 6072 case NEON::BI__builtin_neon_vqtbx2_v: 6073 case NEON::BI__builtin_neon_vqtbx2q_v: 6074 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 6075 case NEON::BI__builtin_neon_vqtbx3_v: 6076 case NEON::BI__builtin_neon_vqtbx3q_v: 6077 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 6078 case NEON::BI__builtin_neon_vqtbx4_v: 6079 case NEON::BI__builtin_neon_vqtbx4q_v: 6080 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 6081 } 6082 } 6083 6084 if (!Int) 6085 return nullptr; 6086 6087 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 6088 return CGF.EmitNeonCall(F, Ops, s); 6089 } 6090 6091 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 6092 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 6093 Op = Builder.CreateBitCast(Op, Int16Ty); 6094 Value *V = UndefValue::get(VTy); 6095 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 6096 Op = 
Builder.CreateInsertElement(V, Op, CI); 6097 return Op; 6098 } 6099 6100 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 6101 const CallExpr *E, 6102 llvm::Triple::ArchType Arch) { 6103 unsigned HintID = static_cast<unsigned>(-1); 6104 switch (BuiltinID) { 6105 default: break; 6106 case AArch64::BI__builtin_arm_nop: 6107 HintID = 0; 6108 break; 6109 case AArch64::BI__builtin_arm_yield: 6110 HintID = 1; 6111 break; 6112 case AArch64::BI__builtin_arm_wfe: 6113 HintID = 2; 6114 break; 6115 case AArch64::BI__builtin_arm_wfi: 6116 HintID = 3; 6117 break; 6118 case AArch64::BI__builtin_arm_sev: 6119 HintID = 4; 6120 break; 6121 case AArch64::BI__builtin_arm_sevl: 6122 HintID = 5; 6123 break; 6124 } 6125 6126 if (HintID != static_cast<unsigned>(-1)) { 6127 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 6128 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 6129 } 6130 6131 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 6132 Value *Address = EmitScalarExpr(E->getArg(0)); 6133 Value *RW = EmitScalarExpr(E->getArg(1)); 6134 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 6135 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 6136 Value *IsData = EmitScalarExpr(E->getArg(4)); 6137 6138 Value *Locality = nullptr; 6139 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 6140 // Temporal fetch, needs to convert cache level to locality. 6141 Locality = llvm::ConstantInt::get(Int32Ty, 6142 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 6143 } else { 6144 // Streaming fetch. 6145 Locality = llvm::ConstantInt::get(Int32Ty, 0); 6146 } 6147 6148 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 6149 // PLDL3STRM or PLDL2STRM. 
6150 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 6151 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 6152 } 6153 6154 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 6155 assert((getContext().getTypeSize(E->getType()) == 32) && 6156 "rbit of unusual size!"); 6157 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 6158 return Builder.CreateCall( 6159 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 6160 } 6161 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 6162 assert((getContext().getTypeSize(E->getType()) == 64) && 6163 "rbit of unusual size!"); 6164 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 6165 return Builder.CreateCall( 6166 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 6167 } 6168 6169 if (BuiltinID == AArch64::BI__clear_cache) { 6170 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 6171 const FunctionDecl *FD = E->getDirectCallee(); 6172 Value *Ops[2]; 6173 for (unsigned i = 0; i < 2; i++) 6174 Ops[i] = EmitScalarExpr(E->getArg(i)); 6175 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 6176 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 6177 StringRef Name = FD->getName(); 6178 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 6179 } 6180 6181 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 6182 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 6183 getContext().getTypeSize(E->getType()) == 128) { 6184 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 6185 ? 
Intrinsic::aarch64_ldaxp 6186 : Intrinsic::aarch64_ldxp); 6187 6188 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 6189 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 6190 "ldxp"); 6191 6192 Value *Val0 = Builder.CreateExtractValue(Val, 1); 6193 Value *Val1 = Builder.CreateExtractValue(Val, 0); 6194 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 6195 Val0 = Builder.CreateZExt(Val0, Int128Ty); 6196 Val1 = Builder.CreateZExt(Val1, Int128Ty); 6197 6198 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 6199 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 6200 Val = Builder.CreateOr(Val, Val1); 6201 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 6202 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 6203 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 6204 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 6205 6206 QualType Ty = E->getType(); 6207 llvm::Type *RealResTy = ConvertType(Ty); 6208 llvm::Type *PtrTy = llvm::IntegerType::get( 6209 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 6210 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 6211 6212 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 6213 ? Intrinsic::aarch64_ldaxr 6214 : Intrinsic::aarch64_ldxr, 6215 PtrTy); 6216 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 6217 6218 if (RealResTy->isPointerTy()) 6219 return Builder.CreateIntToPtr(Val, RealResTy); 6220 6221 llvm::Type *IntResTy = llvm::IntegerType::get( 6222 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 6223 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 6224 return Builder.CreateBitCast(Val, RealResTy); 6225 } 6226 6227 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 6228 BuiltinID == AArch64::BI__builtin_arm_stlex) && 6229 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 6230 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 6231 ? 
Intrinsic::aarch64_stlxp 6232 : Intrinsic::aarch64_stxp); 6233 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 6234 6235 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 6236 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 6237 6238 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 6239 llvm::Value *Val = Builder.CreateLoad(Tmp); 6240 6241 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 6242 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 6243 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 6244 Int8PtrTy); 6245 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 6246 } 6247 6248 if (BuiltinID == AArch64::BI__builtin_arm_strex || 6249 BuiltinID == AArch64::BI__builtin_arm_stlex) { 6250 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 6251 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 6252 6253 QualType Ty = E->getArg(0)->getType(); 6254 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 6255 getContext().getTypeSize(Ty)); 6256 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 6257 6258 if (StoreVal->getType()->isPointerTy()) 6259 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 6260 else { 6261 llvm::Type *IntTy = llvm::IntegerType::get( 6262 getLLVMContext(), 6263 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 6264 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 6265 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 6266 } 6267 6268 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 6269 ? 
Intrinsic::aarch64_stlxr 6270 : Intrinsic::aarch64_stxr, 6271 StoreAddr->getType()); 6272 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 6273 } 6274 6275 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 6276 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 6277 return Builder.CreateCall(F); 6278 } 6279 6280 // CRC32 6281 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 6282 switch (BuiltinID) { 6283 case AArch64::BI__builtin_arm_crc32b: 6284 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 6285 case AArch64::BI__builtin_arm_crc32cb: 6286 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 6287 case AArch64::BI__builtin_arm_crc32h: 6288 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 6289 case AArch64::BI__builtin_arm_crc32ch: 6290 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 6291 case AArch64::BI__builtin_arm_crc32w: 6292 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 6293 case AArch64::BI__builtin_arm_crc32cw: 6294 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 6295 case AArch64::BI__builtin_arm_crc32d: 6296 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 6297 case AArch64::BI__builtin_arm_crc32cd: 6298 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 6299 } 6300 6301 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 6302 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 6303 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 6304 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 6305 6306 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 6307 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 6308 6309 return Builder.CreateCall(F, {Arg0, Arg1}); 6310 } 6311 6312 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 6313 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 6314 BuiltinID == AArch64::BI__builtin_arm_rsrp || 6315 BuiltinID == AArch64::BI__builtin_arm_wsr || 6316 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 6317 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 6318 6319 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 6320 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 6321 BuiltinID == AArch64::BI__builtin_arm_rsrp; 6322 6323 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 6324 BuiltinID == AArch64::BI__builtin_arm_wsrp; 6325 6326 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 6327 BuiltinID != AArch64::BI__builtin_arm_wsr; 6328 6329 llvm::Type *ValueType; 6330 llvm::Type *RegisterType = Int64Ty; 6331 if (IsPointerBuiltin) { 6332 ValueType = VoidPtrTy; 6333 } else if (Is64Bit) { 6334 ValueType = Int64Ty; 6335 } else { 6336 ValueType = Int32Ty; 6337 } 6338 6339 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 6340 } 6341 6342 // Find out if any arguments are required to be integer constant 6343 // expressions. 6344 unsigned ICEArguments = 0; 6345 ASTContext::GetBuiltinTypeError Error; 6346 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 6347 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 6348 6349 llvm::SmallVector<Value*, 4> Ops; 6350 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 6351 if ((ICEArguments & (1 << i)) == 0) { 6352 Ops.push_back(EmitScalarExpr(E->getArg(i))); 6353 } else { 6354 // If this is required to be a constant, constant fold it so that we know 6355 // that the generated intrinsic gets a ConstantInt. 
6356 llvm::APSInt Result; 6357 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 6358 assert(IsConst && "Constant arg isn't actually constant?"); 6359 (void)IsConst; 6360 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 6361 } 6362 } 6363 6364 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 6365 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 6366 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 6367 6368 if (Builtin) { 6369 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 6370 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 6371 assert(Result && "SISD intrinsic should have been handled"); 6372 return Result; 6373 } 6374 6375 llvm::APSInt Result; 6376 const Expr *Arg = E->getArg(E->getNumArgs()-1); 6377 NeonTypeFlags Type(0); 6378 if (Arg->isIntegerConstantExpr(Result, getContext())) 6379 // Determine the type of this overloaded NEON intrinsic. 6380 Type = NeonTypeFlags(Result.getZExtValue()); 6381 6382 bool usgn = Type.isUnsigned(); 6383 bool quad = Type.isQuad(); 6384 6385 // Handle non-overloaded intrinsics first. 
6386 switch (BuiltinID) { 6387 default: break; 6388 case NEON::BI__builtin_neon_vabsh_f16: 6389 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6390 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs"); 6391 case NEON::BI__builtin_neon_vldrq_p128: { 6392 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 6393 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 6394 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 6395 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 6396 CharUnits::fromQuantity(16)); 6397 } 6398 case NEON::BI__builtin_neon_vstrq_p128: { 6399 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 6400 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 6401 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 6402 } 6403 case NEON::BI__builtin_neon_vcvts_u32_f32: 6404 case NEON::BI__builtin_neon_vcvtd_u64_f64: 6405 usgn = true; 6406 LLVM_FALLTHROUGH; 6407 case NEON::BI__builtin_neon_vcvts_s32_f32: 6408 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 6409 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6410 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 6411 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 6412 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 6413 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 6414 if (usgn) 6415 return Builder.CreateFPToUI(Ops[0], InTy); 6416 return Builder.CreateFPToSI(Ops[0], InTy); 6417 } 6418 case NEON::BI__builtin_neon_vcvts_f32_u32: 6419 case NEON::BI__builtin_neon_vcvtd_f64_u64: 6420 usgn = true; 6421 LLVM_FALLTHROUGH; 6422 case NEON::BI__builtin_neon_vcvts_f32_s32: 6423 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 6424 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6425 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 6426 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 6427 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 6428 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 6429 if (usgn) 6430 return Builder.CreateUIToFP(Ops[0], FTy); 6431 return Builder.CreateSIToFP(Ops[0], FTy); 6432 } 6433 case NEON::BI__builtin_neon_vcvth_f16_u16: 6434 case NEON::BI__builtin_neon_vcvth_f16_u32: 6435 case NEON::BI__builtin_neon_vcvth_f16_u64: 6436 usgn = true; 6437 // FALL THROUGH 6438 case NEON::BI__builtin_neon_vcvth_f16_s16: 6439 case NEON::BI__builtin_neon_vcvth_f16_s32: 6440 case NEON::BI__builtin_neon_vcvth_f16_s64: { 6441 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6442 llvm::Type *FTy = HalfTy; 6443 llvm::Type *InTy; 6444 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64) 6445 InTy = Int64Ty; 6446 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32) 6447 InTy = Int32Ty; 6448 else 6449 InTy = Int16Ty; 6450 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 6451 if (usgn) 6452 return Builder.CreateUIToFP(Ops[0], FTy); 6453 return Builder.CreateSIToFP(Ops[0], FTy); 6454 } 6455 case NEON::BI__builtin_neon_vcvth_u16_f16: 6456 usgn = true; 6457 // FALL THROUGH 6458 case NEON::BI__builtin_neon_vcvth_s16_f16: { 6459 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6460 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 6461 if (usgn) 6462 return Builder.CreateFPToUI(Ops[0], Int16Ty); 6463 return Builder.CreateFPToSI(Ops[0], Int16Ty); 6464 } 6465 case NEON::BI__builtin_neon_vcvth_u32_f16: 6466 usgn = true; 6467 // FALL THROUGH 6468 case NEON::BI__builtin_neon_vcvth_s32_f16: { 6469 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6470 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 6471 if (usgn) 6472 return Builder.CreateFPToUI(Ops[0], Int32Ty); 6473 return Builder.CreateFPToSI(Ops[0], Int32Ty); 6474 } 6475 case NEON::BI__builtin_neon_vcvth_u64_f16: 6476 usgn = true; 6477 // FALL THROUGH 6478 case NEON::BI__builtin_neon_vcvth_s64_f16: { 6479 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6480 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 6481 if (usgn) 6482 return 
Builder.CreateFPToUI(Ops[0], Int64Ty); 6483 return Builder.CreateFPToSI(Ops[0], Int64Ty); 6484 } 6485 case NEON::BI__builtin_neon_vcvtah_u16_f16: 6486 case NEON::BI__builtin_neon_vcvtmh_u16_f16: 6487 case NEON::BI__builtin_neon_vcvtnh_u16_f16: 6488 case NEON::BI__builtin_neon_vcvtph_u16_f16: 6489 case NEON::BI__builtin_neon_vcvtah_s16_f16: 6490 case NEON::BI__builtin_neon_vcvtmh_s16_f16: 6491 case NEON::BI__builtin_neon_vcvtnh_s16_f16: 6492 case NEON::BI__builtin_neon_vcvtph_s16_f16: { 6493 unsigned Int; 6494 llvm::Type* InTy = Int32Ty; 6495 llvm::Type* FTy = HalfTy; 6496 llvm::Type *Tys[2] = {InTy, FTy}; 6497 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6498 switch (BuiltinID) { 6499 default: llvm_unreachable("missing builtin ID in switch!"); 6500 case NEON::BI__builtin_neon_vcvtah_u16_f16: 6501 Int = Intrinsic::aarch64_neon_fcvtau; break; 6502 case NEON::BI__builtin_neon_vcvtmh_u16_f16: 6503 Int = Intrinsic::aarch64_neon_fcvtmu; break; 6504 case NEON::BI__builtin_neon_vcvtnh_u16_f16: 6505 Int = Intrinsic::aarch64_neon_fcvtnu; break; 6506 case NEON::BI__builtin_neon_vcvtph_u16_f16: 6507 Int = Intrinsic::aarch64_neon_fcvtpu; break; 6508 case NEON::BI__builtin_neon_vcvtah_s16_f16: 6509 Int = Intrinsic::aarch64_neon_fcvtas; break; 6510 case NEON::BI__builtin_neon_vcvtmh_s16_f16: 6511 Int = Intrinsic::aarch64_neon_fcvtms; break; 6512 case NEON::BI__builtin_neon_vcvtnh_s16_f16: 6513 Int = Intrinsic::aarch64_neon_fcvtns; break; 6514 case NEON::BI__builtin_neon_vcvtph_s16_f16: 6515 Int = Intrinsic::aarch64_neon_fcvtps; break; 6516 } 6517 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); 6518 return Builder.CreateTrunc(Ops[0], Int16Ty); 6519 } 6520 case NEON::BI__builtin_neon_vcaleh_f16: 6521 case NEON::BI__builtin_neon_vcalth_f16: 6522 case NEON::BI__builtin_neon_vcageh_f16: 6523 case NEON::BI__builtin_neon_vcagth_f16: { 6524 unsigned Int; 6525 llvm::Type* InTy = Int32Ty; 6526 llvm::Type* FTy = HalfTy; 6527 llvm::Type *Tys[2] = {InTy, FTy}; 6528 
Ops.push_back(EmitScalarExpr(E->getArg(1))); 6529 switch (BuiltinID) { 6530 default: llvm_unreachable("missing builtin ID in switch!"); 6531 case NEON::BI__builtin_neon_vcageh_f16: 6532 Int = Intrinsic::aarch64_neon_facge; break; 6533 case NEON::BI__builtin_neon_vcagth_f16: 6534 Int = Intrinsic::aarch64_neon_facgt; break; 6535 case NEON::BI__builtin_neon_vcaleh_f16: 6536 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break; 6537 case NEON::BI__builtin_neon_vcalth_f16: 6538 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break; 6539 } 6540 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg"); 6541 return Builder.CreateTrunc(Ops[0], Int16Ty); 6542 } 6543 case NEON::BI__builtin_neon_vcvth_n_s16_f16: 6544 case NEON::BI__builtin_neon_vcvth_n_u16_f16: { 6545 unsigned Int; 6546 llvm::Type* InTy = Int32Ty; 6547 llvm::Type* FTy = HalfTy; 6548 llvm::Type *Tys[2] = {InTy, FTy}; 6549 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6550 switch (BuiltinID) { 6551 default: llvm_unreachable("missing builtin ID in switch!"); 6552 case NEON::BI__builtin_neon_vcvth_n_s16_f16: 6553 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break; 6554 case NEON::BI__builtin_neon_vcvth_n_u16_f16: 6555 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break; 6556 } 6557 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); 6558 return Builder.CreateTrunc(Ops[0], Int16Ty); 6559 } 6560 case NEON::BI__builtin_neon_vcvth_n_f16_s16: 6561 case NEON::BI__builtin_neon_vcvth_n_f16_u16: { 6562 unsigned Int; 6563 llvm::Type* FTy = HalfTy; 6564 llvm::Type* InTy = Int32Ty; 6565 llvm::Type *Tys[2] = {FTy, InTy}; 6566 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6567 switch (BuiltinID) { 6568 default: llvm_unreachable("missing builtin ID in switch!"); 6569 case NEON::BI__builtin_neon_vcvth_n_f16_s16: 6570 Int = Intrinsic::aarch64_neon_vcvtfxs2fp; 6571 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext"); 6572 break; 6573 case NEON::BI__builtin_neon_vcvth_n_f16_u16: 6574 Int = 
Intrinsic::aarch64_neon_vcvtfxu2fp; 6575 Ops[0] = Builder.CreateZExt(Ops[0], InTy); 6576 break; 6577 } 6578 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); 6579 } 6580 case NEON::BI__builtin_neon_vpaddd_s64: { 6581 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 6582 Value *Vec = EmitScalarExpr(E->getArg(0)); 6583 // The vector is v2f64, so make sure it's bitcast to that. 6584 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 6585 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 6586 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 6587 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 6588 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 6589 // Pairwise addition of a v2f64 into a scalar f64. 6590 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 6591 } 6592 case NEON::BI__builtin_neon_vpaddd_f64: { 6593 llvm::Type *Ty = 6594 llvm::VectorType::get(DoubleTy, 2); 6595 Value *Vec = EmitScalarExpr(E->getArg(0)); 6596 // The vector is v2f64, so make sure it's bitcast to that. 6597 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 6598 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 6599 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 6600 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 6601 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 6602 // Pairwise addition of a v2f64 into a scalar f64. 6603 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 6604 } 6605 case NEON::BI__builtin_neon_vpadds_f32: { 6606 llvm::Type *Ty = 6607 llvm::VectorType::get(FloatTy, 2); 6608 Value *Vec = EmitScalarExpr(E->getArg(0)); 6609 // The vector is v2f32, so make sure it's bitcast to that. 
6610 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 6611 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 6612 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 6613 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 6614 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 6615 // Pairwise addition of a v2f32 into a scalar f32. 6616 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 6617 } 6618 case NEON::BI__builtin_neon_vceqzd_s64: 6619 case NEON::BI__builtin_neon_vceqzd_f64: 6620 case NEON::BI__builtin_neon_vceqzs_f32: 6621 case NEON::BI__builtin_neon_vceqzh_f16: 6622 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6623 return EmitAArch64CompareBuiltinExpr( 6624 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6625 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 6626 case NEON::BI__builtin_neon_vcgezd_s64: 6627 case NEON::BI__builtin_neon_vcgezd_f64: 6628 case NEON::BI__builtin_neon_vcgezs_f32: 6629 case NEON::BI__builtin_neon_vcgezh_f16: 6630 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6631 return EmitAArch64CompareBuiltinExpr( 6632 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6633 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 6634 case NEON::BI__builtin_neon_vclezd_s64: 6635 case NEON::BI__builtin_neon_vclezd_f64: 6636 case NEON::BI__builtin_neon_vclezs_f32: 6637 case NEON::BI__builtin_neon_vclezh_f16: 6638 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6639 return EmitAArch64CompareBuiltinExpr( 6640 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6641 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 6642 case NEON::BI__builtin_neon_vcgtzd_s64: 6643 case NEON::BI__builtin_neon_vcgtzd_f64: 6644 case NEON::BI__builtin_neon_vcgtzs_f32: 6645 case NEON::BI__builtin_neon_vcgtzh_f16: 6646 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6647 return EmitAArch64CompareBuiltinExpr( 6648 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6649 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 6650 case 
NEON::BI__builtin_neon_vcltzd_s64: 6651 case NEON::BI__builtin_neon_vcltzd_f64: 6652 case NEON::BI__builtin_neon_vcltzs_f32: 6653 case NEON::BI__builtin_neon_vcltzh_f16: 6654 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6655 return EmitAArch64CompareBuiltinExpr( 6656 Ops[0], ConvertType(E->getCallReturnType(getContext())), 6657 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 6658 6659 case NEON::BI__builtin_neon_vceqzd_u64: { 6660 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6661 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 6662 Ops[0] = 6663 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 6664 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 6665 } 6666 case NEON::BI__builtin_neon_vceqd_f64: 6667 case NEON::BI__builtin_neon_vcled_f64: 6668 case NEON::BI__builtin_neon_vcltd_f64: 6669 case NEON::BI__builtin_neon_vcged_f64: 6670 case NEON::BI__builtin_neon_vcgtd_f64: { 6671 llvm::CmpInst::Predicate P; 6672 switch (BuiltinID) { 6673 default: llvm_unreachable("missing builtin ID in switch!"); 6674 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 6675 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 6676 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 6677 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 6678 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 6679 } 6680 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6681 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6682 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6683 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 6684 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 6685 } 6686 case NEON::BI__builtin_neon_vceqs_f32: 6687 case NEON::BI__builtin_neon_vcles_f32: 6688 case NEON::BI__builtin_neon_vclts_f32: 6689 case NEON::BI__builtin_neon_vcges_f32: 6690 case NEON::BI__builtin_neon_vcgts_f32: { 6691 llvm::CmpInst::Predicate P; 6692 switch 
(BuiltinID) { 6693 default: llvm_unreachable("missing builtin ID in switch!"); 6694 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 6695 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 6696 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 6697 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 6698 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 6699 } 6700 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6701 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 6702 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 6703 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 6704 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 6705 } 6706 case NEON::BI__builtin_neon_vceqh_f16: 6707 case NEON::BI__builtin_neon_vcleh_f16: 6708 case NEON::BI__builtin_neon_vclth_f16: 6709 case NEON::BI__builtin_neon_vcgeh_f16: 6710 case NEON::BI__builtin_neon_vcgth_f16: { 6711 llvm::CmpInst::Predicate P; 6712 switch (BuiltinID) { 6713 default: llvm_unreachable("missing builtin ID in switch!"); 6714 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break; 6715 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break; 6716 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break; 6717 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break; 6718 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break; 6719 } 6720 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6721 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 6722 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); 6723 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 6724 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); 6725 } 6726 case NEON::BI__builtin_neon_vceqd_s64: 6727 case NEON::BI__builtin_neon_vceqd_u64: 6728 case NEON::BI__builtin_neon_vcgtd_s64: 6729 case NEON::BI__builtin_neon_vcgtd_u64: 6730 case 
NEON::BI__builtin_neon_vcltd_s64: 6731 case NEON::BI__builtin_neon_vcltd_u64: 6732 case NEON::BI__builtin_neon_vcged_u64: 6733 case NEON::BI__builtin_neon_vcged_s64: 6734 case NEON::BI__builtin_neon_vcled_u64: 6735 case NEON::BI__builtin_neon_vcled_s64: { 6736 llvm::CmpInst::Predicate P; 6737 switch (BuiltinID) { 6738 default: llvm_unreachable("missing builtin ID in switch!"); 6739 case NEON::BI__builtin_neon_vceqd_s64: 6740 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 6741 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 6742 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 6743 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 6744 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 6745 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 6746 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 6747 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 6748 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 6749 } 6750 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6751 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 6752 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6753 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 6754 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 6755 } 6756 case NEON::BI__builtin_neon_vtstd_s64: 6757 case NEON::BI__builtin_neon_vtstd_u64: { 6758 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6759 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 6760 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6761 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 6762 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 6763 llvm::Constant::getNullValue(Int64Ty)); 6764 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 6765 } 6766 case NEON::BI__builtin_neon_vset_lane_i8: 6767 case NEON::BI__builtin_neon_vset_lane_i16: 6768 case 
NEON::BI__builtin_neon_vset_lane_i32: 6769 case NEON::BI__builtin_neon_vset_lane_i64: 6770 case NEON::BI__builtin_neon_vset_lane_f32: 6771 case NEON::BI__builtin_neon_vsetq_lane_i8: 6772 case NEON::BI__builtin_neon_vsetq_lane_i16: 6773 case NEON::BI__builtin_neon_vsetq_lane_i32: 6774 case NEON::BI__builtin_neon_vsetq_lane_i64: 6775 case NEON::BI__builtin_neon_vsetq_lane_f32: 6776 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6777 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6778 case NEON::BI__builtin_neon_vset_lane_f64: 6779 // The vector type needs a cast for the v1f64 variant. 6780 Ops[1] = Builder.CreateBitCast(Ops[1], 6781 llvm::VectorType::get(DoubleTy, 1)); 6782 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6783 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6784 case NEON::BI__builtin_neon_vsetq_lane_f64: 6785 // The vector type needs a cast for the v2f64 variant. 6786 Ops[1] = Builder.CreateBitCast(Ops[1], 6787 llvm::VectorType::get(DoubleTy, 2)); 6788 Ops.push_back(EmitScalarExpr(E->getArg(2))); 6789 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6790 6791 case NEON::BI__builtin_neon_vget_lane_i8: 6792 case NEON::BI__builtin_neon_vdupb_lane_i8: 6793 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 6794 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6795 "vget_lane"); 6796 case NEON::BI__builtin_neon_vgetq_lane_i8: 6797 case NEON::BI__builtin_neon_vdupb_laneq_i8: 6798 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 6799 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6800 "vgetq_lane"); 6801 case NEON::BI__builtin_neon_vget_lane_i16: 6802 case NEON::BI__builtin_neon_vduph_lane_i16: 6803 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 6804 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6805 "vget_lane"); 6806 case 
NEON::BI__builtin_neon_vgetq_lane_i16: 6807 case NEON::BI__builtin_neon_vduph_laneq_i16: 6808 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 6809 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6810 "vgetq_lane"); 6811 case NEON::BI__builtin_neon_vget_lane_i32: 6812 case NEON::BI__builtin_neon_vdups_lane_i32: 6813 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 6814 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6815 "vget_lane"); 6816 case NEON::BI__builtin_neon_vdups_lane_f32: 6817 Ops[0] = Builder.CreateBitCast(Ops[0], 6818 llvm::VectorType::get(FloatTy, 2)); 6819 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6820 "vdups_lane"); 6821 case NEON::BI__builtin_neon_vgetq_lane_i32: 6822 case NEON::BI__builtin_neon_vdups_laneq_i32: 6823 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 6824 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6825 "vgetq_lane"); 6826 case NEON::BI__builtin_neon_vget_lane_i64: 6827 case NEON::BI__builtin_neon_vdupd_lane_i64: 6828 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 6829 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6830 "vget_lane"); 6831 case NEON::BI__builtin_neon_vdupd_lane_f64: 6832 Ops[0] = Builder.CreateBitCast(Ops[0], 6833 llvm::VectorType::get(DoubleTy, 1)); 6834 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6835 "vdupd_lane"); 6836 case NEON::BI__builtin_neon_vgetq_lane_i64: 6837 case NEON::BI__builtin_neon_vdupd_laneq_i64: 6838 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 6839 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6840 "vgetq_lane"); 6841 case NEON::BI__builtin_neon_vget_lane_f32: 6842 Ops[0] = Builder.CreateBitCast(Ops[0], 6843 llvm::VectorType::get(FloatTy, 2)); 6844 return 
Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6845 "vget_lane"); 6846 case NEON::BI__builtin_neon_vget_lane_f64: 6847 Ops[0] = Builder.CreateBitCast(Ops[0], 6848 llvm::VectorType::get(DoubleTy, 1)); 6849 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6850 "vget_lane"); 6851 case NEON::BI__builtin_neon_vgetq_lane_f32: 6852 case NEON::BI__builtin_neon_vdups_laneq_f32: 6853 Ops[0] = Builder.CreateBitCast(Ops[0], 6854 llvm::VectorType::get(FloatTy, 4)); 6855 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6856 "vgetq_lane"); 6857 case NEON::BI__builtin_neon_vgetq_lane_f64: 6858 case NEON::BI__builtin_neon_vdupd_laneq_f64: 6859 Ops[0] = Builder.CreateBitCast(Ops[0], 6860 llvm::VectorType::get(DoubleTy, 2)); 6861 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 6862 "vgetq_lane"); 6863 case NEON::BI__builtin_neon_vaddh_f16: 6864 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6865 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh"); 6866 case NEON::BI__builtin_neon_vsubh_f16: 6867 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6868 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh"); 6869 case NEON::BI__builtin_neon_vmulh_f16: 6870 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6871 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh"); 6872 case NEON::BI__builtin_neon_vdivh_f16: 6873 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6874 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); 6875 case NEON::BI__builtin_neon_vfmah_f16: { 6876 Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); 6877 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
6878 return Builder.CreateCall(F, 6879 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); 6880 } 6881 case NEON::BI__builtin_neon_vfmsh_f16: { 6882 Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); 6883 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); 6884 Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); 6885 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 6886 return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]}); 6887 } 6888 case NEON::BI__builtin_neon_vaddd_s64: 6889 case NEON::BI__builtin_neon_vaddd_u64: 6890 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 6891 case NEON::BI__builtin_neon_vsubd_s64: 6892 case NEON::BI__builtin_neon_vsubd_u64: 6893 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 6894 case NEON::BI__builtin_neon_vqdmlalh_s16: 6895 case NEON::BI__builtin_neon_vqdmlslh_s16: { 6896 SmallVector<Value *, 2> ProductOps; 6897 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6898 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 6899 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6900 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6901 ProductOps, "vqdmlXl"); 6902 Constant *CI = ConstantInt::get(SizeTy, 0); 6903 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 6904 6905 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 6906 ? 
Intrinsic::aarch64_neon_sqadd 6907 : Intrinsic::aarch64_neon_sqsub; 6908 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 6909 } 6910 case NEON::BI__builtin_neon_vqshlud_n_s64: { 6911 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6912 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6913 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 6914 Ops, "vqshlu_n"); 6915 } 6916 case NEON::BI__builtin_neon_vqshld_n_u64: 6917 case NEON::BI__builtin_neon_vqshld_n_s64: { 6918 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 6919 ? Intrinsic::aarch64_neon_uqshl 6920 : Intrinsic::aarch64_neon_sqshl; 6921 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6922 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 6923 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 6924 } 6925 case NEON::BI__builtin_neon_vrshrd_n_u64: 6926 case NEON::BI__builtin_neon_vrshrd_n_s64: { 6927 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 6928 ? Intrinsic::aarch64_neon_urshl 6929 : Intrinsic::aarch64_neon_srshl; 6930 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6931 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 6932 Ops[1] = ConstantInt::get(Int64Ty, -SV); 6933 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 6934 } 6935 case NEON::BI__builtin_neon_vrsrad_n_u64: 6936 case NEON::BI__builtin_neon_vrsrad_n_s64: { 6937 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 6938 ? 
Intrinsic::aarch64_neon_urshl 6939 : Intrinsic::aarch64_neon_srshl; 6940 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 6941 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 6942 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 6943 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 6944 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 6945 } 6946 case NEON::BI__builtin_neon_vshld_n_s64: 6947 case NEON::BI__builtin_neon_vshld_n_u64: { 6948 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6949 return Builder.CreateShl( 6950 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 6951 } 6952 case NEON::BI__builtin_neon_vshrd_n_s64: { 6953 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6954 return Builder.CreateAShr( 6955 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6956 Amt->getZExtValue())), 6957 "shrd_n"); 6958 } 6959 case NEON::BI__builtin_neon_vshrd_n_u64: { 6960 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 6961 uint64_t ShiftAmt = Amt->getZExtValue(); 6962 // Right-shifting an unsigned value by its size yields 0. 6963 if (ShiftAmt == 64) 6964 return ConstantInt::get(Int64Ty, 0); 6965 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 6966 "shrd_n"); 6967 } 6968 case NEON::BI__builtin_neon_vsrad_n_s64: { 6969 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6970 Ops[1] = Builder.CreateAShr( 6971 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 6972 Amt->getZExtValue())), 6973 "shrd_n"); 6974 return Builder.CreateAdd(Ops[0], Ops[1]); 6975 } 6976 case NEON::BI__builtin_neon_vsrad_n_u64: { 6977 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 6978 uint64_t ShiftAmt = Amt->getZExtValue(); 6979 // Right-shifting an unsigned value by its size yields 0. 6980 // As Op + 0 = Op, return Ops[0] directly. 
6981 if (ShiftAmt == 64) 6982 return Ops[0]; 6983 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 6984 "shrd_n"); 6985 return Builder.CreateAdd(Ops[0], Ops[1]); 6986 } 6987 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 6988 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 6989 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 6990 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 6991 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 6992 "lane"); 6993 SmallVector<Value *, 2> ProductOps; 6994 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 6995 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 6996 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 6997 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 6998 ProductOps, "vqdmlXl"); 6999 Constant *CI = ConstantInt::get(SizeTy, 0); 7000 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 7001 Ops.pop_back(); 7002 7003 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 7004 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 7005 ? Intrinsic::aarch64_neon_sqadd 7006 : Intrinsic::aarch64_neon_sqsub; 7007 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 7008 } 7009 case NEON::BI__builtin_neon_vqdmlals_s32: 7010 case NEON::BI__builtin_neon_vqdmlsls_s32: { 7011 SmallVector<Value *, 2> ProductOps; 7012 ProductOps.push_back(Ops[1]); 7013 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 7014 Ops[1] = 7015 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 7016 ProductOps, "vqdmlXl"); 7017 7018 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 7019 ? 
Intrinsic::aarch64_neon_sqadd 7020 : Intrinsic::aarch64_neon_sqsub; 7021 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 7022 } 7023 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 7024 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 7025 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 7026 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 7027 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 7028 "lane"); 7029 SmallVector<Value *, 2> ProductOps; 7030 ProductOps.push_back(Ops[1]); 7031 ProductOps.push_back(Ops[2]); 7032 Ops[1] = 7033 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 7034 ProductOps, "vqdmlXl"); 7035 Ops.pop_back(); 7036 7037 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 7038 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 7039 ? Intrinsic::aarch64_neon_sqadd 7040 : Intrinsic::aarch64_neon_sqsub; 7041 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 7042 } 7043 } 7044 7045 llvm::VectorType *VTy = GetNeonType(this, Type); 7046 llvm::Type *Ty = VTy; 7047 if (!Ty) 7048 return nullptr; 7049 7050 // Not all intrinsics handled by the common case work for AArch64 yet, so only 7051 // defer to common code if it's been added to our special map. 
7052 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 7053 AArch64SIMDIntrinsicsProvenSorted); 7054 7055 if (Builtin) 7056 return EmitCommonNeonBuiltinExpr( 7057 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 7058 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 7059 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch); 7060 7061 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch)) 7062 return V; 7063 7064 unsigned Int; 7065 switch (BuiltinID) { 7066 default: return nullptr; 7067 case NEON::BI__builtin_neon_vbsl_v: 7068 case NEON::BI__builtin_neon_vbslq_v: { 7069 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 7070 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 7071 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 7072 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 7073 7074 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 7075 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 7076 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 7077 return Builder.CreateBitCast(Ops[0], Ty); 7078 } 7079 case NEON::BI__builtin_neon_vfma_lane_v: 7080 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 7081 // The ARM builtins (and instructions) have the addend as the first 7082 // operand, but the 'fma' intrinsics have it last. Swap it around here. 7083 Value *Addend = Ops[0]; 7084 Value *Multiplicand = Ops[1]; 7085 Value *LaneSource = Ops[2]; 7086 Ops[0] = Multiplicand; 7087 Ops[1] = LaneSource; 7088 Ops[2] = Addend; 7089 7090 // Now adjust things to handle the lane access. 7091 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
7092 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 7093 VTy; 7094 llvm::Constant *cst = cast<Constant>(Ops[3]); 7095 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 7096 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 7097 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 7098 7099 Ops.pop_back(); 7100 Int = Intrinsic::fma; 7101 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 7102 } 7103 case NEON::BI__builtin_neon_vfma_laneq_v: { 7104 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 7105 // v1f64 fma should be mapped to Neon scalar f64 fma 7106 if (VTy && VTy->getElementType() == DoubleTy) { 7107 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 7108 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 7109 llvm::Type *VTy = GetNeonType(this, 7110 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 7111 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 7112 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 7113 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 7114 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 7115 return Builder.CreateBitCast(Result, Ty); 7116 } 7117 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 7118 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7119 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7120 7121 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 7122 VTy->getNumElements() * 2); 7123 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 7124 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 7125 cast<ConstantInt>(Ops[3])); 7126 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 7127 7128 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 7129 } 7130 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 7131 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 7132 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7133 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7134 7135 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 7136 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 7137 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 7138 } 7139 case NEON::BI__builtin_neon_vfmah_lane_f16: 7140 case NEON::BI__builtin_neon_vfmas_lane_f32: 7141 case NEON::BI__builtin_neon_vfmah_laneq_f16: 7142 case NEON::BI__builtin_neon_vfmas_laneq_f32: 7143 case NEON::BI__builtin_neon_vfmad_lane_f64: 7144 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 7145 Ops.push_back(EmitScalarExpr(E->getArg(3))); 7146 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 7147 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 7148 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 7149 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 7150 } 7151 case NEON::BI__builtin_neon_vmull_v: 7152 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 7153 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 7154 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 7155 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 7156 case NEON::BI__builtin_neon_vmax_v: 7157 case NEON::BI__builtin_neon_vmaxq_v: 7158 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 7159 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 7160 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 7161 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 7162 case NEON::BI__builtin_neon_vmaxh_f16: { 7163 Ops.push_back(EmitScalarExpr(E->getArg(1))); 7164 Int = Intrinsic::aarch64_neon_fmax; 7165 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax"); 7166 } 7167 case NEON::BI__builtin_neon_vmin_v: 7168 case NEON::BI__builtin_neon_vminq_v: 7169 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 7170 Int = usgn ? 
Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 7171 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 7172 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 7173 case NEON::BI__builtin_neon_vminh_f16: { 7174 Ops.push_back(EmitScalarExpr(E->getArg(1))); 7175 Int = Intrinsic::aarch64_neon_fmin; 7176 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin"); 7177 } 7178 case NEON::BI__builtin_neon_vabd_v: 7179 case NEON::BI__builtin_neon_vabdq_v: 7180 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 7181 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 7182 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 7183 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 7184 case NEON::BI__builtin_neon_vpadal_v: 7185 case NEON::BI__builtin_neon_vpadalq_v: { 7186 unsigned ArgElts = VTy->getNumElements(); 7187 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 7188 unsigned BitWidth = EltTy->getBitWidth(); 7189 llvm::Type *ArgTy = llvm::VectorType::get( 7190 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 7191 llvm::Type* Tys[2] = { VTy, ArgTy }; 7192 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 7193 SmallVector<llvm::Value*, 1> TmpOps; 7194 TmpOps.push_back(Ops[1]); 7195 Function *F = CGM.getIntrinsic(Int, Tys); 7196 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 7197 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 7198 return Builder.CreateAdd(tmp, addend); 7199 } 7200 case NEON::BI__builtin_neon_vpmin_v: 7201 case NEON::BI__builtin_neon_vpminq_v: 7202 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 7203 Int = usgn ? 
Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 7204 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 7205 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 7206 case NEON::BI__builtin_neon_vpmax_v: 7207 case NEON::BI__builtin_neon_vpmaxq_v: 7208 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 7209 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 7210 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 7211 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 7212 case NEON::BI__builtin_neon_vminnm_v: 7213 case NEON::BI__builtin_neon_vminnmq_v: 7214 Int = Intrinsic::aarch64_neon_fminnm; 7215 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 7216 case NEON::BI__builtin_neon_vminnmh_f16: 7217 Ops.push_back(EmitScalarExpr(E->getArg(1))); 7218 Int = Intrinsic::aarch64_neon_fminnm; 7219 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm"); 7220 case NEON::BI__builtin_neon_vmaxnm_v: 7221 case NEON::BI__builtin_neon_vmaxnmq_v: 7222 Int = Intrinsic::aarch64_neon_fmaxnm; 7223 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 7224 case NEON::BI__builtin_neon_vmaxnmh_f16: 7225 Ops.push_back(EmitScalarExpr(E->getArg(1))); 7226 Int = Intrinsic::aarch64_neon_fmaxnm; 7227 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm"); 7228 case NEON::BI__builtin_neon_vrecpss_f32: { 7229 Ops.push_back(EmitScalarExpr(E->getArg(1))); 7230 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 7231 Ops, "vrecps"); 7232 } 7233 case NEON::BI__builtin_neon_vrecpsd_f64: 7234 Ops.push_back(EmitScalarExpr(E->getArg(1))); 7235 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 7236 Ops, "vrecps"); 7237 case NEON::BI__builtin_neon_vrecpsh_f16: 7238 Ops.push_back(EmitScalarExpr(E->getArg(1))); 7239 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), 7240 
Ops, "vrecps"); 7241 case NEON::BI__builtin_neon_vqshrun_n_v: 7242 Int = Intrinsic::aarch64_neon_sqshrun; 7243 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 7244 case NEON::BI__builtin_neon_vqrshrun_n_v: 7245 Int = Intrinsic::aarch64_neon_sqrshrun; 7246 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 7247 case NEON::BI__builtin_neon_vqshrn_n_v: 7248 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 7249 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 7250 case NEON::BI__builtin_neon_vrshrn_n_v: 7251 Int = Intrinsic::aarch64_neon_rshrn; 7252 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 7253 case NEON::BI__builtin_neon_vqrshrn_n_v: 7254 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 7255 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 7256 case NEON::BI__builtin_neon_vrndah_f16: { 7257 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7258 Int = Intrinsic::round; 7259 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); 7260 } 7261 case NEON::BI__builtin_neon_vrnda_v: 7262 case NEON::BI__builtin_neon_vrndaq_v: { 7263 Int = Intrinsic::round; 7264 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 7265 } 7266 case NEON::BI__builtin_neon_vrndih_f16: { 7267 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7268 Int = Intrinsic::nearbyint; 7269 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); 7270 } 7271 case NEON::BI__builtin_neon_vrndi_v: 7272 case NEON::BI__builtin_neon_vrndiq_v: { 7273 Int = Intrinsic::nearbyint; 7274 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 7275 } 7276 case NEON::BI__builtin_neon_vrndmh_f16: { 7277 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7278 Int = Intrinsic::floor; 7279 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); 7280 } 7281 case NEON::BI__builtin_neon_vrndm_v: 7282 case NEON::BI__builtin_neon_vrndmq_v: { 7283 Int = 
Intrinsic::floor; 7284 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 7285 } 7286 case NEON::BI__builtin_neon_vrndnh_f16: { 7287 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7288 Int = Intrinsic::aarch64_neon_frintn; 7289 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn"); 7290 } 7291 case NEON::BI__builtin_neon_vrndn_v: 7292 case NEON::BI__builtin_neon_vrndnq_v: { 7293 Int = Intrinsic::aarch64_neon_frintn; 7294 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 7295 } 7296 case NEON::BI__builtin_neon_vrndph_f16: { 7297 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7298 Int = Intrinsic::ceil; 7299 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); 7300 } 7301 case NEON::BI__builtin_neon_vrndp_v: 7302 case NEON::BI__builtin_neon_vrndpq_v: { 7303 Int = Intrinsic::ceil; 7304 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 7305 } 7306 case NEON::BI__builtin_neon_vrndxh_f16: { 7307 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7308 Int = Intrinsic::rint; 7309 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); 7310 } 7311 case NEON::BI__builtin_neon_vrndx_v: 7312 case NEON::BI__builtin_neon_vrndxq_v: { 7313 Int = Intrinsic::rint; 7314 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 7315 } 7316 case NEON::BI__builtin_neon_vrndh_f16: { 7317 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7318 Int = Intrinsic::trunc; 7319 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); 7320 } 7321 case NEON::BI__builtin_neon_vrnd_v: 7322 case NEON::BI__builtin_neon_vrndq_v: { 7323 Int = Intrinsic::trunc; 7324 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 7325 } 7326 case NEON::BI__builtin_neon_vcvt_f64_v: 7327 case NEON::BI__builtin_neon_vcvtq_f64_v: 7328 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7329 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 7330 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 7331 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 7332 case NEON::BI__builtin_neon_vcvt_f64_f32: { 7333 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 7334 "unexpected vcvt_f64_f32 builtin"); 7335 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 7336 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 7337 7338 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 7339 } 7340 case NEON::BI__builtin_neon_vcvt_f32_f64: { 7341 assert(Type.getEltType() == NeonTypeFlags::Float32 && 7342 "unexpected vcvt_f32_f64 builtin"); 7343 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 7344 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 7345 7346 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 7347 } 7348 case NEON::BI__builtin_neon_vcvt_s32_v: 7349 case NEON::BI__builtin_neon_vcvt_u32_v: 7350 case NEON::BI__builtin_neon_vcvt_s64_v: 7351 case NEON::BI__builtin_neon_vcvt_u64_v: 7352 case NEON::BI__builtin_neon_vcvt_s16_v: 7353 case NEON::BI__builtin_neon_vcvt_u16_v: 7354 case NEON::BI__builtin_neon_vcvtq_s32_v: 7355 case NEON::BI__builtin_neon_vcvtq_u32_v: 7356 case NEON::BI__builtin_neon_vcvtq_s64_v: 7357 case NEON::BI__builtin_neon_vcvtq_u64_v: 7358 case NEON::BI__builtin_neon_vcvtq_s16_v: 7359 case NEON::BI__builtin_neon_vcvtq_u16_v: { 7360 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 7361 if (usgn) 7362 return Builder.CreateFPToUI(Ops[0], Ty); 7363 return Builder.CreateFPToSI(Ops[0], Ty); 7364 } 7365 case NEON::BI__builtin_neon_vcvta_s16_v: 7366 case NEON::BI__builtin_neon_vcvta_s32_v: 7367 case NEON::BI__builtin_neon_vcvtaq_s16_v: 7368 case NEON::BI__builtin_neon_vcvtaq_s32_v: 7369 case NEON::BI__builtin_neon_vcvta_u32_v: 7370 case NEON::BI__builtin_neon_vcvtaq_u16_v: 7371 case NEON::BI__builtin_neon_vcvtaq_u32_v: 7372 case NEON::BI__builtin_neon_vcvta_s64_v: 7373 case NEON::BI__builtin_neon_vcvtaq_s64_v: 7374 
case NEON::BI__builtin_neon_vcvta_u64_v: 7375 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 7376 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 7377 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 7378 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 7379 } 7380 case NEON::BI__builtin_neon_vcvtm_s16_v: 7381 case NEON::BI__builtin_neon_vcvtm_s32_v: 7382 case NEON::BI__builtin_neon_vcvtmq_s16_v: 7383 case NEON::BI__builtin_neon_vcvtmq_s32_v: 7384 case NEON::BI__builtin_neon_vcvtm_u16_v: 7385 case NEON::BI__builtin_neon_vcvtm_u32_v: 7386 case NEON::BI__builtin_neon_vcvtmq_u16_v: 7387 case NEON::BI__builtin_neon_vcvtmq_u32_v: 7388 case NEON::BI__builtin_neon_vcvtm_s64_v: 7389 case NEON::BI__builtin_neon_vcvtmq_s64_v: 7390 case NEON::BI__builtin_neon_vcvtm_u64_v: 7391 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 7392 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 7393 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 7394 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 7395 } 7396 case NEON::BI__builtin_neon_vcvtn_s16_v: 7397 case NEON::BI__builtin_neon_vcvtn_s32_v: 7398 case NEON::BI__builtin_neon_vcvtnq_s16_v: 7399 case NEON::BI__builtin_neon_vcvtnq_s32_v: 7400 case NEON::BI__builtin_neon_vcvtn_u16_v: 7401 case NEON::BI__builtin_neon_vcvtn_u32_v: 7402 case NEON::BI__builtin_neon_vcvtnq_u16_v: 7403 case NEON::BI__builtin_neon_vcvtnq_u32_v: 7404 case NEON::BI__builtin_neon_vcvtn_s64_v: 7405 case NEON::BI__builtin_neon_vcvtnq_s64_v: 7406 case NEON::BI__builtin_neon_vcvtn_u64_v: 7407 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 7408 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 7409 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 7410 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 7411 } 7412 case NEON::BI__builtin_neon_vcvtp_s16_v: 7413 case NEON::BI__builtin_neon_vcvtp_s32_v: 7414 case NEON::BI__builtin_neon_vcvtpq_s16_v: 7415 case NEON::BI__builtin_neon_vcvtpq_s32_v: 7416 case NEON::BI__builtin_neon_vcvtp_u16_v: 7417 case NEON::BI__builtin_neon_vcvtp_u32_v: 7418 case NEON::BI__builtin_neon_vcvtpq_u16_v: 7419 case NEON::BI__builtin_neon_vcvtpq_u32_v: 7420 case NEON::BI__builtin_neon_vcvtp_s64_v: 7421 case NEON::BI__builtin_neon_vcvtpq_s64_v: 7422 case NEON::BI__builtin_neon_vcvtp_u64_v: 7423 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 7424 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 7425 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 7426 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 7427 } 7428 case NEON::BI__builtin_neon_vmulx_v: 7429 case NEON::BI__builtin_neon_vmulxq_v: { 7430 Int = Intrinsic::aarch64_neon_fmulx; 7431 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 7432 } 7433 case NEON::BI__builtin_neon_vmulxh_lane_f16: 7434 case NEON::BI__builtin_neon_vmulxh_laneq_f16: { 7435 // vmulx_lane should be mapped to Neon scalar mulx after 7436 // extracting the scalar element 7437 Ops.push_back(EmitScalarExpr(E->getArg(2))); 7438 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 7439 Ops.pop_back(); 7440 Int = Intrinsic::aarch64_neon_fmulx; 7441 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx"); 7442 } 7443 case NEON::BI__builtin_neon_vmul_lane_v: 7444 case NEON::BI__builtin_neon_vmul_laneq_v: { 7445 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 7446 bool Quad = false; 7447 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 7448 Quad = true; 7449 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 7450 llvm::Type *VTy = 
GetNeonType(this, 7451 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 7452 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 7453 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 7454 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 7455 return Builder.CreateBitCast(Result, Ty); 7456 } 7457 case NEON::BI__builtin_neon_vnegd_s64: 7458 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 7459 case NEON::BI__builtin_neon_vnegh_f16: 7460 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); 7461 case NEON::BI__builtin_neon_vpmaxnm_v: 7462 case NEON::BI__builtin_neon_vpmaxnmq_v: { 7463 Int = Intrinsic::aarch64_neon_fmaxnmp; 7464 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 7465 } 7466 case NEON::BI__builtin_neon_vpminnm_v: 7467 case NEON::BI__builtin_neon_vpminnmq_v: { 7468 Int = Intrinsic::aarch64_neon_fminnmp; 7469 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 7470 } 7471 case NEON::BI__builtin_neon_vsqrth_f16: { 7472 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7473 Int = Intrinsic::sqrt; 7474 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); 7475 } 7476 case NEON::BI__builtin_neon_vsqrt_v: 7477 case NEON::BI__builtin_neon_vsqrtq_v: { 7478 Int = Intrinsic::sqrt; 7479 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7480 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 7481 } 7482 case NEON::BI__builtin_neon_vrbit_v: 7483 case NEON::BI__builtin_neon_vrbitq_v: { 7484 Int = Intrinsic::aarch64_neon_rbit; 7485 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 7486 } 7487 case NEON::BI__builtin_neon_vaddv_u8: 7488 // FIXME: These are handled by the AArch64 scalar code. 7489 usgn = true; 7490 LLVM_FALLTHROUGH; 7491 case NEON::BI__builtin_neon_vaddv_s8: { 7492 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 7493 Ty = Int32Ty; 7494 VTy = llvm::VectorType::get(Int8Ty, 8); 7495 llvm::Type *Tys[2] = { Ty, VTy }; 7496 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7497 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 7498 return Builder.CreateTrunc(Ops[0], Int8Ty); 7499 } 7500 case NEON::BI__builtin_neon_vaddv_u16: 7501 usgn = true; 7502 LLVM_FALLTHROUGH; 7503 case NEON::BI__builtin_neon_vaddv_s16: { 7504 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 7505 Ty = Int32Ty; 7506 VTy = llvm::VectorType::get(Int16Ty, 4); 7507 llvm::Type *Tys[2] = { Ty, VTy }; 7508 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7509 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 7510 return Builder.CreateTrunc(Ops[0], Int16Ty); 7511 } 7512 case NEON::BI__builtin_neon_vaddvq_u8: 7513 usgn = true; 7514 LLVM_FALLTHROUGH; 7515 case NEON::BI__builtin_neon_vaddvq_s8: { 7516 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 7517 Ty = Int32Ty; 7518 VTy = llvm::VectorType::get(Int8Ty, 16); 7519 llvm::Type *Tys[2] = { Ty, VTy }; 7520 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7521 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 7522 return Builder.CreateTrunc(Ops[0], Int8Ty); 7523 } 7524 case NEON::BI__builtin_neon_vaddvq_u16: 7525 usgn = true; 7526 LLVM_FALLTHROUGH; 7527 case NEON::BI__builtin_neon_vaddvq_s16: { 7528 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 7529 Ty = Int32Ty; 7530 VTy = llvm::VectorType::get(Int16Ty, 8); 7531 llvm::Type *Tys[2] = { Ty, VTy }; 7532 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7533 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 7534 return Builder.CreateTrunc(Ops[0], Int16Ty); 7535 } 7536 case NEON::BI__builtin_neon_vmaxv_u8: { 7537 Int = Intrinsic::aarch64_neon_umaxv; 7538 Ty = Int32Ty; 7539 VTy = llvm::VectorType::get(Int8Ty, 8); 7540 llvm::Type *Tys[2] = { Ty, VTy }; 7541 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7542 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7543 return Builder.CreateTrunc(Ops[0], Int8Ty); 7544 } 7545 case NEON::BI__builtin_neon_vmaxv_u16: { 7546 Int = Intrinsic::aarch64_neon_umaxv; 7547 Ty = Int32Ty; 7548 VTy = llvm::VectorType::get(Int16Ty, 4); 7549 llvm::Type *Tys[2] = { Ty, VTy }; 7550 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7551 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7552 return Builder.CreateTrunc(Ops[0], Int16Ty); 7553 } 7554 case NEON::BI__builtin_neon_vmaxvq_u8: { 7555 Int = Intrinsic::aarch64_neon_umaxv; 7556 Ty = Int32Ty; 7557 VTy = llvm::VectorType::get(Int8Ty, 16); 7558 llvm::Type *Tys[2] = { Ty, VTy }; 7559 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7560 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7561 return Builder.CreateTrunc(Ops[0], Int8Ty); 7562 } 7563 case NEON::BI__builtin_neon_vmaxvq_u16: { 7564 Int = Intrinsic::aarch64_neon_umaxv; 7565 Ty = Int32Ty; 7566 VTy = llvm::VectorType::get(Int16Ty, 8); 7567 llvm::Type *Tys[2] = { Ty, VTy }; 7568 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7569 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7570 return Builder.CreateTrunc(Ops[0], Int16Ty); 7571 } 7572 case NEON::BI__builtin_neon_vmaxv_s8: { 7573 Int = Intrinsic::aarch64_neon_smaxv; 7574 Ty = Int32Ty; 7575 VTy = llvm::VectorType::get(Int8Ty, 8); 7576 llvm::Type *Tys[2] = { Ty, 
VTy }; 7577 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7578 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7579 return Builder.CreateTrunc(Ops[0], Int8Ty); 7580 } 7581 case NEON::BI__builtin_neon_vmaxv_s16: { 7582 Int = Intrinsic::aarch64_neon_smaxv; 7583 Ty = Int32Ty; 7584 VTy = llvm::VectorType::get(Int16Ty, 4); 7585 llvm::Type *Tys[2] = { Ty, VTy }; 7586 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7587 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7588 return Builder.CreateTrunc(Ops[0], Int16Ty); 7589 } 7590 case NEON::BI__builtin_neon_vmaxvq_s8: { 7591 Int = Intrinsic::aarch64_neon_smaxv; 7592 Ty = Int32Ty; 7593 VTy = llvm::VectorType::get(Int8Ty, 16); 7594 llvm::Type *Tys[2] = { Ty, VTy }; 7595 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7596 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7597 return Builder.CreateTrunc(Ops[0], Int8Ty); 7598 } 7599 case NEON::BI__builtin_neon_vmaxvq_s16: { 7600 Int = Intrinsic::aarch64_neon_smaxv; 7601 Ty = Int32Ty; 7602 VTy = llvm::VectorType::get(Int16Ty, 8); 7603 llvm::Type *Tys[2] = { Ty, VTy }; 7604 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7605 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7606 return Builder.CreateTrunc(Ops[0], Int16Ty); 7607 } 7608 case NEON::BI__builtin_neon_vmaxv_f16: { 7609 Int = Intrinsic::aarch64_neon_fmaxv; 7610 Ty = HalfTy; 7611 VTy = llvm::VectorType::get(HalfTy, 4); 7612 llvm::Type *Tys[2] = { Ty, VTy }; 7613 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7614 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7615 return Builder.CreateTrunc(Ops[0], HalfTy); 7616 } 7617 case NEON::BI__builtin_neon_vmaxvq_f16: { 7618 Int = Intrinsic::aarch64_neon_fmaxv; 7619 Ty = HalfTy; 7620 VTy = llvm::VectorType::get(HalfTy, 8); 7621 llvm::Type *Tys[2] = { Ty, VTy }; 7622 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7623 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 7624 return 
Builder.CreateTrunc(Ops[0], HalfTy); 7625 } 7626 case NEON::BI__builtin_neon_vminv_u8: { 7627 Int = Intrinsic::aarch64_neon_uminv; 7628 Ty = Int32Ty; 7629 VTy = llvm::VectorType::get(Int8Ty, 8); 7630 llvm::Type *Tys[2] = { Ty, VTy }; 7631 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7632 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7633 return Builder.CreateTrunc(Ops[0], Int8Ty); 7634 } 7635 case NEON::BI__builtin_neon_vminv_u16: { 7636 Int = Intrinsic::aarch64_neon_uminv; 7637 Ty = Int32Ty; 7638 VTy = llvm::VectorType::get(Int16Ty, 4); 7639 llvm::Type *Tys[2] = { Ty, VTy }; 7640 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7641 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7642 return Builder.CreateTrunc(Ops[0], Int16Ty); 7643 } 7644 case NEON::BI__builtin_neon_vminvq_u8: { 7645 Int = Intrinsic::aarch64_neon_uminv; 7646 Ty = Int32Ty; 7647 VTy = llvm::VectorType::get(Int8Ty, 16); 7648 llvm::Type *Tys[2] = { Ty, VTy }; 7649 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7650 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7651 return Builder.CreateTrunc(Ops[0], Int8Ty); 7652 } 7653 case NEON::BI__builtin_neon_vminvq_u16: { 7654 Int = Intrinsic::aarch64_neon_uminv; 7655 Ty = Int32Ty; 7656 VTy = llvm::VectorType::get(Int16Ty, 8); 7657 llvm::Type *Tys[2] = { Ty, VTy }; 7658 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7659 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7660 return Builder.CreateTrunc(Ops[0], Int16Ty); 7661 } 7662 case NEON::BI__builtin_neon_vminv_s8: { 7663 Int = Intrinsic::aarch64_neon_sminv; 7664 Ty = Int32Ty; 7665 VTy = llvm::VectorType::get(Int8Ty, 8); 7666 llvm::Type *Tys[2] = { Ty, VTy }; 7667 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7668 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7669 return Builder.CreateTrunc(Ops[0], Int8Ty); 7670 } 7671 case NEON::BI__builtin_neon_vminv_s16: { 7672 Int = Intrinsic::aarch64_neon_sminv; 7673 Ty = Int32Ty; 7674 VTy 
= llvm::VectorType::get(Int16Ty, 4); 7675 llvm::Type *Tys[2] = { Ty, VTy }; 7676 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7677 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7678 return Builder.CreateTrunc(Ops[0], Int16Ty); 7679 } 7680 case NEON::BI__builtin_neon_vminvq_s8: { 7681 Int = Intrinsic::aarch64_neon_sminv; 7682 Ty = Int32Ty; 7683 VTy = llvm::VectorType::get(Int8Ty, 16); 7684 llvm::Type *Tys[2] = { Ty, VTy }; 7685 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7686 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7687 return Builder.CreateTrunc(Ops[0], Int8Ty); 7688 } 7689 case NEON::BI__builtin_neon_vminvq_s16: { 7690 Int = Intrinsic::aarch64_neon_sminv; 7691 Ty = Int32Ty; 7692 VTy = llvm::VectorType::get(Int16Ty, 8); 7693 llvm::Type *Tys[2] = { Ty, VTy }; 7694 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7695 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7696 return Builder.CreateTrunc(Ops[0], Int16Ty); 7697 } 7698 case NEON::BI__builtin_neon_vminv_f16: { 7699 Int = Intrinsic::aarch64_neon_fminv; 7700 Ty = HalfTy; 7701 VTy = llvm::VectorType::get(HalfTy, 4); 7702 llvm::Type *Tys[2] = { Ty, VTy }; 7703 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7704 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7705 return Builder.CreateTrunc(Ops[0], HalfTy); 7706 } 7707 case NEON::BI__builtin_neon_vminvq_f16: { 7708 Int = Intrinsic::aarch64_neon_fminv; 7709 Ty = HalfTy; 7710 VTy = llvm::VectorType::get(HalfTy, 8); 7711 llvm::Type *Tys[2] = { Ty, VTy }; 7712 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7713 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 7714 return Builder.CreateTrunc(Ops[0], HalfTy); 7715 } 7716 case NEON::BI__builtin_neon_vmaxnmv_f16: { 7717 Int = Intrinsic::aarch64_neon_fmaxnmv; 7718 Ty = HalfTy; 7719 VTy = llvm::VectorType::get(HalfTy, 4); 7720 llvm::Type *Tys[2] = { Ty, VTy }; 7721 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7722 Ops[0] = 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 7723 return Builder.CreateTrunc(Ops[0], HalfTy); 7724 } 7725 case NEON::BI__builtin_neon_vmaxnmvq_f16: { 7726 Int = Intrinsic::aarch64_neon_fmaxnmv; 7727 Ty = HalfTy; 7728 VTy = llvm::VectorType::get(HalfTy, 8); 7729 llvm::Type *Tys[2] = { Ty, VTy }; 7730 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7731 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 7732 return Builder.CreateTrunc(Ops[0], HalfTy); 7733 } 7734 case NEON::BI__builtin_neon_vminnmv_f16: { 7735 Int = Intrinsic::aarch64_neon_fminnmv; 7736 Ty = HalfTy; 7737 VTy = llvm::VectorType::get(HalfTy, 4); 7738 llvm::Type *Tys[2] = { Ty, VTy }; 7739 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7740 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 7741 return Builder.CreateTrunc(Ops[0], HalfTy); 7742 } 7743 case NEON::BI__builtin_neon_vminnmvq_f16: { 7744 Int = Intrinsic::aarch64_neon_fminnmv; 7745 Ty = HalfTy; 7746 VTy = llvm::VectorType::get(HalfTy, 8); 7747 llvm::Type *Tys[2] = { Ty, VTy }; 7748 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7749 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 7750 return Builder.CreateTrunc(Ops[0], HalfTy); 7751 } 7752 case NEON::BI__builtin_neon_vmul_n_f64: { 7753 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 7754 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 7755 return Builder.CreateFMul(Ops[0], RHS); 7756 } 7757 case NEON::BI__builtin_neon_vaddlv_u8: { 7758 Int = Intrinsic::aarch64_neon_uaddlv; 7759 Ty = Int32Ty; 7760 VTy = llvm::VectorType::get(Int8Ty, 8); 7761 llvm::Type *Tys[2] = { Ty, VTy }; 7762 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7763 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7764 return Builder.CreateTrunc(Ops[0], Int16Ty); 7765 } 7766 case NEON::BI__builtin_neon_vaddlv_u16: { 7767 Int = Intrinsic::aarch64_neon_uaddlv; 7768 Ty = Int32Ty; 7769 VTy = llvm::VectorType::get(Int16Ty, 4); 7770 
llvm::Type *Tys[2] = { Ty, VTy }; 7771 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7772 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7773 } 7774 case NEON::BI__builtin_neon_vaddlvq_u8: { 7775 Int = Intrinsic::aarch64_neon_uaddlv; 7776 Ty = Int32Ty; 7777 VTy = llvm::VectorType::get(Int8Ty, 16); 7778 llvm::Type *Tys[2] = { Ty, VTy }; 7779 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7780 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7781 return Builder.CreateTrunc(Ops[0], Int16Ty); 7782 } 7783 case NEON::BI__builtin_neon_vaddlvq_u16: { 7784 Int = Intrinsic::aarch64_neon_uaddlv; 7785 Ty = Int32Ty; 7786 VTy = llvm::VectorType::get(Int16Ty, 8); 7787 llvm::Type *Tys[2] = { Ty, VTy }; 7788 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7789 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7790 } 7791 case NEON::BI__builtin_neon_vaddlv_s8: { 7792 Int = Intrinsic::aarch64_neon_saddlv; 7793 Ty = Int32Ty; 7794 VTy = llvm::VectorType::get(Int8Ty, 8); 7795 llvm::Type *Tys[2] = { Ty, VTy }; 7796 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7797 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7798 return Builder.CreateTrunc(Ops[0], Int16Ty); 7799 } 7800 case NEON::BI__builtin_neon_vaddlv_s16: { 7801 Int = Intrinsic::aarch64_neon_saddlv; 7802 Ty = Int32Ty; 7803 VTy = llvm::VectorType::get(Int16Ty, 4); 7804 llvm::Type *Tys[2] = { Ty, VTy }; 7805 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7806 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7807 } 7808 case NEON::BI__builtin_neon_vaddlvq_s8: { 7809 Int = Intrinsic::aarch64_neon_saddlv; 7810 Ty = Int32Ty; 7811 VTy = llvm::VectorType::get(Int8Ty, 16); 7812 llvm::Type *Tys[2] = { Ty, VTy }; 7813 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7814 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7815 return Builder.CreateTrunc(Ops[0], Int16Ty); 7816 } 7817 case NEON::BI__builtin_neon_vaddlvq_s16: { 7818 Int = 
Intrinsic::aarch64_neon_saddlv; 7819 Ty = Int32Ty; 7820 VTy = llvm::VectorType::get(Int16Ty, 8); 7821 llvm::Type *Tys[2] = { Ty, VTy }; 7822 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7823 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 7824 } 7825 case NEON::BI__builtin_neon_vsri_n_v: 7826 case NEON::BI__builtin_neon_vsriq_n_v: { 7827 Int = Intrinsic::aarch64_neon_vsri; 7828 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 7829 return EmitNeonCall(Intrin, Ops, "vsri_n"); 7830 } 7831 case NEON::BI__builtin_neon_vsli_n_v: 7832 case NEON::BI__builtin_neon_vsliq_n_v: { 7833 Int = Intrinsic::aarch64_neon_vsli; 7834 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 7835 return EmitNeonCall(Intrin, Ops, "vsli_n"); 7836 } 7837 case NEON::BI__builtin_neon_vsra_n_v: 7838 case NEON::BI__builtin_neon_vsraq_n_v: 7839 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7840 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 7841 return Builder.CreateAdd(Ops[0], Ops[1]); 7842 case NEON::BI__builtin_neon_vrsra_n_v: 7843 case NEON::BI__builtin_neon_vrsraq_n_v: { 7844 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 7845 SmallVector<llvm::Value*,2> TmpOps; 7846 TmpOps.push_back(Ops[1]); 7847 TmpOps.push_back(Ops[2]); 7848 Function* F = CGM.getIntrinsic(Int, Ty); 7849 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 7850 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 7851 return Builder.CreateAdd(Ops[0], tmp); 7852 } 7853 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 7854 // of an Align parameter here. 
7855 case NEON::BI__builtin_neon_vld1_x2_v: 7856 case NEON::BI__builtin_neon_vld1q_x2_v: 7857 case NEON::BI__builtin_neon_vld1_x3_v: 7858 case NEON::BI__builtin_neon_vld1q_x3_v: 7859 case NEON::BI__builtin_neon_vld1_x4_v: 7860 case NEON::BI__builtin_neon_vld1q_x4_v: { 7861 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 7862 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7863 llvm::Type *Tys[2] = { VTy, PTy }; 7864 unsigned Int; 7865 switch (BuiltinID) { 7866 case NEON::BI__builtin_neon_vld1_x2_v: 7867 case NEON::BI__builtin_neon_vld1q_x2_v: 7868 Int = Intrinsic::aarch64_neon_ld1x2; 7869 break; 7870 case NEON::BI__builtin_neon_vld1_x3_v: 7871 case NEON::BI__builtin_neon_vld1q_x3_v: 7872 Int = Intrinsic::aarch64_neon_ld1x3; 7873 break; 7874 case NEON::BI__builtin_neon_vld1_x4_v: 7875 case NEON::BI__builtin_neon_vld1q_x4_v: 7876 Int = Intrinsic::aarch64_neon_ld1x4; 7877 break; 7878 } 7879 Function *F = CGM.getIntrinsic(Int, Tys); 7880 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 7881 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7882 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7883 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7884 } 7885 case NEON::BI__builtin_neon_vst1_x2_v: 7886 case NEON::BI__builtin_neon_vst1q_x2_v: 7887 case NEON::BI__builtin_neon_vst1_x3_v: 7888 case NEON::BI__builtin_neon_vst1q_x3_v: 7889 case NEON::BI__builtin_neon_vst1_x4_v: 7890 case NEON::BI__builtin_neon_vst1q_x4_v: { 7891 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 7892 llvm::Type *Tys[2] = { VTy, PTy }; 7893 unsigned Int; 7894 switch (BuiltinID) { 7895 case NEON::BI__builtin_neon_vst1_x2_v: 7896 case NEON::BI__builtin_neon_vst1q_x2_v: 7897 Int = Intrinsic::aarch64_neon_st1x2; 7898 break; 7899 case NEON::BI__builtin_neon_vst1_x3_v: 7900 case NEON::BI__builtin_neon_vst1q_x3_v: 7901 Int = Intrinsic::aarch64_neon_st1x3; 7902 break; 7903 case NEON::BI__builtin_neon_vst1_x4_v: 7904 case 
NEON::BI__builtin_neon_vst1q_x4_v: 7905 Int = Intrinsic::aarch64_neon_st1x4; 7906 break; 7907 } 7908 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 7909 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 7910 } 7911 case NEON::BI__builtin_neon_vld1_v: 7912 case NEON::BI__builtin_neon_vld1q_v: { 7913 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 7914 auto Alignment = CharUnits::fromQuantity( 7915 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); 7916 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 7917 } 7918 case NEON::BI__builtin_neon_vst1_v: 7919 case NEON::BI__builtin_neon_vst1q_v: 7920 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 7921 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 7922 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7923 case NEON::BI__builtin_neon_vld1_lane_v: 7924 case NEON::BI__builtin_neon_vld1q_lane_v: { 7925 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7926 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 7927 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7928 auto Alignment = CharUnits::fromQuantity( 7929 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 7930 Ops[0] = 7931 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 7932 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 7933 } 7934 case NEON::BI__builtin_neon_vld1_dup_v: 7935 case NEON::BI__builtin_neon_vld1q_dup_v: { 7936 Value *V = UndefValue::get(Ty); 7937 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 7938 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7939 auto Alignment = CharUnits::fromQuantity( 7940 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 7941 Ops[0] = 7942 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 7943 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 7944 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 7945 return EmitNeonSplat(Ops[0], CI); 7946 } 7947 case NEON::BI__builtin_neon_vst1_lane_v: 7948 case NEON::BI__builtin_neon_vst1q_lane_v: 7949 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7950 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 7951 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7952 return Builder.CreateDefaultAlignedStore(Ops[1], 7953 Builder.CreateBitCast(Ops[0], Ty)); 7954 case NEON::BI__builtin_neon_vld2_v: 7955 case NEON::BI__builtin_neon_vld2q_v: { 7956 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7957 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7958 llvm::Type *Tys[2] = { VTy, PTy }; 7959 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 7960 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 7961 Ops[0] = Builder.CreateBitCast(Ops[0], 7962 llvm::PointerType::getUnqual(Ops[1]->getType())); 7963 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7964 } 7965 case NEON::BI__builtin_neon_vld3_v: 7966 case NEON::BI__builtin_neon_vld3q_v: { 7967 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7968 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7969 llvm::Type *Tys[2] = { VTy, PTy }; 7970 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 7971 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 7972 Ops[0] = Builder.CreateBitCast(Ops[0], 7973 llvm::PointerType::getUnqual(Ops[1]->getType())); 7974 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7975 } 7976 case NEON::BI__builtin_neon_vld4_v: 7977 case NEON::BI__builtin_neon_vld4q_v: { 7978 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 7979 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7980 llvm::Type *Tys[2] = { VTy, PTy }; 7981 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 7982 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 7983 Ops[0] = Builder.CreateBitCast(Ops[0], 7984 llvm::PointerType::getUnqual(Ops[1]->getType())); 7985 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7986 } 7987 case NEON::BI__builtin_neon_vld2_dup_v: 7988 case NEON::BI__builtin_neon_vld2q_dup_v: { 7989 llvm::Type *PTy = 7990 llvm::PointerType::getUnqual(VTy->getElementType()); 7991 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 7992 llvm::Type *Tys[2] = { VTy, PTy }; 7993 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 7994 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 7995 Ops[0] = Builder.CreateBitCast(Ops[0], 7996 llvm::PointerType::getUnqual(Ops[1]->getType())); 7997 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7998 } 7999 case NEON::BI__builtin_neon_vld3_dup_v: 8000 case NEON::BI__builtin_neon_vld3q_dup_v: { 8001 llvm::Type *PTy = 8002 llvm::PointerType::getUnqual(VTy->getElementType()); 8003 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 8004 llvm::Type *Tys[2] = { VTy, PTy }; 8005 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 8006 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 8007 Ops[0] = Builder.CreateBitCast(Ops[0], 8008 llvm::PointerType::getUnqual(Ops[1]->getType())); 8009 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 8010 } 8011 case NEON::BI__builtin_neon_vld4_dup_v: 8012 case NEON::BI__builtin_neon_vld4q_dup_v: { 8013 llvm::Type *PTy = 8014 llvm::PointerType::getUnqual(VTy->getElementType()); 8015 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 8016 llvm::Type *Tys[2] = { VTy, PTy }; 8017 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 8018 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 8019 Ops[0] = Builder.CreateBitCast(Ops[0], 8020 llvm::PointerType::getUnqual(Ops[1]->getType())); 8021 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 8022 } 8023 case NEON::BI__builtin_neon_vld2_lane_v: 8024 case NEON::BI__builtin_neon_vld2q_lane_v: { 8025 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 8026 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 8027 Ops.push_back(Ops[1]); 8028 Ops.erase(Ops.begin()+1); 8029 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8030 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8031 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 8032 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 8033 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 8034 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 8035 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 8036 } 8037 case NEON::BI__builtin_neon_vld3_lane_v: 8038 case NEON::BI__builtin_neon_vld3q_lane_v: { 8039 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 8040 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 8041 Ops.push_back(Ops[1]); 8042 Ops.erase(Ops.begin()+1); 8043 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8044 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8045 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 8046 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 8047 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 8048 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 8049 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 8050 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 8051 } 8052 case NEON::BI__builtin_neon_vld4_lane_v: 8053 case NEON::BI__builtin_neon_vld4q_lane_v: { 8054 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 8055 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 8056 Ops.push_back(Ops[1]); 8057 Ops.erase(Ops.begin()+1); 8058 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8059 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8060 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 8061 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 8062 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 8063 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 8064 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 8065 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 8066 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 8067 } 8068 case NEON::BI__builtin_neon_vst2_v: 8069 case NEON::BI__builtin_neon_vst2q_v: { 8070 Ops.push_back(Ops[0]); 8071 Ops.erase(Ops.begin()); 8072 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 8073 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 8074 Ops, ""); 8075 } 8076 case NEON::BI__builtin_neon_vst2_lane_v: 8077 case NEON::BI__builtin_neon_vst2q_lane_v: { 8078 Ops.push_back(Ops[0]); 8079 Ops.erase(Ops.begin()); 8080 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 8081 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 8082 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 8083 Ops, ""); 8084 } 8085 case NEON::BI__builtin_neon_vst3_v: 8086 case NEON::BI__builtin_neon_vst3q_v: { 8087 Ops.push_back(Ops[0]); 8088 Ops.erase(Ops.begin()); 8089 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 8090 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 8091 Ops, ""); 8092 } 8093 case NEON::BI__builtin_neon_vst3_lane_v: 8094 case NEON::BI__builtin_neon_vst3q_lane_v: { 8095 Ops.push_back(Ops[0]); 8096 Ops.erase(Ops.begin()); 8097 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 8098 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 8099 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 8100 Ops, ""); 8101 } 8102 case NEON::BI__builtin_neon_vst4_v: 8103 case NEON::BI__builtin_neon_vst4q_v: { 8104 Ops.push_back(Ops[0]); 8105 Ops.erase(Ops.begin()); 8106 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 8107 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 8108 Ops, ""); 8109 } 8110 case NEON::BI__builtin_neon_vst4_lane_v: 8111 case NEON::BI__builtin_neon_vst4q_lane_v: { 8112 Ops.push_back(Ops[0]); 8113 Ops.erase(Ops.begin()); 8114 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 8115 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 8116 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 8117 Ops, ""); 8118 } 8119 case NEON::BI__builtin_neon_vtrn_v: 8120 case NEON::BI__builtin_neon_vtrnq_v: { 8121 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 8122 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8123 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8124 Value *SV = nullptr; 8125 8126 for (unsigned vi = 0; vi != 2; ++vi) { 8127 SmallVector<uint32_t, 16> Indices; 8128 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 8129 Indices.push_back(i+vi); 8130 Indices.push_back(i+e+vi); 8131 } 8132 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 8133 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 8134 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 8135 } 8136 return SV; 8137 } 8138 case NEON::BI__builtin_neon_vuzp_v: 8139 case NEON::BI__builtin_neon_vuzpq_v: { 8140 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 8141 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8142 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8143 Value *SV = nullptr; 8144 8145 for (unsigned vi = 0; vi != 2; ++vi) { 8146 SmallVector<uint32_t, 16> Indices; 8147 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 8148 Indices.push_back(2*i+vi); 8149 8150 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 8151 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 8152 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 8153 } 8154 return SV; 8155 } 8156 case NEON::BI__builtin_neon_vzip_v: 8157 case NEON::BI__builtin_neon_vzipq_v: { 8158 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 8159 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8160 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8161 Value *SV = nullptr; 8162 8163 for (unsigned vi = 0; vi != 2; ++vi) { 8164 SmallVector<uint32_t, 16> Indices; 8165 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 8166 
Indices.push_back((i + vi*e) >> 1); 8167 Indices.push_back(((i + vi*e) >> 1)+e); 8168 } 8169 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 8170 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 8171 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 8172 } 8173 return SV; 8174 } 8175 case NEON::BI__builtin_neon_vqtbl1q_v: { 8176 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 8177 Ops, "vtbl1"); 8178 } 8179 case NEON::BI__builtin_neon_vqtbl2q_v: { 8180 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 8181 Ops, "vtbl2"); 8182 } 8183 case NEON::BI__builtin_neon_vqtbl3q_v: { 8184 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 8185 Ops, "vtbl3"); 8186 } 8187 case NEON::BI__builtin_neon_vqtbl4q_v: { 8188 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 8189 Ops, "vtbl4"); 8190 } 8191 case NEON::BI__builtin_neon_vqtbx1q_v: { 8192 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 8193 Ops, "vtbx1"); 8194 } 8195 case NEON::BI__builtin_neon_vqtbx2q_v: { 8196 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 8197 Ops, "vtbx2"); 8198 } 8199 case NEON::BI__builtin_neon_vqtbx3q_v: { 8200 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 8201 Ops, "vtbx3"); 8202 } 8203 case NEON::BI__builtin_neon_vqtbx4q_v: { 8204 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 8205 Ops, "vtbx4"); 8206 } 8207 case NEON::BI__builtin_neon_vsqadd_v: 8208 case NEON::BI__builtin_neon_vsqaddq_v: { 8209 Int = Intrinsic::aarch64_neon_usqadd; 8210 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 8211 } 8212 case NEON::BI__builtin_neon_vuqadd_v: 8213 case NEON::BI__builtin_neon_vuqaddq_v: { 8214 Int = Intrinsic::aarch64_neon_suqadd; 8215 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 8216 } 8217 } 8218 } 8219 8220 llvm::Value *CodeGenFunction:: 8221 
BuildVector(ArrayRef<llvm::Value*> Ops) { 8222 assert((Ops.size() & (Ops.size() - 1)) == 0 && 8223 "Not a power-of-two sized vector!"); 8224 bool AllConstants = true; 8225 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 8226 AllConstants &= isa<Constant>(Ops[i]); 8227 8228 // If this is a constant vector, create a ConstantVector. 8229 if (AllConstants) { 8230 SmallVector<llvm::Constant*, 16> CstOps; 8231 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 8232 CstOps.push_back(cast<Constant>(Ops[i])); 8233 return llvm::ConstantVector::get(CstOps); 8234 } 8235 8236 // Otherwise, insertelement the values to build the vector. 8237 Value *Result = 8238 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 8239 8240 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 8241 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 8242 8243 return Result; 8244 } 8245 8246 // Convert the mask from an integer type to a vector of i1. 8247 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 8248 unsigned NumElts) { 8249 8250 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 8251 cast<IntegerType>(Mask->getType())->getBitWidth()); 8252 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 8253 8254 // If we have less than 8 elements, then the starting mask was an i8 and 8255 // we need to extract down to the right number of elements. 8256 if (NumElts < 8) { 8257 uint32_t Indices[4]; 8258 for (unsigned i = 0; i != NumElts; ++i) 8259 Indices[i] = i; 8260 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 8261 makeArrayRef(Indices, NumElts), 8262 "extract"); 8263 } 8264 return MaskVec; 8265 } 8266 8267 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 8268 SmallVectorImpl<Value *> &Ops, 8269 unsigned Align) { 8270 // Cast the pointer to right type. 
8271 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 8272 llvm::PointerType::getUnqual(Ops[1]->getType())); 8273 8274 // If the mask is all ones just emit a regular store. 8275 if (const auto *C = dyn_cast<Constant>(Ops[2])) 8276 if (C->isAllOnesValue()) 8277 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 8278 8279 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 8280 Ops[1]->getType()->getVectorNumElements()); 8281 8282 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 8283 } 8284 8285 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 8286 SmallVectorImpl<Value *> &Ops, unsigned Align) { 8287 // Cast the pointer to right type. 8288 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 8289 llvm::PointerType::getUnqual(Ops[1]->getType())); 8290 8291 // If the mask is all ones just emit a regular store. 8292 if (const auto *C = dyn_cast<Constant>(Ops[2])) 8293 if (C->isAllOnesValue()) 8294 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 8295 8296 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 8297 Ops[1]->getType()->getVectorNumElements()); 8298 8299 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 8300 } 8301 8302 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, 8303 unsigned NumElts, SmallVectorImpl<Value *> &Ops, 8304 bool InvertLHS = false) { 8305 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); 8306 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); 8307 8308 if (InvertLHS) 8309 LHS = CGF.Builder.CreateNot(LHS); 8310 8311 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), 8312 CGF.Builder.getIntNTy(std::max(NumElts, 8U))); 8313 } 8314 8315 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 8316 SmallVectorImpl<Value *> &Ops, 8317 llvm::Type *DstTy, 8318 unsigned SrcSizeInBits, 8319 unsigned Align) { 8320 // Load the subvector. 8321 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 8322 8323 // Create broadcast mask. 
8324 unsigned NumDstElts = DstTy->getVectorNumElements(); 8325 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 8326 8327 SmallVector<uint32_t, 8> Mask; 8328 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 8329 for (unsigned j = 0; j != NumSrcElts; ++j) 8330 Mask.push_back(j); 8331 8332 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 8333 } 8334 8335 static Value *EmitX86Select(CodeGenFunction &CGF, 8336 Value *Mask, Value *Op0, Value *Op1) { 8337 8338 // If the mask is all ones just return first argument. 8339 if (const auto *C = dyn_cast<Constant>(Mask)) 8340 if (C->isAllOnesValue()) 8341 return Op0; 8342 8343 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 8344 8345 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 8346 } 8347 8348 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, 8349 unsigned NumElts, Value *MaskIn) { 8350 if (MaskIn) { 8351 const auto *C = dyn_cast<Constant>(MaskIn); 8352 if (!C || !C->isAllOnesValue()) 8353 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); 8354 } 8355 8356 if (NumElts < 8) { 8357 uint32_t Indices[8]; 8358 for (unsigned i = 0; i != NumElts; ++i) 8359 Indices[i] = i; 8360 for (unsigned i = NumElts; i != 8; ++i) 8361 Indices[i] = i % NumElts + NumElts; 8362 Cmp = CGF.Builder.CreateShuffleVector( 8363 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 8364 } 8365 8366 return CGF.Builder.CreateBitCast(Cmp, 8367 IntegerType::get(CGF.getLLVMContext(), 8368 std::max(NumElts, 8U))); 8369 } 8370 8371 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 8372 bool Signed, ArrayRef<Value *> Ops) { 8373 assert((Ops.size() == 2 || Ops.size() == 4) && 8374 "Unexpected number of arguments"); 8375 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 8376 Value *Cmp; 8377 8378 if (CC == 3) { 8379 Cmp = Constant::getNullValue( 8380 llvm::VectorType::get(CGF.Builder.getInt1Ty(), 
NumElts)); 8381 } else if (CC == 7) { 8382 Cmp = Constant::getAllOnesValue( 8383 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 8384 } else { 8385 ICmpInst::Predicate Pred; 8386 switch (CC) { 8387 default: llvm_unreachable("Unknown condition code"); 8388 case 0: Pred = ICmpInst::ICMP_EQ; break; 8389 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 8390 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 8391 case 4: Pred = ICmpInst::ICMP_NE; break; 8392 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 8393 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 8394 } 8395 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 8396 } 8397 8398 Value *MaskIn = nullptr; 8399 if (Ops.size() == 4) 8400 MaskIn = Ops[3]; 8401 8402 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); 8403 } 8404 8405 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { 8406 Value *Zero = Constant::getNullValue(In->getType()); 8407 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); 8408 } 8409 8410 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { 8411 8412 llvm::Type *Ty = Ops[0]->getType(); 8413 Value *Zero = llvm::Constant::getNullValue(Ty); 8414 Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); 8415 Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); 8416 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); 8417 if (Ops.size() == 1) 8418 return Res; 8419 return EmitX86Select(CGF, Ops[2], Res, Ops[1]); 8420 } 8421 8422 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 8423 ArrayRef<Value *> Ops) { 8424 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 8425 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 8426 8427 if (Ops.size() == 2) 8428 return Res; 8429 8430 assert(Ops.size() == 4); 8431 return EmitX86Select(CGF, Ops[3], Res, Ops[2]); 8432 } 8433 8434 static Value 
*EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, 8435 ArrayRef<Value *> Ops) { 8436 llvm::Type *Ty = Ops[0]->getType(); 8437 // Arguments have a vXi32 type so cast to vXi64. 8438 Ty = llvm::VectorType::get(CGF.Int64Ty, 8439 Ty->getPrimitiveSizeInBits() / 64); 8440 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty); 8441 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty); 8442 8443 if (IsSigned) { 8444 // Shift left then arithmetic shift right. 8445 Constant *ShiftAmt = ConstantInt::get(Ty, 32); 8446 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt); 8447 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt); 8448 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt); 8449 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt); 8450 } else { 8451 // Clear the upper bits. 8452 Constant *Mask = ConstantInt::get(Ty, 0xffffffff); 8453 LHS = CGF.Builder.CreateAnd(LHS, Mask); 8454 RHS = CGF.Builder.CreateAnd(RHS, Mask); 8455 } 8456 8457 return CGF.Builder.CreateMul(LHS, RHS); 8458 } 8459 8460 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 8461 llvm::Type *DstTy) { 8462 unsigned NumberOfElements = DstTy->getVectorNumElements(); 8463 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 8464 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 8465 } 8466 8467 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { 8468 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); 8469 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); 8470 return EmitX86CpuIs(CPUStr); 8471 } 8472 8473 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { 8474 8475 llvm::Type *Int32Ty = Builder.getInt32Ty(); 8476 8477 // Matching the struct layout from the compiler-rt/libgcc structure that is 8478 // filled in: 8479 // unsigned int __cpu_vendor; 8480 // unsigned int __cpu_type; 8481 // unsigned int __cpu_subtype; 8482 // unsigned int __cpu_features[1]; 8483 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 8484 llvm::ArrayType::get(Int32Ty, 1)); 8485 8486 // Grab the 
global __cpu_model. 8487 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 8488 8489 // Calculate the index needed to access the correct field based on the 8490 // range. Also adjust the expected value. 8491 unsigned Index; 8492 unsigned Value; 8493 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) 8494 #define X86_VENDOR(ENUM, STRING) \ 8495 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)}) 8496 #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \ 8497 .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) 8498 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \ 8499 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) 8500 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \ 8501 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) 8502 #include "llvm/Support/X86TargetParser.def" 8503 .Default({0, 0}); 8504 assert(Value != 0 && "Invalid CPUStr passed to CpuIs"); 8505 8506 // Grab the appropriate field from __cpu_model. 8507 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), 8508 ConstantInt::get(Int32Ty, Index)}; 8509 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); 8510 CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); 8511 8512 // Check the value of the field against the requested value. 8513 return Builder.CreateICmpEQ(CpuValue, 8514 llvm::ConstantInt::get(Int32Ty, Value)); 8515 } 8516 8517 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { 8518 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 8519 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 8520 return EmitX86CpuSupports(FeatureStr); 8521 } 8522 8523 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { 8524 // Processor features and mapping to processor feature value. 
8525 8526 uint32_t FeaturesMask = 0; 8527 8528 for (const StringRef &FeatureStr : FeatureStrs) { 8529 unsigned Feature = 8530 StringSwitch<unsigned>(FeatureStr) 8531 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL) 8532 #include "llvm/Support/X86TargetParser.def" 8533 ; 8534 FeaturesMask |= (1U << Feature); 8535 } 8536 8537 // Matching the struct layout from the compiler-rt/libgcc structure that is 8538 // filled in: 8539 // unsigned int __cpu_vendor; 8540 // unsigned int __cpu_type; 8541 // unsigned int __cpu_subtype; 8542 // unsigned int __cpu_features[1]; 8543 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 8544 llvm::ArrayType::get(Int32Ty, 1)); 8545 8546 // Grab the global __cpu_model. 8547 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 8548 8549 // Grab the first (0th) element from the field __cpu_features off of the 8550 // global in the struct STy. 8551 Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), 8552 ConstantInt::get(Int32Ty, 0)}; 8553 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 8554 Value *Features = 8555 Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); 8556 8557 // Check the value of the bit corresponding to the feature requested. 
8558 Value *Bitset = Builder.CreateAnd( 8559 Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask)); 8560 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 8561 } 8562 8563 Value *CodeGenFunction::EmitX86CpuInit() { 8564 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, 8565 /*Variadic*/ false); 8566 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); 8567 return Builder.CreateCall(Func); 8568 } 8569 8570 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 8571 const CallExpr *E) { 8572 if (BuiltinID == X86::BI__builtin_cpu_is) 8573 return EmitX86CpuIs(E); 8574 if (BuiltinID == X86::BI__builtin_cpu_supports) 8575 return EmitX86CpuSupports(E); 8576 if (BuiltinID == X86::BI__builtin_cpu_init) 8577 return EmitX86CpuInit(); 8578 8579 SmallVector<Value*, 4> Ops; 8580 8581 // Find out if any arguments are required to be integer constant expressions. 8582 unsigned ICEArguments = 0; 8583 ASTContext::GetBuiltinTypeError Error; 8584 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 8585 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 8586 8587 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 8588 // If this is a normal argument, just emit it as a scalar. 8589 if ((ICEArguments & (1 << i)) == 0) { 8590 Ops.push_back(EmitScalarExpr(E->getArg(i))); 8591 continue; 8592 } 8593 8594 // If this is required to be a constant, constant fold it so that we know 8595 // that the generated intrinsic gets a ConstantInt. 8596 llvm::APSInt Result; 8597 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 8598 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 8599 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 8600 } 8601 8602 // These exist so that the builtin that takes an immediate can be bounds 8603 // checked by clang to avoid passing bad immediates to the backend. 
Since 8604 // AVX has a larger immediate than SSE we would need separate builtins to 8605 // do the different bounds checking. Rather than create a clang specific 8606 // SSE only builtin, this implements eight separate builtins to match gcc 8607 // implementation. 8608 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 8609 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 8610 llvm::Function *F = CGM.getIntrinsic(ID); 8611 return Builder.CreateCall(F, Ops); 8612 }; 8613 8614 // For the vector forms of FP comparisons, translate the builtins directly to 8615 // IR. 8616 // TODO: The builtins could be removed if the SSE header files used vector 8617 // extension comparisons directly (vector ordered/unordered may need 8618 // additional support via __builtin_isnan()). 8619 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 8620 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 8621 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 8622 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 8623 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 8624 return Builder.CreateBitCast(Sext, FPVecTy); 8625 }; 8626 8627 switch (BuiltinID) { 8628 default: return nullptr; 8629 case X86::BI_mm_prefetch: { 8630 Value *Address = Ops[0]; 8631 ConstantInt *C = cast<ConstantInt>(Ops[1]); 8632 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); 8633 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); 8634 Value *Data = ConstantInt::get(Int32Ty, 1); 8635 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 8636 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 8637 } 8638 case X86::BI_mm_clflush: { 8639 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 8640 Ops[0]); 8641 } 8642 case X86::BI_mm_lfence: { 8643 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 8644 } 8645 case X86::BI_mm_mfence: { 8646 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 8647 } 8648 case X86::BI_mm_sfence: { 8649 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 8650 } 8651 case X86::BI_mm_pause: { 8652 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 8653 } 8654 case X86::BI__rdtsc: { 8655 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 8656 } 8657 case X86::BI__builtin_ia32_undef128: 8658 case X86::BI__builtin_ia32_undef256: 8659 case X86::BI__builtin_ia32_undef512: 8660 // The x86 definition of "undef" is not the same as the LLVM definition 8661 // (PR32176). We leave optimizing away an unnecessary zero constant to the 8662 // IR optimizer and backend. 8663 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 8664 // value, we should use that here instead of a zero. 8665 return llvm::Constant::getNullValue(ConvertType(E->getType())); 8666 case X86::BI__builtin_ia32_vec_init_v8qi: 8667 case X86::BI__builtin_ia32_vec_init_v4hi: 8668 case X86::BI__builtin_ia32_vec_init_v2si: 8669 return Builder.CreateBitCast(BuildVector(Ops), 8670 llvm::Type::getX86_MMXTy(getLLVMContext())); 8671 case X86::BI__builtin_ia32_vec_ext_v2si: 8672 return Builder.CreateExtractElement(Ops[0], 8673 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 8674 case X86::BI_mm_setcsr: 8675 case X86::BI__builtin_ia32_ldmxcsr: { 8676 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 8677 Builder.CreateStore(Ops[0], Tmp); 8678 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 8679 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 8680 } 8681 case X86::BI_mm_getcsr: 8682 case X86::BI__builtin_ia32_stmxcsr: { 8683 Address Tmp = CreateMemTemp(E->getType()); 8684 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 8685 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 8686 return Builder.CreateLoad(Tmp, "stmxcsr"); 8687 } 8688 case X86::BI__builtin_ia32_xsave: 8689 case 
X86::BI__builtin_ia32_xsave64: 8690 case X86::BI__builtin_ia32_xrstor: 8691 case X86::BI__builtin_ia32_xrstor64: 8692 case X86::BI__builtin_ia32_xsaveopt: 8693 case X86::BI__builtin_ia32_xsaveopt64: 8694 case X86::BI__builtin_ia32_xrstors: 8695 case X86::BI__builtin_ia32_xrstors64: 8696 case X86::BI__builtin_ia32_xsavec: 8697 case X86::BI__builtin_ia32_xsavec64: 8698 case X86::BI__builtin_ia32_xsaves: 8699 case X86::BI__builtin_ia32_xsaves64: { 8700 Intrinsic::ID ID; 8701 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 8702 case X86::BI__builtin_ia32_##NAME: \ 8703 ID = Intrinsic::x86_##NAME; \ 8704 break 8705 switch (BuiltinID) { 8706 default: llvm_unreachable("Unsupported intrinsic!"); 8707 INTRINSIC_X86_XSAVE_ID(xsave); 8708 INTRINSIC_X86_XSAVE_ID(xsave64); 8709 INTRINSIC_X86_XSAVE_ID(xrstor); 8710 INTRINSIC_X86_XSAVE_ID(xrstor64); 8711 INTRINSIC_X86_XSAVE_ID(xsaveopt); 8712 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 8713 INTRINSIC_X86_XSAVE_ID(xrstors); 8714 INTRINSIC_X86_XSAVE_ID(xrstors64); 8715 INTRINSIC_X86_XSAVE_ID(xsavec); 8716 INTRINSIC_X86_XSAVE_ID(xsavec64); 8717 INTRINSIC_X86_XSAVE_ID(xsaves); 8718 INTRINSIC_X86_XSAVE_ID(xsaves64); 8719 } 8720 #undef INTRINSIC_X86_XSAVE_ID 8721 Value *Mhi = Builder.CreateTrunc( 8722 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 8723 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 8724 Ops[1] = Mhi; 8725 Ops.push_back(Mlo); 8726 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 8727 } 8728 case X86::BI__builtin_ia32_storedqudi128_mask: 8729 case X86::BI__builtin_ia32_storedqusi128_mask: 8730 case X86::BI__builtin_ia32_storedquhi128_mask: 8731 case X86::BI__builtin_ia32_storedquqi128_mask: 8732 case X86::BI__builtin_ia32_storeupd128_mask: 8733 case X86::BI__builtin_ia32_storeups128_mask: 8734 case X86::BI__builtin_ia32_storedqudi256_mask: 8735 case X86::BI__builtin_ia32_storedqusi256_mask: 8736 case X86::BI__builtin_ia32_storedquhi256_mask: 8737 case X86::BI__builtin_ia32_storedquqi256_mask: 8738 case 
X86::BI__builtin_ia32_storeupd256_mask: 8739 case X86::BI__builtin_ia32_storeups256_mask: 8740 case X86::BI__builtin_ia32_storedqudi512_mask: 8741 case X86::BI__builtin_ia32_storedqusi512_mask: 8742 case X86::BI__builtin_ia32_storedquhi512_mask: 8743 case X86::BI__builtin_ia32_storedquqi512_mask: 8744 case X86::BI__builtin_ia32_storeupd512_mask: 8745 case X86::BI__builtin_ia32_storeups512_mask: 8746 return EmitX86MaskedStore(*this, Ops, 1); 8747 8748 case X86::BI__builtin_ia32_storess128_mask: 8749 case X86::BI__builtin_ia32_storesd128_mask: { 8750 return EmitX86MaskedStore(*this, Ops, 16); 8751 } 8752 case X86::BI__builtin_ia32_vpopcntb_128: 8753 case X86::BI__builtin_ia32_vpopcntd_128: 8754 case X86::BI__builtin_ia32_vpopcntq_128: 8755 case X86::BI__builtin_ia32_vpopcntw_128: 8756 case X86::BI__builtin_ia32_vpopcntb_256: 8757 case X86::BI__builtin_ia32_vpopcntd_256: 8758 case X86::BI__builtin_ia32_vpopcntq_256: 8759 case X86::BI__builtin_ia32_vpopcntw_256: 8760 case X86::BI__builtin_ia32_vpopcntb_512: 8761 case X86::BI__builtin_ia32_vpopcntd_512: 8762 case X86::BI__builtin_ia32_vpopcntq_512: 8763 case X86::BI__builtin_ia32_vpopcntw_512: { 8764 llvm::Type *ResultType = ConvertType(E->getType()); 8765 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8766 return Builder.CreateCall(F, Ops); 8767 } 8768 case X86::BI__builtin_ia32_cvtmask2b128: 8769 case X86::BI__builtin_ia32_cvtmask2b256: 8770 case X86::BI__builtin_ia32_cvtmask2b512: 8771 case X86::BI__builtin_ia32_cvtmask2w128: 8772 case X86::BI__builtin_ia32_cvtmask2w256: 8773 case X86::BI__builtin_ia32_cvtmask2w512: 8774 case X86::BI__builtin_ia32_cvtmask2d128: 8775 case X86::BI__builtin_ia32_cvtmask2d256: 8776 case X86::BI__builtin_ia32_cvtmask2d512: 8777 case X86::BI__builtin_ia32_cvtmask2q128: 8778 case X86::BI__builtin_ia32_cvtmask2q256: 8779 case X86::BI__builtin_ia32_cvtmask2q512: 8780 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 8781 8782 case 
X86::BI__builtin_ia32_cvtb2mask128: 8783 case X86::BI__builtin_ia32_cvtb2mask256: 8784 case X86::BI__builtin_ia32_cvtb2mask512: 8785 case X86::BI__builtin_ia32_cvtw2mask128: 8786 case X86::BI__builtin_ia32_cvtw2mask256: 8787 case X86::BI__builtin_ia32_cvtw2mask512: 8788 case X86::BI__builtin_ia32_cvtd2mask128: 8789 case X86::BI__builtin_ia32_cvtd2mask256: 8790 case X86::BI__builtin_ia32_cvtd2mask512: 8791 case X86::BI__builtin_ia32_cvtq2mask128: 8792 case X86::BI__builtin_ia32_cvtq2mask256: 8793 case X86::BI__builtin_ia32_cvtq2mask512: 8794 return EmitX86ConvertToMask(*this, Ops[0]); 8795 8796 case X86::BI__builtin_ia32_movdqa32store128_mask: 8797 case X86::BI__builtin_ia32_movdqa64store128_mask: 8798 case X86::BI__builtin_ia32_storeaps128_mask: 8799 case X86::BI__builtin_ia32_storeapd128_mask: 8800 case X86::BI__builtin_ia32_movdqa32store256_mask: 8801 case X86::BI__builtin_ia32_movdqa64store256_mask: 8802 case X86::BI__builtin_ia32_storeaps256_mask: 8803 case X86::BI__builtin_ia32_storeapd256_mask: 8804 case X86::BI__builtin_ia32_movdqa32store512_mask: 8805 case X86::BI__builtin_ia32_movdqa64store512_mask: 8806 case X86::BI__builtin_ia32_storeaps512_mask: 8807 case X86::BI__builtin_ia32_storeapd512_mask: { 8808 unsigned Align = 8809 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 8810 return EmitX86MaskedStore(*this, Ops, Align); 8811 } 8812 case X86::BI__builtin_ia32_loadups128_mask: 8813 case X86::BI__builtin_ia32_loadups256_mask: 8814 case X86::BI__builtin_ia32_loadups512_mask: 8815 case X86::BI__builtin_ia32_loadupd128_mask: 8816 case X86::BI__builtin_ia32_loadupd256_mask: 8817 case X86::BI__builtin_ia32_loadupd512_mask: 8818 case X86::BI__builtin_ia32_loaddquqi128_mask: 8819 case X86::BI__builtin_ia32_loaddquqi256_mask: 8820 case X86::BI__builtin_ia32_loaddquqi512_mask: 8821 case X86::BI__builtin_ia32_loaddquhi128_mask: 8822 case X86::BI__builtin_ia32_loaddquhi256_mask: 8823 case X86::BI__builtin_ia32_loaddquhi512_mask: 8824 case 
X86::BI__builtin_ia32_loaddqusi128_mask: 8825 case X86::BI__builtin_ia32_loaddqusi256_mask: 8826 case X86::BI__builtin_ia32_loaddqusi512_mask: 8827 case X86::BI__builtin_ia32_loaddqudi128_mask: 8828 case X86::BI__builtin_ia32_loaddqudi256_mask: 8829 case X86::BI__builtin_ia32_loaddqudi512_mask: 8830 return EmitX86MaskedLoad(*this, Ops, 1); 8831 8832 case X86::BI__builtin_ia32_loadss128_mask: 8833 case X86::BI__builtin_ia32_loadsd128_mask: 8834 return EmitX86MaskedLoad(*this, Ops, 16); 8835 8836 case X86::BI__builtin_ia32_loadaps128_mask: 8837 case X86::BI__builtin_ia32_loadaps256_mask: 8838 case X86::BI__builtin_ia32_loadaps512_mask: 8839 case X86::BI__builtin_ia32_loadapd128_mask: 8840 case X86::BI__builtin_ia32_loadapd256_mask: 8841 case X86::BI__builtin_ia32_loadapd512_mask: 8842 case X86::BI__builtin_ia32_movdqa32load128_mask: 8843 case X86::BI__builtin_ia32_movdqa32load256_mask: 8844 case X86::BI__builtin_ia32_movdqa32load512_mask: 8845 case X86::BI__builtin_ia32_movdqa64load128_mask: 8846 case X86::BI__builtin_ia32_movdqa64load256_mask: 8847 case X86::BI__builtin_ia32_movdqa64load512_mask: { 8848 unsigned Align = 8849 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 8850 return EmitX86MaskedLoad(*this, Ops, Align); 8851 } 8852 8853 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 8854 case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 8855 llvm::Type *DstTy = ConvertType(E->getType()); 8856 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 8857 } 8858 8859 case X86::BI__builtin_ia32_storehps: 8860 case X86::BI__builtin_ia32_storelps: { 8861 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 8862 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 8863 8864 // cast val v2i64 8865 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 8866 8867 // extract (0, 1) 8868 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 
0 : 1; 8869 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 8870 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 8871 8872 // cast pointer to i64 & store 8873 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 8874 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 8875 } 8876 case X86::BI__builtin_ia32_palignr128: 8877 case X86::BI__builtin_ia32_palignr256: 8878 case X86::BI__builtin_ia32_palignr512_mask: { 8879 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8880 8881 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 8882 assert(NumElts % 16 == 0); 8883 8884 // If palignr is shifting the pair of vectors more than the size of two 8885 // lanes, emit zero. 8886 if (ShiftVal >= 32) 8887 return llvm::Constant::getNullValue(ConvertType(E->getType())); 8888 8889 // If palignr is shifting the pair of input vectors more than one lane, 8890 // but less than two lanes, convert to shifting in zeroes. 8891 if (ShiftVal > 16) { 8892 ShiftVal -= 16; 8893 Ops[1] = Ops[0]; 8894 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 8895 } 8896 8897 uint32_t Indices[64]; 8898 // 256-bit palignr operates on 128-bit lanes so we need to handle that 8899 for (unsigned l = 0; l != NumElts; l += 16) { 8900 for (unsigned i = 0; i != 16; ++i) { 8901 unsigned Idx = ShiftVal + i; 8902 if (Idx >= 16) 8903 Idx += NumElts - 16; // End of lane, switch operand. 8904 Indices[l + i] = Idx + l; 8905 } 8906 } 8907 8908 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 8909 makeArrayRef(Indices, NumElts), 8910 "palignr"); 8911 8912 // If this isn't a masked builtin, just return the align operation. 
8913 if (Ops.size() == 3) 8914 return Align; 8915 8916 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 8917 } 8918 8919 case X86::BI__builtin_ia32_vperm2f128_pd256: 8920 case X86::BI__builtin_ia32_vperm2f128_ps256: 8921 case X86::BI__builtin_ia32_vperm2f128_si256: 8922 case X86::BI__builtin_ia32_permti256: { 8923 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 8924 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 8925 8926 // This takes a very simple approach since there are two lanes and a 8927 // shuffle can have 2 inputs. So we reserve the first input for the first 8928 // lane and the second input for the second lane. This may result in 8929 // duplicate sources, but this can be dealt with in the backend. 8930 8931 Value *OutOps[2]; 8932 uint32_t Indices[8]; 8933 for (unsigned l = 0; l != 2; ++l) { 8934 // Determine the source for this lane. 8935 if (Imm & (1 << ((l * 4) + 3))) 8936 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); 8937 else if (Imm & (1 << ((l * 4) + 1))) 8938 OutOps[l] = Ops[1]; 8939 else 8940 OutOps[l] = Ops[0]; 8941 8942 for (unsigned i = 0; i != NumElts/2; ++i) { 8943 // Start with ith element of the source for this lane. 8944 unsigned Idx = (l * NumElts) + i; 8945 // If bit 0 of the immediate half is set, switch to the high half of 8946 // the source. 8947 if (Imm & (1 << (l * 4))) 8948 Idx += NumElts/2; 8949 Indices[(l * (NumElts/2)) + i] = Idx; 8950 } 8951 } 8952 8953 return Builder.CreateShuffleVector(OutOps[0], OutOps[1], 8954 makeArrayRef(Indices, NumElts), 8955 "vperm"); 8956 } 8957 8958 case X86::BI__builtin_ia32_movnti: 8959 case X86::BI__builtin_ia32_movnti64: 8960 case X86::BI__builtin_ia32_movntsd: 8961 case X86::BI__builtin_ia32_movntss: { 8962 llvm::MDNode *Node = llvm::MDNode::get( 8963 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 8964 8965 Value *Ptr = Ops[0]; 8966 Value *Src = Ops[1]; 8967 8968 // Extract the 0'th element of the source vector. 
8969 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 8970 BuiltinID == X86::BI__builtin_ia32_movntss) 8971 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 8972 8973 // Convert the type of the pointer to a pointer to the stored type. 8974 Value *BC = Builder.CreateBitCast( 8975 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); 8976 8977 // Unaligned nontemporal store of the scalar value. 8978 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); 8979 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 8980 SI->setAlignment(1); 8981 return SI; 8982 } 8983 8984 case X86::BI__builtin_ia32_selectb_128: 8985 case X86::BI__builtin_ia32_selectb_256: 8986 case X86::BI__builtin_ia32_selectb_512: 8987 case X86::BI__builtin_ia32_selectw_128: 8988 case X86::BI__builtin_ia32_selectw_256: 8989 case X86::BI__builtin_ia32_selectw_512: 8990 case X86::BI__builtin_ia32_selectd_128: 8991 case X86::BI__builtin_ia32_selectd_256: 8992 case X86::BI__builtin_ia32_selectd_512: 8993 case X86::BI__builtin_ia32_selectq_128: 8994 case X86::BI__builtin_ia32_selectq_256: 8995 case X86::BI__builtin_ia32_selectq_512: 8996 case X86::BI__builtin_ia32_selectps_128: 8997 case X86::BI__builtin_ia32_selectps_256: 8998 case X86::BI__builtin_ia32_selectps_512: 8999 case X86::BI__builtin_ia32_selectpd_128: 9000 case X86::BI__builtin_ia32_selectpd_256: 9001 case X86::BI__builtin_ia32_selectpd_512: 9002 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 9003 case X86::BI__builtin_ia32_cmpb128_mask: 9004 case X86::BI__builtin_ia32_cmpb256_mask: 9005 case X86::BI__builtin_ia32_cmpb512_mask: 9006 case X86::BI__builtin_ia32_cmpw128_mask: 9007 case X86::BI__builtin_ia32_cmpw256_mask: 9008 case X86::BI__builtin_ia32_cmpw512_mask: 9009 case X86::BI__builtin_ia32_cmpd128_mask: 9010 case X86::BI__builtin_ia32_cmpd256_mask: 9011 case X86::BI__builtin_ia32_cmpd512_mask: 9012 case X86::BI__builtin_ia32_cmpq128_mask: 9013 case X86::BI__builtin_ia32_cmpq256_mask: 9014 case 
X86::BI__builtin_ia32_cmpq512_mask: { 9015 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 9016 return EmitX86MaskedCompare(*this, CC, true, Ops); 9017 } 9018 case X86::BI__builtin_ia32_ucmpb128_mask: 9019 case X86::BI__builtin_ia32_ucmpb256_mask: 9020 case X86::BI__builtin_ia32_ucmpb512_mask: 9021 case X86::BI__builtin_ia32_ucmpw128_mask: 9022 case X86::BI__builtin_ia32_ucmpw256_mask: 9023 case X86::BI__builtin_ia32_ucmpw512_mask: 9024 case X86::BI__builtin_ia32_ucmpd128_mask: 9025 case X86::BI__builtin_ia32_ucmpd256_mask: 9026 case X86::BI__builtin_ia32_ucmpd512_mask: 9027 case X86::BI__builtin_ia32_ucmpq128_mask: 9028 case X86::BI__builtin_ia32_ucmpq256_mask: 9029 case X86::BI__builtin_ia32_ucmpq512_mask: { 9030 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 9031 return EmitX86MaskedCompare(*this, CC, false, Ops); 9032 } 9033 9034 case X86::BI__builtin_ia32_kortestchi: 9035 case X86::BI__builtin_ia32_kortestzhi: { 9036 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); 9037 Value *C; 9038 if (BuiltinID == X86::BI__builtin_ia32_kortestchi) 9039 C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty()); 9040 else 9041 C = llvm::Constant::getNullValue(Builder.getInt16Ty()); 9042 Value *Cmp = Builder.CreateICmpEQ(Or, C); 9043 return Builder.CreateZExt(Cmp, ConvertType(E->getType())); 9044 } 9045 9046 case X86::BI__builtin_ia32_kandhi: 9047 return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); 9048 case X86::BI__builtin_ia32_kandnhi: 9049 return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true); 9050 case X86::BI__builtin_ia32_korhi: 9051 return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); 9052 case X86::BI__builtin_ia32_kxnorhi: 9053 return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true); 9054 case X86::BI__builtin_ia32_kxorhi: 9055 return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops); 9056 case X86::BI__builtin_ia32_knothi: { 9057 Ops[0] = getMaskVecValue(*this, Ops[0], 16); 9058 
return Builder.CreateBitCast(Builder.CreateNot(Ops[0]), 9059 Builder.getInt16Ty()); 9060 } 9061 9062 case X86::BI__builtin_ia32_kunpckdi: 9063 case X86::BI__builtin_ia32_kunpcksi: 9064 case X86::BI__builtin_ia32_kunpckhi: { 9065 unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits(); 9066 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); 9067 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); 9068 uint32_t Indices[64]; 9069 for (unsigned i = 0; i != NumElts; ++i) 9070 Indices[i] = i; 9071 9072 // First extract half of each vector. This gives better codegen than 9073 // doing it in a single shuffle. 9074 LHS = Builder.CreateShuffleVector(LHS, LHS, 9075 makeArrayRef(Indices, NumElts / 2)); 9076 RHS = Builder.CreateShuffleVector(RHS, RHS, 9077 makeArrayRef(Indices, NumElts / 2)); 9078 // Concat the vectors. 9079 // NOTE: Operands are swapped to match the intrinsic definition. 9080 Value *Res = Builder.CreateShuffleVector(RHS, LHS, 9081 makeArrayRef(Indices, NumElts)); 9082 return Builder.CreateBitCast(Res, Ops[0]->getType()); 9083 } 9084 9085 case X86::BI__builtin_ia32_vplzcntd_128_mask: 9086 case X86::BI__builtin_ia32_vplzcntd_256_mask: 9087 case X86::BI__builtin_ia32_vplzcntd_512_mask: 9088 case X86::BI__builtin_ia32_vplzcntq_128_mask: 9089 case X86::BI__builtin_ia32_vplzcntq_256_mask: 9090 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 9091 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 9092 return EmitX86Select(*this, Ops[2], 9093 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 9094 Ops[1]); 9095 } 9096 9097 case X86::BI__builtin_ia32_pabsb128: 9098 case X86::BI__builtin_ia32_pabsw128: 9099 case X86::BI__builtin_ia32_pabsd128: 9100 case X86::BI__builtin_ia32_pabsb256: 9101 case X86::BI__builtin_ia32_pabsw256: 9102 case X86::BI__builtin_ia32_pabsd256: 9103 case X86::BI__builtin_ia32_pabsq128_mask: 9104 case X86::BI__builtin_ia32_pabsq256_mask: 9105 case X86::BI__builtin_ia32_pabsb512_mask: 9106 case 
X86::BI__builtin_ia32_pabsw512_mask: 9107 case X86::BI__builtin_ia32_pabsd512_mask: 9108 case X86::BI__builtin_ia32_pabsq512_mask: 9109 return EmitX86Abs(*this, Ops); 9110 9111 case X86::BI__builtin_ia32_pmaxsb128: 9112 case X86::BI__builtin_ia32_pmaxsw128: 9113 case X86::BI__builtin_ia32_pmaxsd128: 9114 case X86::BI__builtin_ia32_pmaxsq128_mask: 9115 case X86::BI__builtin_ia32_pmaxsb256: 9116 case X86::BI__builtin_ia32_pmaxsw256: 9117 case X86::BI__builtin_ia32_pmaxsd256: 9118 case X86::BI__builtin_ia32_pmaxsq256_mask: 9119 case X86::BI__builtin_ia32_pmaxsb512_mask: 9120 case X86::BI__builtin_ia32_pmaxsw512_mask: 9121 case X86::BI__builtin_ia32_pmaxsd512_mask: 9122 case X86::BI__builtin_ia32_pmaxsq512_mask: 9123 return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); 9124 case X86::BI__builtin_ia32_pmaxub128: 9125 case X86::BI__builtin_ia32_pmaxuw128: 9126 case X86::BI__builtin_ia32_pmaxud128: 9127 case X86::BI__builtin_ia32_pmaxuq128_mask: 9128 case X86::BI__builtin_ia32_pmaxub256: 9129 case X86::BI__builtin_ia32_pmaxuw256: 9130 case X86::BI__builtin_ia32_pmaxud256: 9131 case X86::BI__builtin_ia32_pmaxuq256_mask: 9132 case X86::BI__builtin_ia32_pmaxub512_mask: 9133 case X86::BI__builtin_ia32_pmaxuw512_mask: 9134 case X86::BI__builtin_ia32_pmaxud512_mask: 9135 case X86::BI__builtin_ia32_pmaxuq512_mask: 9136 return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); 9137 case X86::BI__builtin_ia32_pminsb128: 9138 case X86::BI__builtin_ia32_pminsw128: 9139 case X86::BI__builtin_ia32_pminsd128: 9140 case X86::BI__builtin_ia32_pminsq128_mask: 9141 case X86::BI__builtin_ia32_pminsb256: 9142 case X86::BI__builtin_ia32_pminsw256: 9143 case X86::BI__builtin_ia32_pminsd256: 9144 case X86::BI__builtin_ia32_pminsq256_mask: 9145 case X86::BI__builtin_ia32_pminsb512_mask: 9146 case X86::BI__builtin_ia32_pminsw512_mask: 9147 case X86::BI__builtin_ia32_pminsd512_mask: 9148 case X86::BI__builtin_ia32_pminsq512_mask: 9149 return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); 9150 case 
X86::BI__builtin_ia32_pminub128: 9151 case X86::BI__builtin_ia32_pminuw128: 9152 case X86::BI__builtin_ia32_pminud128: 9153 case X86::BI__builtin_ia32_pminuq128_mask: 9154 case X86::BI__builtin_ia32_pminub256: 9155 case X86::BI__builtin_ia32_pminuw256: 9156 case X86::BI__builtin_ia32_pminud256: 9157 case X86::BI__builtin_ia32_pminuq256_mask: 9158 case X86::BI__builtin_ia32_pminub512_mask: 9159 case X86::BI__builtin_ia32_pminuw512_mask: 9160 case X86::BI__builtin_ia32_pminud512_mask: 9161 case X86::BI__builtin_ia32_pminuq512_mask: 9162 return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); 9163 9164 case X86::BI__builtin_ia32_pmuludq128: 9165 case X86::BI__builtin_ia32_pmuludq256: 9166 case X86::BI__builtin_ia32_pmuludq512: 9167 return EmitX86Muldq(*this, /*IsSigned*/false, Ops); 9168 9169 case X86::BI__builtin_ia32_pmuldq128: 9170 case X86::BI__builtin_ia32_pmuldq256: 9171 case X86::BI__builtin_ia32_pmuldq512: 9172 return EmitX86Muldq(*this, /*IsSigned*/true, Ops); 9173 9174 // 3DNow! 9175 case X86::BI__builtin_ia32_pswapdsf: 9176 case X86::BI__builtin_ia32_pswapdsi: { 9177 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 9178 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 9179 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 9180 return Builder.CreateCall(F, Ops, "pswapd"); 9181 } 9182 case X86::BI__builtin_ia32_rdrand16_step: 9183 case X86::BI__builtin_ia32_rdrand32_step: 9184 case X86::BI__builtin_ia32_rdrand64_step: 9185 case X86::BI__builtin_ia32_rdseed16_step: 9186 case X86::BI__builtin_ia32_rdseed32_step: 9187 case X86::BI__builtin_ia32_rdseed64_step: { 9188 Intrinsic::ID ID; 9189 switch (BuiltinID) { 9190 default: llvm_unreachable("Unsupported intrinsic!"); 9191 case X86::BI__builtin_ia32_rdrand16_step: 9192 ID = Intrinsic::x86_rdrand_16; 9193 break; 9194 case X86::BI__builtin_ia32_rdrand32_step: 9195 ID = Intrinsic::x86_rdrand_32; 9196 break; 9197 case X86::BI__builtin_ia32_rdrand64_step: 9198 ID = 
Intrinsic::x86_rdrand_64; 9199 break; 9200 case X86::BI__builtin_ia32_rdseed16_step: 9201 ID = Intrinsic::x86_rdseed_16; 9202 break; 9203 case X86::BI__builtin_ia32_rdseed32_step: 9204 ID = Intrinsic::x86_rdseed_32; 9205 break; 9206 case X86::BI__builtin_ia32_rdseed64_step: 9207 ID = Intrinsic::x86_rdseed_64; 9208 break; 9209 } 9210 9211 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 9212 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 9213 Ops[0]); 9214 return Builder.CreateExtractValue(Call, 1); 9215 } 9216 9217 case X86::BI__builtin_ia32_cmpps128_mask: 9218 case X86::BI__builtin_ia32_cmpps256_mask: 9219 case X86::BI__builtin_ia32_cmpps512_mask: 9220 case X86::BI__builtin_ia32_cmppd128_mask: 9221 case X86::BI__builtin_ia32_cmppd256_mask: 9222 case X86::BI__builtin_ia32_cmppd512_mask: { 9223 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 9224 Value *MaskIn = Ops[3]; 9225 Ops.erase(&Ops[3]); 9226 9227 Intrinsic::ID ID; 9228 switch (BuiltinID) { 9229 default: llvm_unreachable("Unsupported intrinsic!"); 9230 case X86::BI__builtin_ia32_cmpps128_mask: 9231 ID = Intrinsic::x86_avx512_mask_cmp_ps_128; 9232 break; 9233 case X86::BI__builtin_ia32_cmpps256_mask: 9234 ID = Intrinsic::x86_avx512_mask_cmp_ps_256; 9235 break; 9236 case X86::BI__builtin_ia32_cmpps512_mask: 9237 ID = Intrinsic::x86_avx512_mask_cmp_ps_512; 9238 break; 9239 case X86::BI__builtin_ia32_cmppd128_mask: 9240 ID = Intrinsic::x86_avx512_mask_cmp_pd_128; 9241 break; 9242 case X86::BI__builtin_ia32_cmppd256_mask: 9243 ID = Intrinsic::x86_avx512_mask_cmp_pd_256; 9244 break; 9245 case X86::BI__builtin_ia32_cmppd512_mask: 9246 ID = Intrinsic::x86_avx512_mask_cmp_pd_512; 9247 break; 9248 } 9249 9250 Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 9251 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn); 9252 } 9253 9254 // SSE packed comparison intrinsics 9255 case X86::BI__builtin_ia32_cmpeqps: 9256 case X86::BI__builtin_ia32_cmpeqpd: 9257 
return getVectorFCmpIR(CmpInst::FCMP_OEQ); 9258 case X86::BI__builtin_ia32_cmpltps: 9259 case X86::BI__builtin_ia32_cmpltpd: 9260 return getVectorFCmpIR(CmpInst::FCMP_OLT); 9261 case X86::BI__builtin_ia32_cmpleps: 9262 case X86::BI__builtin_ia32_cmplepd: 9263 return getVectorFCmpIR(CmpInst::FCMP_OLE); 9264 case X86::BI__builtin_ia32_cmpunordps: 9265 case X86::BI__builtin_ia32_cmpunordpd: 9266 return getVectorFCmpIR(CmpInst::FCMP_UNO); 9267 case X86::BI__builtin_ia32_cmpneqps: 9268 case X86::BI__builtin_ia32_cmpneqpd: 9269 return getVectorFCmpIR(CmpInst::FCMP_UNE); 9270 case X86::BI__builtin_ia32_cmpnltps: 9271 case X86::BI__builtin_ia32_cmpnltpd: 9272 return getVectorFCmpIR(CmpInst::FCMP_UGE); 9273 case X86::BI__builtin_ia32_cmpnleps: 9274 case X86::BI__builtin_ia32_cmpnlepd: 9275 return getVectorFCmpIR(CmpInst::FCMP_UGT); 9276 case X86::BI__builtin_ia32_cmpordps: 9277 case X86::BI__builtin_ia32_cmpordpd: 9278 return getVectorFCmpIR(CmpInst::FCMP_ORD); 9279 case X86::BI__builtin_ia32_cmpps: 9280 case X86::BI__builtin_ia32_cmpps256: 9281 case X86::BI__builtin_ia32_cmppd: 9282 case X86::BI__builtin_ia32_cmppd256: { 9283 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 9284 // If this one of the SSE immediates, we can use native IR. 9285 if (CC < 8) { 9286 FCmpInst::Predicate Pred; 9287 switch (CC) { 9288 case 0: Pred = FCmpInst::FCMP_OEQ; break; 9289 case 1: Pred = FCmpInst::FCMP_OLT; break; 9290 case 2: Pred = FCmpInst::FCMP_OLE; break; 9291 case 3: Pred = FCmpInst::FCMP_UNO; break; 9292 case 4: Pred = FCmpInst::FCMP_UNE; break; 9293 case 5: Pred = FCmpInst::FCMP_UGE; break; 9294 case 6: Pred = FCmpInst::FCMP_UGT; break; 9295 case 7: Pred = FCmpInst::FCMP_ORD; break; 9296 } 9297 return getVectorFCmpIR(Pred); 9298 } 9299 9300 // We can't handle 8-31 immediates with native IR, use the intrinsic. 9301 // Except for predicates that create constants. 
9302 Intrinsic::ID ID; 9303 switch (BuiltinID) { 9304 default: llvm_unreachable("Unsupported intrinsic!"); 9305 case X86::BI__builtin_ia32_cmpps: 9306 ID = Intrinsic::x86_sse_cmp_ps; 9307 break; 9308 case X86::BI__builtin_ia32_cmpps256: 9309 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 9310 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 9311 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 9312 Value *Constant = (CC == 0xf || CC == 0x1f) ? 9313 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : 9314 llvm::Constant::getNullValue(Builder.getInt32Ty()); 9315 Value *Vec = Builder.CreateVectorSplat( 9316 Ops[0]->getType()->getVectorNumElements(), Constant); 9317 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 9318 } 9319 ID = Intrinsic::x86_avx_cmp_ps_256; 9320 break; 9321 case X86::BI__builtin_ia32_cmppd: 9322 ID = Intrinsic::x86_sse2_cmp_pd; 9323 break; 9324 case X86::BI__builtin_ia32_cmppd256: 9325 // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector 9326 // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... 9327 if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { 9328 Value *Constant = (CC == 0xf || CC == 0x1f) ? 
9329 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : 9330 llvm::Constant::getNullValue(Builder.getInt64Ty()); 9331 Value *Vec = Builder.CreateVectorSplat( 9332 Ops[0]->getType()->getVectorNumElements(), Constant); 9333 return Builder.CreateBitCast(Vec, Ops[0]->getType()); 9334 } 9335 ID = Intrinsic::x86_avx_cmp_pd_256; 9336 break; 9337 } 9338 9339 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 9340 } 9341 9342 // SSE scalar comparison intrinsics 9343 case X86::BI__builtin_ia32_cmpeqss: 9344 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 9345 case X86::BI__builtin_ia32_cmpltss: 9346 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 9347 case X86::BI__builtin_ia32_cmpless: 9348 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 9349 case X86::BI__builtin_ia32_cmpunordss: 9350 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 9351 case X86::BI__builtin_ia32_cmpneqss: 9352 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 9353 case X86::BI__builtin_ia32_cmpnltss: 9354 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 9355 case X86::BI__builtin_ia32_cmpnless: 9356 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 9357 case X86::BI__builtin_ia32_cmpordss: 9358 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 9359 case X86::BI__builtin_ia32_cmpeqsd: 9360 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 9361 case X86::BI__builtin_ia32_cmpltsd: 9362 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 9363 case X86::BI__builtin_ia32_cmplesd: 9364 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 9365 case X86::BI__builtin_ia32_cmpunordsd: 9366 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 9367 case X86::BI__builtin_ia32_cmpneqsd: 9368 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 9369 case X86::BI__builtin_ia32_cmpnltsd: 9370 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 9371 case X86::BI__builtin_ia32_cmpnlesd: 9372 return 
getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 9373 case X86::BI__builtin_ia32_cmpordsd: 9374 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 9375 9376 case X86::BI__emul: 9377 case X86::BI__emulu: { 9378 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 9379 bool isSigned = (BuiltinID == X86::BI__emul); 9380 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 9381 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 9382 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 9383 } 9384 case X86::BI__mulh: 9385 case X86::BI__umulh: 9386 case X86::BI_mul128: 9387 case X86::BI_umul128: { 9388 llvm::Type *ResType = ConvertType(E->getType()); 9389 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 9390 9391 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 9392 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 9393 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 9394 9395 Value *MulResult, *HigherBits; 9396 if (IsSigned) { 9397 MulResult = Builder.CreateNSWMul(LHS, RHS); 9398 HigherBits = Builder.CreateAShr(MulResult, 64); 9399 } else { 9400 MulResult = Builder.CreateNUWMul(LHS, RHS); 9401 HigherBits = Builder.CreateLShr(MulResult, 64); 9402 } 9403 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 9404 9405 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 9406 return HigherBits; 9407 9408 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 9409 Builder.CreateStore(HigherBits, HighBitsAddress); 9410 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 9411 } 9412 9413 case X86::BI__faststorefence: { 9414 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 9415 llvm::SyncScope::System); 9416 } 9417 case X86::BI_ReadWriteBarrier: 9418 case X86::BI_ReadBarrier: 9419 case X86::BI_WriteBarrier: { 9420 return 
Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 9421 llvm::SyncScope::SingleThread); 9422 } 9423 case X86::BI_BitScanForward: 9424 case X86::BI_BitScanForward64: 9425 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 9426 case X86::BI_BitScanReverse: 9427 case X86::BI_BitScanReverse64: 9428 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 9429 9430 case X86::BI_InterlockedAnd64: 9431 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 9432 case X86::BI_InterlockedExchange64: 9433 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 9434 case X86::BI_InterlockedExchangeAdd64: 9435 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 9436 case X86::BI_InterlockedExchangeSub64: 9437 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 9438 case X86::BI_InterlockedOr64: 9439 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 9440 case X86::BI_InterlockedXor64: 9441 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 9442 case X86::BI_InterlockedDecrement64: 9443 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 9444 case X86::BI_InterlockedIncrement64: 9445 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 9446 case X86::BI_InterlockedCompareExchange128: { 9447 // InterlockedCompareExchange128 doesn't directly refer to 128bit ints, 9448 // instead it takes pointers to 64bit ints for Destination and 9449 // ComparandResult, and exchange is taken as two 64bit ints (high & low). 9450 // The previous value is written to ComparandResult, and success is 9451 // returned. 
9452 9453 llvm::Type *Int128Ty = Builder.getInt128Ty(); 9454 llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); 9455 9456 Value *Destination = 9457 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy); 9458 Value *ExchangeHigh128 = 9459 Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty); 9460 Value *ExchangeLow128 = 9461 Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty); 9462 Address ComparandResult( 9463 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy), 9464 getContext().toCharUnitsFromBits(128)); 9465 9466 Value *Exchange = Builder.CreateOr( 9467 Builder.CreateShl(ExchangeHigh128, 64, "", false, false), 9468 ExchangeLow128); 9469 9470 Value *Comparand = Builder.CreateLoad(ComparandResult); 9471 9472 AtomicCmpXchgInst *CXI = 9473 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 9474 AtomicOrdering::SequentiallyConsistent, 9475 AtomicOrdering::SequentiallyConsistent); 9476 CXI->setVolatile(true); 9477 9478 // Write the result back to the inout pointer. 9479 Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult); 9480 9481 // Get the success boolean and zero extend it to i8. 9482 Value *Success = Builder.CreateExtractValue(CXI, 1); 9483 return Builder.CreateZExt(Success, ConvertType(E->getType())); 9484 } 9485 9486 case X86::BI_AddressOfReturnAddress: { 9487 Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); 9488 return Builder.CreateCall(F); 9489 } 9490 case X86::BI__stosb: { 9491 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 9492 // instruction, but it will create a memset that won't be optimized away. 9493 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); 9494 } 9495 case X86::BI__ud2: 9496 // llvm.trap makes a ud2a instruction on x86. 9497 return EmitTrapCall(Intrinsic::trap); 9498 case X86::BI__int2c: { 9499 // This syscall signals a driver assertion failure in x86 NT kernels. 
9500 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 9501 llvm::InlineAsm *IA = 9502 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); 9503 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 9504 getLLVMContext(), llvm::AttributeList::FunctionIndex, 9505 llvm::Attribute::NoReturn); 9506 CallSite CS = Builder.CreateCall(IA); 9507 CS.setAttributes(NoReturnAttr); 9508 return CS.getInstruction(); 9509 } 9510 case X86::BI__readfsbyte: 9511 case X86::BI__readfsword: 9512 case X86::BI__readfsdword: 9513 case X86::BI__readfsqword: { 9514 llvm::Type *IntTy = ConvertType(E->getType()); 9515 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 9516 llvm::PointerType::get(IntTy, 257)); 9517 LoadInst *Load = Builder.CreateAlignedLoad( 9518 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 9519 Load->setVolatile(true); 9520 return Load; 9521 } 9522 case X86::BI__readgsbyte: 9523 case X86::BI__readgsword: 9524 case X86::BI__readgsdword: 9525 case X86::BI__readgsqword: { 9526 llvm::Type *IntTy = ConvertType(E->getType()); 9527 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 9528 llvm::PointerType::get(IntTy, 256)); 9529 LoadInst *Load = Builder.CreateAlignedLoad( 9530 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 9531 Load->setVolatile(true); 9532 return Load; 9533 } 9534 } 9535 } 9536 9537 9538 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 9539 const CallExpr *E) { 9540 SmallVector<Value*, 4> Ops; 9541 9542 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 9543 Ops.push_back(EmitScalarExpr(E->getArg(i))); 9544 9545 Intrinsic::ID ID = Intrinsic::not_intrinsic; 9546 9547 switch (BuiltinID) { 9548 default: return nullptr; 9549 9550 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 9551 // call __builtin_readcyclecounter. 
9552 case PPC::BI__builtin_ppc_get_timebase: 9553 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 9554 9555 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 9556 case PPC::BI__builtin_altivec_lvx: 9557 case PPC::BI__builtin_altivec_lvxl: 9558 case PPC::BI__builtin_altivec_lvebx: 9559 case PPC::BI__builtin_altivec_lvehx: 9560 case PPC::BI__builtin_altivec_lvewx: 9561 case PPC::BI__builtin_altivec_lvsl: 9562 case PPC::BI__builtin_altivec_lvsr: 9563 case PPC::BI__builtin_vsx_lxvd2x: 9564 case PPC::BI__builtin_vsx_lxvw4x: 9565 case PPC::BI__builtin_vsx_lxvd2x_be: 9566 case PPC::BI__builtin_vsx_lxvw4x_be: 9567 case PPC::BI__builtin_vsx_lxvl: 9568 case PPC::BI__builtin_vsx_lxvll: 9569 { 9570 if(BuiltinID == PPC::BI__builtin_vsx_lxvl || 9571 BuiltinID == PPC::BI__builtin_vsx_lxvll){ 9572 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); 9573 }else { 9574 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 9575 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 9576 Ops.pop_back(); 9577 } 9578 9579 switch (BuiltinID) { 9580 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 9581 case PPC::BI__builtin_altivec_lvx: 9582 ID = Intrinsic::ppc_altivec_lvx; 9583 break; 9584 case PPC::BI__builtin_altivec_lvxl: 9585 ID = Intrinsic::ppc_altivec_lvxl; 9586 break; 9587 case PPC::BI__builtin_altivec_lvebx: 9588 ID = Intrinsic::ppc_altivec_lvebx; 9589 break; 9590 case PPC::BI__builtin_altivec_lvehx: 9591 ID = Intrinsic::ppc_altivec_lvehx; 9592 break; 9593 case PPC::BI__builtin_altivec_lvewx: 9594 ID = Intrinsic::ppc_altivec_lvewx; 9595 break; 9596 case PPC::BI__builtin_altivec_lvsl: 9597 ID = Intrinsic::ppc_altivec_lvsl; 9598 break; 9599 case PPC::BI__builtin_altivec_lvsr: 9600 ID = Intrinsic::ppc_altivec_lvsr; 9601 break; 9602 case PPC::BI__builtin_vsx_lxvd2x: 9603 ID = Intrinsic::ppc_vsx_lxvd2x; 9604 break; 9605 case PPC::BI__builtin_vsx_lxvw4x: 9606 ID = Intrinsic::ppc_vsx_lxvw4x; 9607 break; 9608 case PPC::BI__builtin_vsx_lxvd2x_be: 9609 ID = 
Intrinsic::ppc_vsx_lxvd2x_be; 9610 break; 9611 case PPC::BI__builtin_vsx_lxvw4x_be: 9612 ID = Intrinsic::ppc_vsx_lxvw4x_be; 9613 break; 9614 case PPC::BI__builtin_vsx_lxvl: 9615 ID = Intrinsic::ppc_vsx_lxvl; 9616 break; 9617 case PPC::BI__builtin_vsx_lxvll: 9618 ID = Intrinsic::ppc_vsx_lxvll; 9619 break; 9620 } 9621 llvm::Function *F = CGM.getIntrinsic(ID); 9622 return Builder.CreateCall(F, Ops, ""); 9623 } 9624 9625 // vec_st, vec_xst_be 9626 case PPC::BI__builtin_altivec_stvx: 9627 case PPC::BI__builtin_altivec_stvxl: 9628 case PPC::BI__builtin_altivec_stvebx: 9629 case PPC::BI__builtin_altivec_stvehx: 9630 case PPC::BI__builtin_altivec_stvewx: 9631 case PPC::BI__builtin_vsx_stxvd2x: 9632 case PPC::BI__builtin_vsx_stxvw4x: 9633 case PPC::BI__builtin_vsx_stxvd2x_be: 9634 case PPC::BI__builtin_vsx_stxvw4x_be: 9635 case PPC::BI__builtin_vsx_stxvl: 9636 case PPC::BI__builtin_vsx_stxvll: 9637 { 9638 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 9639 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 9640 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 9641 }else { 9642 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 9643 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 9644 Ops.pop_back(); 9645 } 9646 9647 switch (BuiltinID) { 9648 default: llvm_unreachable("Unsupported st intrinsic!"); 9649 case PPC::BI__builtin_altivec_stvx: 9650 ID = Intrinsic::ppc_altivec_stvx; 9651 break; 9652 case PPC::BI__builtin_altivec_stvxl: 9653 ID = Intrinsic::ppc_altivec_stvxl; 9654 break; 9655 case PPC::BI__builtin_altivec_stvebx: 9656 ID = Intrinsic::ppc_altivec_stvebx; 9657 break; 9658 case PPC::BI__builtin_altivec_stvehx: 9659 ID = Intrinsic::ppc_altivec_stvehx; 9660 break; 9661 case PPC::BI__builtin_altivec_stvewx: 9662 ID = Intrinsic::ppc_altivec_stvewx; 9663 break; 9664 case PPC::BI__builtin_vsx_stxvd2x: 9665 ID = Intrinsic::ppc_vsx_stxvd2x; 9666 break; 9667 case PPC::BI__builtin_vsx_stxvw4x: 9668 ID = Intrinsic::ppc_vsx_stxvw4x; 9669 break; 9670 case PPC::BI__builtin_vsx_stxvd2x_be: 
9671 ID = Intrinsic::ppc_vsx_stxvd2x_be; 9672 break; 9673 case PPC::BI__builtin_vsx_stxvw4x_be: 9674 ID = Intrinsic::ppc_vsx_stxvw4x_be; 9675 break; 9676 case PPC::BI__builtin_vsx_stxvl: 9677 ID = Intrinsic::ppc_vsx_stxvl; 9678 break; 9679 case PPC::BI__builtin_vsx_stxvll: 9680 ID = Intrinsic::ppc_vsx_stxvll; 9681 break; 9682 } 9683 llvm::Function *F = CGM.getIntrinsic(ID); 9684 return Builder.CreateCall(F, Ops, ""); 9685 } 9686 // Square root 9687 case PPC::BI__builtin_vsx_xvsqrtsp: 9688 case PPC::BI__builtin_vsx_xvsqrtdp: { 9689 llvm::Type *ResultType = ConvertType(E->getType()); 9690 Value *X = EmitScalarExpr(E->getArg(0)); 9691 ID = Intrinsic::sqrt; 9692 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 9693 return Builder.CreateCall(F, X); 9694 } 9695 // Count leading zeros 9696 case PPC::BI__builtin_altivec_vclzb: 9697 case PPC::BI__builtin_altivec_vclzh: 9698 case PPC::BI__builtin_altivec_vclzw: 9699 case PPC::BI__builtin_altivec_vclzd: { 9700 llvm::Type *ResultType = ConvertType(E->getType()); 9701 Value *X = EmitScalarExpr(E->getArg(0)); 9702 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9703 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 9704 return Builder.CreateCall(F, {X, Undef}); 9705 } 9706 case PPC::BI__builtin_altivec_vctzb: 9707 case PPC::BI__builtin_altivec_vctzh: 9708 case PPC::BI__builtin_altivec_vctzw: 9709 case PPC::BI__builtin_altivec_vctzd: { 9710 llvm::Type *ResultType = ConvertType(E->getType()); 9711 Value *X = EmitScalarExpr(E->getArg(0)); 9712 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 9713 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 9714 return Builder.CreateCall(F, {X, Undef}); 9715 } 9716 case PPC::BI__builtin_altivec_vpopcntb: 9717 case PPC::BI__builtin_altivec_vpopcnth: 9718 case PPC::BI__builtin_altivec_vpopcntw: 9719 case PPC::BI__builtin_altivec_vpopcntd: { 9720 llvm::Type *ResultType = ConvertType(E->getType()); 9721 Value *X = EmitScalarExpr(E->getArg(0)); 
9722 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 9723 return Builder.CreateCall(F, X); 9724 } 9725 // Copy sign 9726 case PPC::BI__builtin_vsx_xvcpsgnsp: 9727 case PPC::BI__builtin_vsx_xvcpsgndp: { 9728 llvm::Type *ResultType = ConvertType(E->getType()); 9729 Value *X = EmitScalarExpr(E->getArg(0)); 9730 Value *Y = EmitScalarExpr(E->getArg(1)); 9731 ID = Intrinsic::copysign; 9732 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 9733 return Builder.CreateCall(F, {X, Y}); 9734 } 9735 // Rounding/truncation 9736 case PPC::BI__builtin_vsx_xvrspip: 9737 case PPC::BI__builtin_vsx_xvrdpip: 9738 case PPC::BI__builtin_vsx_xvrdpim: 9739 case PPC::BI__builtin_vsx_xvrspim: 9740 case PPC::BI__builtin_vsx_xvrdpi: 9741 case PPC::BI__builtin_vsx_xvrspi: 9742 case PPC::BI__builtin_vsx_xvrdpic: 9743 case PPC::BI__builtin_vsx_xvrspic: 9744 case PPC::BI__builtin_vsx_xvrdpiz: 9745 case PPC::BI__builtin_vsx_xvrspiz: { 9746 llvm::Type *ResultType = ConvertType(E->getType()); 9747 Value *X = EmitScalarExpr(E->getArg(0)); 9748 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 9749 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 9750 ID = Intrinsic::floor; 9751 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 9752 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 9753 ID = Intrinsic::round; 9754 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 9755 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 9756 ID = Intrinsic::nearbyint; 9757 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 9758 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 9759 ID = Intrinsic::ceil; 9760 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 9761 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 9762 ID = Intrinsic::trunc; 9763 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 9764 return Builder.CreateCall(F, X); 9765 } 9766 9767 // Absolute value 9768 case PPC::BI__builtin_vsx_xvabsdp: 9769 case PPC::BI__builtin_vsx_xvabssp: { 9770 llvm::Type *ResultType = ConvertType(E->getType()); 9771 Value *X = 
EmitScalarExpr(E->getArg(0)); 9772 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 9773 return Builder.CreateCall(F, X); 9774 } 9775 9776 // FMA variations 9777 case PPC::BI__builtin_vsx_xvmaddadp: 9778 case PPC::BI__builtin_vsx_xvmaddasp: 9779 case PPC::BI__builtin_vsx_xvnmaddadp: 9780 case PPC::BI__builtin_vsx_xvnmaddasp: 9781 case PPC::BI__builtin_vsx_xvmsubadp: 9782 case PPC::BI__builtin_vsx_xvmsubasp: 9783 case PPC::BI__builtin_vsx_xvnmsubadp: 9784 case PPC::BI__builtin_vsx_xvnmsubasp: { 9785 llvm::Type *ResultType = ConvertType(E->getType()); 9786 Value *X = EmitScalarExpr(E->getArg(0)); 9787 Value *Y = EmitScalarExpr(E->getArg(1)); 9788 Value *Z = EmitScalarExpr(E->getArg(2)); 9789 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 9790 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 9791 switch (BuiltinID) { 9792 case PPC::BI__builtin_vsx_xvmaddadp: 9793 case PPC::BI__builtin_vsx_xvmaddasp: 9794 return Builder.CreateCall(F, {X, Y, Z}); 9795 case PPC::BI__builtin_vsx_xvnmaddadp: 9796 case PPC::BI__builtin_vsx_xvnmaddasp: 9797 return Builder.CreateFSub(Zero, 9798 Builder.CreateCall(F, {X, Y, Z}), "sub"); 9799 case PPC::BI__builtin_vsx_xvmsubadp: 9800 case PPC::BI__builtin_vsx_xvmsubasp: 9801 return Builder.CreateCall(F, 9802 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 9803 case PPC::BI__builtin_vsx_xvnmsubadp: 9804 case PPC::BI__builtin_vsx_xvnmsubasp: 9805 Value *FsubRes = 9806 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 9807 return Builder.CreateFSub(Zero, FsubRes, "sub"); 9808 } 9809 llvm_unreachable("Unknown FMA operation"); 9810 return nullptr; // Suppress no-return warning 9811 } 9812 9813 case PPC::BI__builtin_vsx_insertword: { 9814 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 9815 9816 // Third argument is a compile time constant int. It must be clamped to 9817 // to the range [0, 12]. 
9818 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 9819 assert(ArgCI && 9820 "Third arg to xxinsertw intrinsic must be constant integer"); 9821 const int64_t MaxIndex = 12; 9822 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 9823 9824 // The builtin semantics don't exactly match the xxinsertw instructions 9825 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 9826 // word from the first argument, and inserts it in the second argument. The 9827 // instruction extracts the word from its second input register and inserts 9828 // it into its first input register, so swap the first and second arguments. 9829 std::swap(Ops[0], Ops[1]); 9830 9831 // Need to cast the second argument from a vector of unsigned int to a 9832 // vector of long long. 9833 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 9834 9835 if (getTarget().isLittleEndian()) { 9836 // Create a shuffle mask of (1, 0) 9837 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 9838 ConstantInt::get(Int32Ty, 0) 9839 }; 9840 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9841 9842 // Reverse the double words in the vector we will extract from. 9843 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 9844 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 9845 9846 // Reverse the index. 9847 Index = MaxIndex - Index; 9848 } 9849 9850 // Intrinsic expects the first arg to be a vector of int. 9851 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 9852 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 9853 return Builder.CreateCall(F, Ops); 9854 } 9855 9856 case PPC::BI__builtin_vsx_extractuword: { 9857 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 9858 9859 // Intrinsic expects the first argument to be a vector of doublewords. 
9860 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 9861 9862 // The second argument is a compile time constant int that needs to 9863 // be clamped to the range [0, 12]. 9864 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 9865 assert(ArgCI && 9866 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 9867 const int64_t MaxIndex = 12; 9868 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 9869 9870 if (getTarget().isLittleEndian()) { 9871 // Reverse the index. 9872 Index = MaxIndex - Index; 9873 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 9874 9875 // Emit the call, then reverse the double words of the results vector. 9876 Value *Call = Builder.CreateCall(F, Ops); 9877 9878 // Create a shuffle mask of (1, 0) 9879 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 9880 ConstantInt::get(Int32Ty, 0) 9881 }; 9882 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9883 9884 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 9885 return ShuffleCall; 9886 } else { 9887 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 9888 return Builder.CreateCall(F, Ops); 9889 } 9890 } 9891 9892 case PPC::BI__builtin_vsx_xxpermdi: { 9893 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 9894 assert(ArgCI && "Third arg must be constant integer!"); 9895 9896 unsigned Index = ArgCI->getZExtValue(); 9897 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 9898 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 9899 9900 // Element zero comes from the first input vector and element one comes from 9901 // the second. The element indices within each vector are numbered in big 9902 // endian order so the shuffle mask must be adjusted for this on little 9903 // endian platforms (i.e. index is complemented and source vector reversed). 
9904 unsigned ElemIdx0; 9905 unsigned ElemIdx1; 9906 if (getTarget().isLittleEndian()) { 9907 ElemIdx0 = (~Index & 1) + 2; 9908 ElemIdx1 = (~Index & 2) >> 1; 9909 } else { // BigEndian 9910 ElemIdx0 = (Index & 2) >> 1; 9911 ElemIdx1 = 2 + (Index & 1); 9912 } 9913 9914 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 9915 ConstantInt::get(Int32Ty, ElemIdx1)}; 9916 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9917 9918 Value *ShuffleCall = 9919 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 9920 QualType BIRetType = E->getType(); 9921 auto RetTy = ConvertType(BIRetType); 9922 return Builder.CreateBitCast(ShuffleCall, RetTy); 9923 } 9924 9925 case PPC::BI__builtin_vsx_xxsldwi: { 9926 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 9927 assert(ArgCI && "Third argument must be a compile time constant"); 9928 unsigned Index = ArgCI->getZExtValue() & 0x3; 9929 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 9930 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 9931 9932 // Create a shuffle mask 9933 unsigned ElemIdx0; 9934 unsigned ElemIdx1; 9935 unsigned ElemIdx2; 9936 unsigned ElemIdx3; 9937 if (getTarget().isLittleEndian()) { 9938 // Little endian element N comes from element 8+N-Index of the 9939 // concatenated wide vector (of course, using modulo arithmetic on 9940 // the total number of elements). 
9941 ElemIdx0 = (8 - Index) % 8; 9942 ElemIdx1 = (9 - Index) % 8; 9943 ElemIdx2 = (10 - Index) % 8; 9944 ElemIdx3 = (11 - Index) % 8; 9945 } else { 9946 // Big endian ElemIdx<N> = Index + N 9947 ElemIdx0 = Index; 9948 ElemIdx1 = Index + 1; 9949 ElemIdx2 = Index + 2; 9950 ElemIdx3 = Index + 3; 9951 } 9952 9953 Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), 9954 ConstantInt::get(Int32Ty, ElemIdx1), 9955 ConstantInt::get(Int32Ty, ElemIdx2), 9956 ConstantInt::get(Int32Ty, ElemIdx3)}; 9957 9958 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 9959 Value *ShuffleCall = 9960 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 9961 QualType BIRetType = E->getType(); 9962 auto RetTy = ConvertType(BIRetType); 9963 return Builder.CreateBitCast(ShuffleCall, RetTy); 9964 } 9965 } 9966 } 9967 9968 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 9969 const CallExpr *E) { 9970 switch (BuiltinID) { 9971 case AMDGPU::BI__builtin_amdgcn_div_scale: 9972 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 9973 // Translate from the intrinsics's struct return to the builtin's out 9974 // argument. 
9975 9976 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 9977 9978 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 9979 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 9980 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 9981 9982 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 9983 X->getType()); 9984 9985 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 9986 9987 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 9988 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 9989 9990 llvm::Type *RealFlagType 9991 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 9992 9993 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 9994 Builder.CreateStore(FlagExt, FlagOutPtr); 9995 return Result; 9996 } 9997 case AMDGPU::BI__builtin_amdgcn_div_fmas: 9998 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 9999 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 10000 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 10001 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 10002 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 10003 10004 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 10005 Src0->getType()); 10006 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 10007 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 10008 } 10009 10010 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 10011 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 10012 case AMDGPU::BI__builtin_amdgcn_mov_dpp: { 10013 llvm::SmallVector<llvm::Value *, 5> Args; 10014 for (unsigned I = 0; I != 5; ++I) 10015 Args.push_back(EmitScalarExpr(E->getArg(I))); 10016 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, 10017 Args[0]->getType()); 10018 return Builder.CreateCall(F, Args); 10019 } 10020 case AMDGPU::BI__builtin_amdgcn_div_fixup: 10021 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 10022 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 10023 return emitTernaryBuiltin(*this, E, 
Intrinsic::amdgcn_div_fixup); 10024 case AMDGPU::BI__builtin_amdgcn_trig_preop: 10025 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 10026 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 10027 case AMDGPU::BI__builtin_amdgcn_rcp: 10028 case AMDGPU::BI__builtin_amdgcn_rcpf: 10029 case AMDGPU::BI__builtin_amdgcn_rcph: 10030 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 10031 case AMDGPU::BI__builtin_amdgcn_rsq: 10032 case AMDGPU::BI__builtin_amdgcn_rsqf: 10033 case AMDGPU::BI__builtin_amdgcn_rsqh: 10034 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 10035 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 10036 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 10037 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 10038 case AMDGPU::BI__builtin_amdgcn_sinf: 10039 case AMDGPU::BI__builtin_amdgcn_sinh: 10040 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 10041 case AMDGPU::BI__builtin_amdgcn_cosf: 10042 case AMDGPU::BI__builtin_amdgcn_cosh: 10043 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 10044 case AMDGPU::BI__builtin_amdgcn_log_clampf: 10045 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 10046 case AMDGPU::BI__builtin_amdgcn_ldexp: 10047 case AMDGPU::BI__builtin_amdgcn_ldexpf: 10048 case AMDGPU::BI__builtin_amdgcn_ldexph: 10049 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 10050 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 10051 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 10052 case AMDGPU::BI__builtin_amdgcn_frexp_manth: 10053 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 10054 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 10055 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 10056 Value *Src0 = EmitScalarExpr(E->getArg(0)); 10057 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 10058 { Builder.getInt32Ty(), Src0->getType() }); 10059 return Builder.CreateCall(F, Src0); 10060 } 10061 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 10062 Value *Src0 = 
EmitScalarExpr(E->getArg(0)); 10063 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 10064 { Builder.getInt16Ty(), Src0->getType() }); 10065 return Builder.CreateCall(F, Src0); 10066 } 10067 case AMDGPU::BI__builtin_amdgcn_fract: 10068 case AMDGPU::BI__builtin_amdgcn_fractf: 10069 case AMDGPU::BI__builtin_amdgcn_fracth: 10070 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 10071 case AMDGPU::BI__builtin_amdgcn_lerp: 10072 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 10073 case AMDGPU::BI__builtin_amdgcn_uicmp: 10074 case AMDGPU::BI__builtin_amdgcn_uicmpl: 10075 case AMDGPU::BI__builtin_amdgcn_sicmp: 10076 case AMDGPU::BI__builtin_amdgcn_sicmpl: 10077 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 10078 case AMDGPU::BI__builtin_amdgcn_fcmp: 10079 case AMDGPU::BI__builtin_amdgcn_fcmpf: 10080 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 10081 case AMDGPU::BI__builtin_amdgcn_class: 10082 case AMDGPU::BI__builtin_amdgcn_classf: 10083 case AMDGPU::BI__builtin_amdgcn_classh: 10084 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 10085 case AMDGPU::BI__builtin_amdgcn_fmed3f: 10086 case AMDGPU::BI__builtin_amdgcn_fmed3h: 10087 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); 10088 case AMDGPU::BI__builtin_amdgcn_read_exec: { 10089 CallInst *CI = cast<CallInst>( 10090 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 10091 CI->setConvergent(); 10092 return CI; 10093 } 10094 case AMDGPU::BI__builtin_amdgcn_read_exec_lo: 10095 case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { 10096 StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? 
10097 "exec_lo" : "exec_hi"; 10098 CallInst *CI = cast<CallInst>( 10099 EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName)); 10100 CI->setConvergent(); 10101 return CI; 10102 } 10103 10104 // amdgcn workitem 10105 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 10106 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 10107 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 10108 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 10109 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 10110 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 10111 10112 // r600 intrinsics 10113 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 10114 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 10115 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 10116 case AMDGPU::BI__builtin_r600_read_tidig_x: 10117 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 10118 case AMDGPU::BI__builtin_r600_read_tidig_y: 10119 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 10120 case AMDGPU::BI__builtin_r600_read_tidig_z: 10121 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 10122 default: 10123 return nullptr; 10124 } 10125 } 10126 10127 /// Handle a SystemZ function in which the final argument is a pointer 10128 /// to an int that receives the post-instruction CC value. At the LLVM level 10129 /// this is represented as a function that returns a {result, cc} pair. 
10130 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 10131 unsigned IntrinsicID, 10132 const CallExpr *E) { 10133 unsigned NumArgs = E->getNumArgs() - 1; 10134 SmallVector<Value *, 8> Args(NumArgs); 10135 for (unsigned I = 0; I < NumArgs; ++I) 10136 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 10137 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 10138 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 10139 Value *Call = CGF.Builder.CreateCall(F, Args); 10140 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 10141 CGF.Builder.CreateStore(CC, CCPtr); 10142 return CGF.Builder.CreateExtractValue(Call, 0); 10143 } 10144 10145 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 10146 const CallExpr *E) { 10147 switch (BuiltinID) { 10148 case SystemZ::BI__builtin_tbegin: { 10149 Value *TDB = EmitScalarExpr(E->getArg(0)); 10150 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 10151 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 10152 return Builder.CreateCall(F, {TDB, Control}); 10153 } 10154 case SystemZ::BI__builtin_tbegin_nofloat: { 10155 Value *TDB = EmitScalarExpr(E->getArg(0)); 10156 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 10157 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 10158 return Builder.CreateCall(F, {TDB, Control}); 10159 } 10160 case SystemZ::BI__builtin_tbeginc: { 10161 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 10162 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 10163 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 10164 return Builder.CreateCall(F, {TDB, Control}); 10165 } 10166 case SystemZ::BI__builtin_tabort: { 10167 Value *Data = EmitScalarExpr(E->getArg(0)); 10168 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 10169 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 10170 } 10171 case SystemZ::BI__builtin_non_tx_store: { 10172 Value *Address = EmitScalarExpr(E->getArg(0)); 10173 Value *Data = 
EmitScalarExpr(E->getArg(1)); 10174 Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); 10175 return Builder.CreateCall(F, {Data, Address}); 10176 } 10177 10178 // Vector builtins. Note that most vector builtins are mapped automatically 10179 // to target-specific LLVM intrinsics. The ones handled specially here can 10180 // be represented via standard LLVM IR, which is preferable to enable common 10181 // LLVM optimizations. 10182 10183 case SystemZ::BI__builtin_s390_vpopctb: 10184 case SystemZ::BI__builtin_s390_vpopcth: 10185 case SystemZ::BI__builtin_s390_vpopctf: 10186 case SystemZ::BI__builtin_s390_vpopctg: { 10187 llvm::Type *ResultType = ConvertType(E->getType()); 10188 Value *X = EmitScalarExpr(E->getArg(0)); 10189 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 10190 return Builder.CreateCall(F, X); 10191 } 10192 10193 case SystemZ::BI__builtin_s390_vclzb: 10194 case SystemZ::BI__builtin_s390_vclzh: 10195 case SystemZ::BI__builtin_s390_vclzf: 10196 case SystemZ::BI__builtin_s390_vclzg: { 10197 llvm::Type *ResultType = ConvertType(E->getType()); 10198 Value *X = EmitScalarExpr(E->getArg(0)); 10199 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 10200 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 10201 return Builder.CreateCall(F, {X, Undef}); 10202 } 10203 10204 case SystemZ::BI__builtin_s390_vctzb: 10205 case SystemZ::BI__builtin_s390_vctzh: 10206 case SystemZ::BI__builtin_s390_vctzf: 10207 case SystemZ::BI__builtin_s390_vctzg: { 10208 llvm::Type *ResultType = ConvertType(E->getType()); 10209 Value *X = EmitScalarExpr(E->getArg(0)); 10210 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 10211 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 10212 return Builder.CreateCall(F, {X, Undef}); 10213 } 10214 10215 case SystemZ::BI__builtin_s390_vfsqsb: 10216 case SystemZ::BI__builtin_s390_vfsqdb: { 10217 llvm::Type *ResultType = ConvertType(E->getType()); 10218 Value *X = 
EmitScalarExpr(E->getArg(0)); 10219 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 10220 return Builder.CreateCall(F, X); 10221 } 10222 case SystemZ::BI__builtin_s390_vfmasb: 10223 case SystemZ::BI__builtin_s390_vfmadb: { 10224 llvm::Type *ResultType = ConvertType(E->getType()); 10225 Value *X = EmitScalarExpr(E->getArg(0)); 10226 Value *Y = EmitScalarExpr(E->getArg(1)); 10227 Value *Z = EmitScalarExpr(E->getArg(2)); 10228 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 10229 return Builder.CreateCall(F, {X, Y, Z}); 10230 } 10231 case SystemZ::BI__builtin_s390_vfmssb: 10232 case SystemZ::BI__builtin_s390_vfmsdb: { 10233 llvm::Type *ResultType = ConvertType(E->getType()); 10234 Value *X = EmitScalarExpr(E->getArg(0)); 10235 Value *Y = EmitScalarExpr(E->getArg(1)); 10236 Value *Z = EmitScalarExpr(E->getArg(2)); 10237 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 10238 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 10239 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 10240 } 10241 case SystemZ::BI__builtin_s390_vfnmasb: 10242 case SystemZ::BI__builtin_s390_vfnmadb: { 10243 llvm::Type *ResultType = ConvertType(E->getType()); 10244 Value *X = EmitScalarExpr(E->getArg(0)); 10245 Value *Y = EmitScalarExpr(E->getArg(1)); 10246 Value *Z = EmitScalarExpr(E->getArg(2)); 10247 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 10248 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 10249 return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); 10250 } 10251 case SystemZ::BI__builtin_s390_vfnmssb: 10252 case SystemZ::BI__builtin_s390_vfnmsdb: { 10253 llvm::Type *ResultType = ConvertType(E->getType()); 10254 Value *X = EmitScalarExpr(E->getArg(0)); 10255 Value *Y = EmitScalarExpr(E->getArg(1)); 10256 Value *Z = EmitScalarExpr(E->getArg(2)); 10257 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 10258 Function *F = 
CGM.getIntrinsic(Intrinsic::fma, ResultType); 10259 Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); 10260 return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); 10261 } 10262 case SystemZ::BI__builtin_s390_vflpsb: 10263 case SystemZ::BI__builtin_s390_vflpdb: { 10264 llvm::Type *ResultType = ConvertType(E->getType()); 10265 Value *X = EmitScalarExpr(E->getArg(0)); 10266 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 10267 return Builder.CreateCall(F, X); 10268 } 10269 case SystemZ::BI__builtin_s390_vflnsb: 10270 case SystemZ::BI__builtin_s390_vflndb: { 10271 llvm::Type *ResultType = ConvertType(E->getType()); 10272 Value *X = EmitScalarExpr(E->getArg(0)); 10273 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 10274 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 10275 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 10276 } 10277 case SystemZ::BI__builtin_s390_vfisb: 10278 case SystemZ::BI__builtin_s390_vfidb: { 10279 llvm::Type *ResultType = ConvertType(E->getType()); 10280 Value *X = EmitScalarExpr(E->getArg(0)); 10281 // Constant-fold the M4 and M5 mask arguments. 10282 llvm::APSInt M4, M5; 10283 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 10284 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 10285 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 10286 (void)IsConstM4; (void)IsConstM5; 10287 // Check whether this instance can be represented via a LLVM standard 10288 // intrinsic. We only support some combinations of M4 and M5. 
10289 Intrinsic::ID ID = Intrinsic::not_intrinsic; 10290 switch (M4.getZExtValue()) { 10291 default: break; 10292 case 0: // IEEE-inexact exception allowed 10293 switch (M5.getZExtValue()) { 10294 default: break; 10295 case 0: ID = Intrinsic::rint; break; 10296 } 10297 break; 10298 case 4: // IEEE-inexact exception suppressed 10299 switch (M5.getZExtValue()) { 10300 default: break; 10301 case 0: ID = Intrinsic::nearbyint; break; 10302 case 1: ID = Intrinsic::round; break; 10303 case 5: ID = Intrinsic::trunc; break; 10304 case 6: ID = Intrinsic::ceil; break; 10305 case 7: ID = Intrinsic::floor; break; 10306 } 10307 break; 10308 } 10309 if (ID != Intrinsic::not_intrinsic) { 10310 Function *F = CGM.getIntrinsic(ID, ResultType); 10311 return Builder.CreateCall(F, X); 10312 } 10313 switch (BuiltinID) { 10314 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; 10315 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; 10316 default: llvm_unreachable("Unknown BuiltinID"); 10317 } 10318 Function *F = CGM.getIntrinsic(ID); 10319 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 10320 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 10321 return Builder.CreateCall(F, {X, M4Value, M5Value}); 10322 } 10323 case SystemZ::BI__builtin_s390_vfmaxsb: 10324 case SystemZ::BI__builtin_s390_vfmaxdb: { 10325 llvm::Type *ResultType = ConvertType(E->getType()); 10326 Value *X = EmitScalarExpr(E->getArg(0)); 10327 Value *Y = EmitScalarExpr(E->getArg(1)); 10328 // Constant-fold the M4 mask argument. 10329 llvm::APSInt M4; 10330 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 10331 assert(IsConstM4 && "Constant arg isn't actually constant?"); 10332 (void)IsConstM4; 10333 // Check whether this instance can be represented via a LLVM standard 10334 // intrinsic. We only support some values of M4. 
10335 Intrinsic::ID ID = Intrinsic::not_intrinsic; 10336 switch (M4.getZExtValue()) { 10337 default: break; 10338 case 4: ID = Intrinsic::maxnum; break; 10339 } 10340 if (ID != Intrinsic::not_intrinsic) { 10341 Function *F = CGM.getIntrinsic(ID, ResultType); 10342 return Builder.CreateCall(F, {X, Y}); 10343 } 10344 switch (BuiltinID) { 10345 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; 10346 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; 10347 default: llvm_unreachable("Unknown BuiltinID"); 10348 } 10349 Function *F = CGM.getIntrinsic(ID); 10350 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 10351 return Builder.CreateCall(F, {X, Y, M4Value}); 10352 } 10353 case SystemZ::BI__builtin_s390_vfminsb: 10354 case SystemZ::BI__builtin_s390_vfmindb: { 10355 llvm::Type *ResultType = ConvertType(E->getType()); 10356 Value *X = EmitScalarExpr(E->getArg(0)); 10357 Value *Y = EmitScalarExpr(E->getArg(1)); 10358 // Constant-fold the M4 mask argument. 10359 llvm::APSInt M4; 10360 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 10361 assert(IsConstM4 && "Constant arg isn't actually constant?"); 10362 (void)IsConstM4; 10363 // Check whether this instance can be represented via a LLVM standard 10364 // intrinsic. We only support some values of M4. 
10365 Intrinsic::ID ID = Intrinsic::not_intrinsic; 10366 switch (M4.getZExtValue()) { 10367 default: break; 10368 case 4: ID = Intrinsic::minnum; break; 10369 } 10370 if (ID != Intrinsic::not_intrinsic) { 10371 Function *F = CGM.getIntrinsic(ID, ResultType); 10372 return Builder.CreateCall(F, {X, Y}); 10373 } 10374 switch (BuiltinID) { 10375 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; 10376 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; 10377 default: llvm_unreachable("Unknown BuiltinID"); 10378 } 10379 Function *F = CGM.getIntrinsic(ID); 10380 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 10381 return Builder.CreateCall(F, {X, Y, M4Value}); 10382 } 10383 10384 // Vector intrisincs that output the post-instruction CC value. 10385 10386 #define INTRINSIC_WITH_CC(NAME) \ 10387 case SystemZ::BI__builtin_##NAME: \ 10388 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 10389 10390 INTRINSIC_WITH_CC(s390_vpkshs); 10391 INTRINSIC_WITH_CC(s390_vpksfs); 10392 INTRINSIC_WITH_CC(s390_vpksgs); 10393 10394 INTRINSIC_WITH_CC(s390_vpklshs); 10395 INTRINSIC_WITH_CC(s390_vpklsfs); 10396 INTRINSIC_WITH_CC(s390_vpklsgs); 10397 10398 INTRINSIC_WITH_CC(s390_vceqbs); 10399 INTRINSIC_WITH_CC(s390_vceqhs); 10400 INTRINSIC_WITH_CC(s390_vceqfs); 10401 INTRINSIC_WITH_CC(s390_vceqgs); 10402 10403 INTRINSIC_WITH_CC(s390_vchbs); 10404 INTRINSIC_WITH_CC(s390_vchhs); 10405 INTRINSIC_WITH_CC(s390_vchfs); 10406 INTRINSIC_WITH_CC(s390_vchgs); 10407 10408 INTRINSIC_WITH_CC(s390_vchlbs); 10409 INTRINSIC_WITH_CC(s390_vchlhs); 10410 INTRINSIC_WITH_CC(s390_vchlfs); 10411 INTRINSIC_WITH_CC(s390_vchlgs); 10412 10413 INTRINSIC_WITH_CC(s390_vfaebs); 10414 INTRINSIC_WITH_CC(s390_vfaehs); 10415 INTRINSIC_WITH_CC(s390_vfaefs); 10416 10417 INTRINSIC_WITH_CC(s390_vfaezbs); 10418 INTRINSIC_WITH_CC(s390_vfaezhs); 10419 INTRINSIC_WITH_CC(s390_vfaezfs); 10420 10421 INTRINSIC_WITH_CC(s390_vfeebs); 10422 
INTRINSIC_WITH_CC(s390_vfeehs); 10423 INTRINSIC_WITH_CC(s390_vfeefs); 10424 10425 INTRINSIC_WITH_CC(s390_vfeezbs); 10426 INTRINSIC_WITH_CC(s390_vfeezhs); 10427 INTRINSIC_WITH_CC(s390_vfeezfs); 10428 10429 INTRINSIC_WITH_CC(s390_vfenebs); 10430 INTRINSIC_WITH_CC(s390_vfenehs); 10431 INTRINSIC_WITH_CC(s390_vfenefs); 10432 10433 INTRINSIC_WITH_CC(s390_vfenezbs); 10434 INTRINSIC_WITH_CC(s390_vfenezhs); 10435 INTRINSIC_WITH_CC(s390_vfenezfs); 10436 10437 INTRINSIC_WITH_CC(s390_vistrbs); 10438 INTRINSIC_WITH_CC(s390_vistrhs); 10439 INTRINSIC_WITH_CC(s390_vistrfs); 10440 10441 INTRINSIC_WITH_CC(s390_vstrcbs); 10442 INTRINSIC_WITH_CC(s390_vstrchs); 10443 INTRINSIC_WITH_CC(s390_vstrcfs); 10444 10445 INTRINSIC_WITH_CC(s390_vstrczbs); 10446 INTRINSIC_WITH_CC(s390_vstrczhs); 10447 INTRINSIC_WITH_CC(s390_vstrczfs); 10448 10449 INTRINSIC_WITH_CC(s390_vfcesbs); 10450 INTRINSIC_WITH_CC(s390_vfcedbs); 10451 INTRINSIC_WITH_CC(s390_vfchsbs); 10452 INTRINSIC_WITH_CC(s390_vfchdbs); 10453 INTRINSIC_WITH_CC(s390_vfchesbs); 10454 INTRINSIC_WITH_CC(s390_vfchedbs); 10455 10456 INTRINSIC_WITH_CC(s390_vftcisb); 10457 INTRINSIC_WITH_CC(s390_vftcidb); 10458 10459 #undef INTRINSIC_WITH_CC 10460 10461 default: 10462 return nullptr; 10463 } 10464 } 10465 10466 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 10467 const CallExpr *E) { 10468 auto MakeLdg = [&](unsigned IntrinsicID) { 10469 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10470 clang::CharUnits Align = 10471 getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); 10472 return Builder.CreateCall( 10473 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 10474 Ptr->getType()}), 10475 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 10476 }; 10477 auto MakeScopedAtomic = [&](unsigned IntrinsicID) { 10478 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10479 return Builder.CreateCall( 10480 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 10481 Ptr->getType()}), 10482 
{Ptr, EmitScalarExpr(E->getArg(1))}); 10483 }; 10484 switch (BuiltinID) { 10485 case NVPTX::BI__nvvm_atom_add_gen_i: 10486 case NVPTX::BI__nvvm_atom_add_gen_l: 10487 case NVPTX::BI__nvvm_atom_add_gen_ll: 10488 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 10489 10490 case NVPTX::BI__nvvm_atom_sub_gen_i: 10491 case NVPTX::BI__nvvm_atom_sub_gen_l: 10492 case NVPTX::BI__nvvm_atom_sub_gen_ll: 10493 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 10494 10495 case NVPTX::BI__nvvm_atom_and_gen_i: 10496 case NVPTX::BI__nvvm_atom_and_gen_l: 10497 case NVPTX::BI__nvvm_atom_and_gen_ll: 10498 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 10499 10500 case NVPTX::BI__nvvm_atom_or_gen_i: 10501 case NVPTX::BI__nvvm_atom_or_gen_l: 10502 case NVPTX::BI__nvvm_atom_or_gen_ll: 10503 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 10504 10505 case NVPTX::BI__nvvm_atom_xor_gen_i: 10506 case NVPTX::BI__nvvm_atom_xor_gen_l: 10507 case NVPTX::BI__nvvm_atom_xor_gen_ll: 10508 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 10509 10510 case NVPTX::BI__nvvm_atom_xchg_gen_i: 10511 case NVPTX::BI__nvvm_atom_xchg_gen_l: 10512 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 10513 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 10514 10515 case NVPTX::BI__nvvm_atom_max_gen_i: 10516 case NVPTX::BI__nvvm_atom_max_gen_l: 10517 case NVPTX::BI__nvvm_atom_max_gen_ll: 10518 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 10519 10520 case NVPTX::BI__nvvm_atom_max_gen_ui: 10521 case NVPTX::BI__nvvm_atom_max_gen_ul: 10522 case NVPTX::BI__nvvm_atom_max_gen_ull: 10523 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 10524 10525 case NVPTX::BI__nvvm_atom_min_gen_i: 10526 case NVPTX::BI__nvvm_atom_min_gen_l: 10527 case NVPTX::BI__nvvm_atom_min_gen_ll: 10528 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 10529 10530 case NVPTX::BI__nvvm_atom_min_gen_ui: 10531 
case NVPTX::BI__nvvm_atom_min_gen_ul: 10532 case NVPTX::BI__nvvm_atom_min_gen_ull: 10533 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 10534 10535 case NVPTX::BI__nvvm_atom_cas_gen_i: 10536 case NVPTX::BI__nvvm_atom_cas_gen_l: 10537 case NVPTX::BI__nvvm_atom_cas_gen_ll: 10538 // __nvvm_atom_cas_gen_* should return the old value rather than the 10539 // success flag. 10540 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 10541 10542 case NVPTX::BI__nvvm_atom_add_gen_f: { 10543 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10544 Value *Val = EmitScalarExpr(E->getArg(1)); 10545 // atomicrmw only deals with integer arguments so we need to use 10546 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 10547 Value *FnALAF32 = 10548 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 10549 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 10550 } 10551 10552 case NVPTX::BI__nvvm_atom_add_gen_d: { 10553 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10554 Value *Val = EmitScalarExpr(E->getArg(1)); 10555 // atomicrmw only deals with integer arguments, so we need to use 10556 // LLVM's nvvm_atomic_load_add_f64 intrinsic. 
10557 Value *FnALAF64 = 10558 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType()); 10559 return Builder.CreateCall(FnALAF64, {Ptr, Val}); 10560 } 10561 10562 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 10563 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10564 Value *Val = EmitScalarExpr(E->getArg(1)); 10565 Value *FnALI32 = 10566 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 10567 return Builder.CreateCall(FnALI32, {Ptr, Val}); 10568 } 10569 10570 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 10571 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10572 Value *Val = EmitScalarExpr(E->getArg(1)); 10573 Value *FnALD32 = 10574 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 10575 return Builder.CreateCall(FnALD32, {Ptr, Val}); 10576 } 10577 10578 case NVPTX::BI__nvvm_ldg_c: 10579 case NVPTX::BI__nvvm_ldg_c2: 10580 case NVPTX::BI__nvvm_ldg_c4: 10581 case NVPTX::BI__nvvm_ldg_s: 10582 case NVPTX::BI__nvvm_ldg_s2: 10583 case NVPTX::BI__nvvm_ldg_s4: 10584 case NVPTX::BI__nvvm_ldg_i: 10585 case NVPTX::BI__nvvm_ldg_i2: 10586 case NVPTX::BI__nvvm_ldg_i4: 10587 case NVPTX::BI__nvvm_ldg_l: 10588 case NVPTX::BI__nvvm_ldg_ll: 10589 case NVPTX::BI__nvvm_ldg_ll2: 10590 case NVPTX::BI__nvvm_ldg_uc: 10591 case NVPTX::BI__nvvm_ldg_uc2: 10592 case NVPTX::BI__nvvm_ldg_uc4: 10593 case NVPTX::BI__nvvm_ldg_us: 10594 case NVPTX::BI__nvvm_ldg_us2: 10595 case NVPTX::BI__nvvm_ldg_us4: 10596 case NVPTX::BI__nvvm_ldg_ui: 10597 case NVPTX::BI__nvvm_ldg_ui2: 10598 case NVPTX::BI__nvvm_ldg_ui4: 10599 case NVPTX::BI__nvvm_ldg_ul: 10600 case NVPTX::BI__nvvm_ldg_ull: 10601 case NVPTX::BI__nvvm_ldg_ull2: 10602 // PTX Interoperability section 2.2: "For a vector with an even number of 10603 // elements, its alignment is set to number of elements times the alignment 10604 // of its member: n*alignof(t)." 
10605 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 10606 case NVPTX::BI__nvvm_ldg_f: 10607 case NVPTX::BI__nvvm_ldg_f2: 10608 case NVPTX::BI__nvvm_ldg_f4: 10609 case NVPTX::BI__nvvm_ldg_d: 10610 case NVPTX::BI__nvvm_ldg_d2: 10611 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 10612 10613 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 10614 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 10615 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 10616 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 10617 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 10618 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 10619 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 10620 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 10621 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 10622 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 10623 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 10624 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 10625 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 10626 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 10627 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 10628 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 10629 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 10630 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 10631 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 10632 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 10633 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 10634 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 10635 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 10636 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 10637 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 10638 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 10639 case NVPTX::BI__nvvm_atom_cta_max_gen_ll: 10640 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 10641 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 10642 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 10643 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 10644 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 10645 case 
NVPTX::BI__nvvm_atom_sys_max_gen_ul: 10646 case NVPTX::BI__nvvm_atom_sys_max_gen_ll: 10647 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 10648 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 10649 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 10650 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 10651 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 10652 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 10653 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 10654 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 10655 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 10656 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 10657 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 10658 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 10659 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 10660 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 10661 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 10662 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 10663 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 10664 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 10665 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 10666 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 10667 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 10668 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 10669 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 10670 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 10671 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 10672 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 10673 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 10674 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 10675 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 10676 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 10677 case NVPTX::BI__nvvm_atom_sys_and_gen_ll: 10678 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 10679 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 10680 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 10681 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 10682 return 
MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 10683 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 10684 case NVPTX::BI__nvvm_atom_sys_or_gen_l: 10685 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 10686 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 10687 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 10688 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 10689 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 10690 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 10691 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 10692 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 10693 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 10694 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 10695 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 10696 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 10697 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 10698 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10699 return Builder.CreateCall( 10700 CGM.getIntrinsic( 10701 Intrinsic::nvvm_atomic_cas_gen_i_cta, 10702 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 10703 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 10704 } 10705 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 10706 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 10707 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 10708 Value *Ptr = EmitScalarExpr(E->getArg(0)); 10709 return Builder.CreateCall( 10710 CGM.getIntrinsic( 10711 Intrinsic::nvvm_atomic_cas_gen_i_sys, 10712 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 10713 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 10714 } 10715 case NVPTX::BI__nvvm_match_all_sync_i32p: 10716 case NVPTX::BI__nvvm_match_all_sync_i64p: { 10717 Value *Mask = EmitScalarExpr(E->getArg(0)); 10718 Value *Val = EmitScalarExpr(E->getArg(1)); 10719 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); 10720 Value *ResultPair = Builder.CreateCall( 10721 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p 10722 ? 
Intrinsic::nvvm_match_all_sync_i32p 10723 : Intrinsic::nvvm_match_all_sync_i64p), 10724 {Mask, Val}); 10725 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), 10726 PredOutPtr.getElementType()); 10727 Builder.CreateStore(Pred, PredOutPtr); 10728 return Builder.CreateExtractValue(ResultPair, 0); 10729 } 10730 case NVPTX::BI__hmma_m16n16k16_ld_a: 10731 case NVPTX::BI__hmma_m16n16k16_ld_b: 10732 case NVPTX::BI__hmma_m16n16k16_ld_c_f16: 10733 case NVPTX::BI__hmma_m16n16k16_ld_c_f32: 10734 case NVPTX::BI__hmma_m32n8k16_ld_a: 10735 case NVPTX::BI__hmma_m32n8k16_ld_b: 10736 case NVPTX::BI__hmma_m32n8k16_ld_c_f16: 10737 case NVPTX::BI__hmma_m32n8k16_ld_c_f32: 10738 case NVPTX::BI__hmma_m8n32k16_ld_a: 10739 case NVPTX::BI__hmma_m8n32k16_ld_b: 10740 case NVPTX::BI__hmma_m8n32k16_ld_c_f16: 10741 case NVPTX::BI__hmma_m8n32k16_ld_c_f32: { 10742 Address Dst = EmitPointerWithAlignment(E->getArg(0)); 10743 Value *Src = EmitScalarExpr(E->getArg(1)); 10744 Value *Ldm = EmitScalarExpr(E->getArg(2)); 10745 llvm::APSInt isColMajorArg; 10746 if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) 10747 return nullptr; 10748 bool isColMajor = isColMajorArg.getSExtValue(); 10749 unsigned IID; 10750 unsigned NumResults; 10751 switch (BuiltinID) { 10752 case NVPTX::BI__hmma_m16n16k16_ld_a: 10753 IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride 10754 : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; 10755 NumResults = 8; 10756 break; 10757 case NVPTX::BI__hmma_m16n16k16_ld_b: 10758 IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride 10759 : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; 10760 NumResults = 8; 10761 break; 10762 case NVPTX::BI__hmma_m16n16k16_ld_c_f16: 10763 IID = isColMajor ? 
Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride 10764 : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; 10765 NumResults = 4; 10766 break; 10767 case NVPTX::BI__hmma_m16n16k16_ld_c_f32: 10768 IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride 10769 : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; 10770 NumResults = 8; 10771 break; 10772 case NVPTX::BI__hmma_m32n8k16_ld_a: 10773 IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride 10774 : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride; 10775 NumResults = 8; 10776 break; 10777 case NVPTX::BI__hmma_m32n8k16_ld_b: 10778 IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride 10779 : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride; 10780 NumResults = 8; 10781 break; 10782 case NVPTX::BI__hmma_m32n8k16_ld_c_f16: 10783 IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride 10784 : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride; 10785 NumResults = 4; 10786 break; 10787 case NVPTX::BI__hmma_m32n8k16_ld_c_f32: 10788 IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride 10789 : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride; 10790 NumResults = 8; 10791 break; 10792 case NVPTX::BI__hmma_m8n32k16_ld_a: 10793 IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride 10794 : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride; 10795 NumResults = 8; 10796 break; 10797 case NVPTX::BI__hmma_m8n32k16_ld_b: 10798 IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride 10799 : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride; 10800 NumResults = 8; 10801 break; 10802 case NVPTX::BI__hmma_m8n32k16_ld_c_f16: 10803 IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride 10804 : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride; 10805 NumResults = 4; 10806 break; 10807 case NVPTX::BI__hmma_m8n32k16_ld_c_f32: 10808 IID = isColMajor ? 
Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride 10809 : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride; 10810 NumResults = 8; 10811 break; 10812 default: 10813 llvm_unreachable("Unexpected builtin ID."); 10814 } 10815 Value *Result = 10816 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); 10817 10818 // Save returned values. 10819 for (unsigned i = 0; i < NumResults; ++i) { 10820 Builder.CreateAlignedStore( 10821 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), 10822 Dst.getElementType()), 10823 Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), 10824 CharUnits::fromQuantity(4)); 10825 } 10826 return Result; 10827 } 10828 10829 case NVPTX::BI__hmma_m16n16k16_st_c_f16: 10830 case NVPTX::BI__hmma_m16n16k16_st_c_f32: 10831 case NVPTX::BI__hmma_m32n8k16_st_c_f16: 10832 case NVPTX::BI__hmma_m32n8k16_st_c_f32: 10833 case NVPTX::BI__hmma_m8n32k16_st_c_f16: 10834 case NVPTX::BI__hmma_m8n32k16_st_c_f32: { 10835 Value *Dst = EmitScalarExpr(E->getArg(0)); 10836 Address Src = EmitPointerWithAlignment(E->getArg(1)); 10837 Value *Ldm = EmitScalarExpr(E->getArg(2)); 10838 llvm::APSInt isColMajorArg; 10839 if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) 10840 return nullptr; 10841 bool isColMajor = isColMajorArg.getSExtValue(); 10842 unsigned IID; 10843 unsigned NumResults = 8; 10844 // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet 10845 // for some reason nvcc builtins use _c_. 10846 switch (BuiltinID) { 10847 case NVPTX::BI__hmma_m16n16k16_st_c_f16: 10848 IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride 10849 : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; 10850 NumResults = 4; 10851 break; 10852 case NVPTX::BI__hmma_m16n16k16_st_c_f32: 10853 IID = isColMajor ? 
Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride 10854 : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; 10855 break; 10856 case NVPTX::BI__hmma_m32n8k16_st_c_f16: 10857 IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride 10858 : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride; 10859 NumResults = 4; 10860 break; 10861 case NVPTX::BI__hmma_m32n8k16_st_c_f32: 10862 IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride 10863 : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride; 10864 break; 10865 case NVPTX::BI__hmma_m8n32k16_st_c_f16: 10866 IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride 10867 : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride; 10868 NumResults = 4; 10869 break; 10870 case NVPTX::BI__hmma_m8n32k16_st_c_f32: 10871 IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride 10872 : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride; 10873 break; 10874 default: 10875 llvm_unreachable("Unexpected builtin ID."); 10876 } 10877 Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType()); 10878 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); 10879 SmallVector<Value *, 10> Values = {Dst}; 10880 for (unsigned i = 0; i < NumResults; ++i) { 10881 Value *V = Builder.CreateAlignedLoad( 10882 Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), 10883 CharUnits::fromQuantity(4)); 10884 Values.push_back(Builder.CreateBitCast(V, ParamType)); 10885 } 10886 Values.push_back(Ldm); 10887 Value *Result = Builder.CreateCall(Intrinsic, Values); 10888 return Result; 10889 } 10890 10891 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) --> 10892 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf> 10893 case NVPTX::BI__hmma_m16n16k16_mma_f16f16: 10894 case NVPTX::BI__hmma_m16n16k16_mma_f32f16: 10895 case NVPTX::BI__hmma_m16n16k16_mma_f32f32: 10896 case NVPTX::BI__hmma_m16n16k16_mma_f16f32: 10897 case 
NVPTX::BI__hmma_m32n8k16_mma_f16f16: 10898 case NVPTX::BI__hmma_m32n8k16_mma_f32f16: 10899 case NVPTX::BI__hmma_m32n8k16_mma_f32f32: 10900 case NVPTX::BI__hmma_m32n8k16_mma_f16f32: 10901 case NVPTX::BI__hmma_m8n32k16_mma_f16f16: 10902 case NVPTX::BI__hmma_m8n32k16_mma_f32f16: 10903 case NVPTX::BI__hmma_m8n32k16_mma_f32f32: 10904 case NVPTX::BI__hmma_m8n32k16_mma_f16f32: { 10905 Address Dst = EmitPointerWithAlignment(E->getArg(0)); 10906 Address SrcA = EmitPointerWithAlignment(E->getArg(1)); 10907 Address SrcB = EmitPointerWithAlignment(E->getArg(2)); 10908 Address SrcC = EmitPointerWithAlignment(E->getArg(3)); 10909 llvm::APSInt LayoutArg; 10910 if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) 10911 return nullptr; 10912 int Layout = LayoutArg.getSExtValue(); 10913 if (Layout < 0 || Layout > 3) 10914 return nullptr; 10915 llvm::APSInt SatfArg; 10916 if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) 10917 return nullptr; 10918 bool Satf = SatfArg.getSExtValue(); 10919 10920 // clang-format off 10921 #define MMA_VARIANTS(geom, type) {{ \ 10922 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ 10923 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ 10924 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ 10925 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ 10926 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ 10927 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ 10928 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ 10929 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ 10930 }} 10931 // clang-format on 10932 10933 auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { 10934 unsigned Index = Layout * 2 + Satf; 10935 assert(Index < 8); 10936 return Variants[Index]; 10937 }; 10938 unsigned IID; 10939 unsigned NumEltsC; 10940 unsigned NumEltsD; 10941 switch (BuiltinID) { 10942 case NVPTX::BI__hmma_m16n16k16_mma_f16f16: 10943 IID = 
getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16)); 10944 NumEltsC = 4; 10945 NumEltsD = 4; 10946 break; 10947 case NVPTX::BI__hmma_m16n16k16_mma_f32f16: 10948 IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16)); 10949 NumEltsC = 4; 10950 NumEltsD = 8; 10951 break; 10952 case NVPTX::BI__hmma_m16n16k16_mma_f16f32: 10953 IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32)); 10954 NumEltsC = 8; 10955 NumEltsD = 4; 10956 break; 10957 case NVPTX::BI__hmma_m16n16k16_mma_f32f32: 10958 IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32)); 10959 NumEltsC = 8; 10960 NumEltsD = 8; 10961 break; 10962 case NVPTX::BI__hmma_m32n8k16_mma_f16f16: 10963 IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16)); 10964 NumEltsC = 4; 10965 NumEltsD = 4; 10966 break; 10967 case NVPTX::BI__hmma_m32n8k16_mma_f32f16: 10968 IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16)); 10969 NumEltsC = 4; 10970 NumEltsD = 8; 10971 break; 10972 case NVPTX::BI__hmma_m32n8k16_mma_f16f32: 10973 IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32)); 10974 NumEltsC = 8; 10975 NumEltsD = 4; 10976 break; 10977 case NVPTX::BI__hmma_m32n8k16_mma_f32f32: 10978 IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32)); 10979 NumEltsC = 8; 10980 NumEltsD = 8; 10981 break; 10982 case NVPTX::BI__hmma_m8n32k16_mma_f16f16: 10983 IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16)); 10984 NumEltsC = 4; 10985 NumEltsD = 4; 10986 break; 10987 case NVPTX::BI__hmma_m8n32k16_mma_f32f16: 10988 IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16)); 10989 NumEltsC = 4; 10990 NumEltsD = 8; 10991 break; 10992 case NVPTX::BI__hmma_m8n32k16_mma_f16f32: 10993 IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32)); 10994 NumEltsC = 8; 10995 NumEltsD = 4; 10996 break; 10997 case NVPTX::BI__hmma_m8n32k16_mma_f32f32: 10998 IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32)); 10999 NumEltsC = 8; 11000 NumEltsD = 8; 11001 break; 11002 default: 11003 llvm_unreachable("Unexpected builtin ID."); 11004 } 11005 #undef 
MMA_VARIANTS 11006 11007 SmallVector<Value *, 24> Values; 11008 Function *Intrinsic = CGM.getIntrinsic(IID); 11009 llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); 11010 // Load A 11011 for (unsigned i = 0; i < 8; ++i) { 11012 Value *V = Builder.CreateAlignedLoad( 11013 Builder.CreateGEP(SrcA.getPointer(), 11014 llvm::ConstantInt::get(IntTy, i)), 11015 CharUnits::fromQuantity(4)); 11016 Values.push_back(Builder.CreateBitCast(V, ABType)); 11017 } 11018 // Load B 11019 for (unsigned i = 0; i < 8; ++i) { 11020 Value *V = Builder.CreateAlignedLoad( 11021 Builder.CreateGEP(SrcB.getPointer(), 11022 llvm::ConstantInt::get(IntTy, i)), 11023 CharUnits::fromQuantity(4)); 11024 Values.push_back(Builder.CreateBitCast(V, ABType)); 11025 } 11026 // Load C 11027 llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); 11028 for (unsigned i = 0; i < NumEltsC; ++i) { 11029 Value *V = Builder.CreateAlignedLoad( 11030 Builder.CreateGEP(SrcC.getPointer(), 11031 llvm::ConstantInt::get(IntTy, i)), 11032 CharUnits::fromQuantity(4)); 11033 Values.push_back(Builder.CreateBitCast(V, CType)); 11034 } 11035 Value *Result = Builder.CreateCall(Intrinsic, Values); 11036 llvm::Type *DType = Dst.getElementType(); 11037 for (unsigned i = 0; i < NumEltsD; ++i) 11038 Builder.CreateAlignedStore( 11039 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), 11040 Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), 11041 CharUnits::fromQuantity(4)); 11042 return Result; 11043 } 11044 default: 11045 return nullptr; 11046 } 11047 } 11048 11049 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 11050 const CallExpr *E) { 11051 switch (BuiltinID) { 11052 case WebAssembly::BI__builtin_wasm_mem_size: { 11053 llvm::Type *ResultType = ConvertType(E->getType()); 11054 Value *I = EmitScalarExpr(E->getArg(0)); 11055 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType); 11056 return Builder.CreateCall(Callee, I); 
11057 } 11058 case WebAssembly::BI__builtin_wasm_mem_grow: { 11059 llvm::Type *ResultType = ConvertType(E->getType()); 11060 Value *Args[] = { 11061 EmitScalarExpr(E->getArg(0)), 11062 EmitScalarExpr(E->getArg(1)) 11063 }; 11064 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType); 11065 return Builder.CreateCall(Callee, Args); 11066 } 11067 case WebAssembly::BI__builtin_wasm_current_memory: { 11068 llvm::Type *ResultType = ConvertType(E->getType()); 11069 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 11070 return Builder.CreateCall(Callee); 11071 } 11072 case WebAssembly::BI__builtin_wasm_grow_memory: { 11073 Value *X = EmitScalarExpr(E->getArg(0)); 11074 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 11075 return Builder.CreateCall(Callee, X); 11076 } 11077 case WebAssembly::BI__builtin_wasm_throw: { 11078 Value *Tag = EmitScalarExpr(E->getArg(0)); 11079 Value *Obj = EmitScalarExpr(E->getArg(1)); 11080 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); 11081 return Builder.CreateCall(Callee, {Tag, Obj}); 11082 } 11083 case WebAssembly::BI__builtin_wasm_rethrow: { 11084 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); 11085 return Builder.CreateCall(Callee); 11086 } 11087 11088 default: 11089 return nullptr; 11090 } 11091 } 11092 11093 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, 11094 const CallExpr *E) { 11095 SmallVector<llvm::Value *, 4> Ops; 11096 Intrinsic::ID ID = Intrinsic::not_intrinsic; 11097 11098 auto MakeCircLd = [&](unsigned IntID, bool HasImm) { 11099 // The base pointer is passed by address, so it needs to be loaded. 11100 Address BP = EmitPointerWithAlignment(E->getArg(0)); 11101 BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy), 11102 BP.getAlignment()); 11103 llvm::Value *Base = Builder.CreateLoad(BP); 11104 // Operands are Base, Increment, Modifier, Start. 
11105 if (HasImm) 11106 Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), 11107 EmitScalarExpr(E->getArg(3)) }; 11108 else 11109 Ops = { Base, EmitScalarExpr(E->getArg(1)), 11110 EmitScalarExpr(E->getArg(2)) }; 11111 11112 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); 11113 llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1); 11114 llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), 11115 NewBase->getType()->getPointerTo()); 11116 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 11117 // The intrinsic generates two results. The new value for the base pointer 11118 // needs to be stored. 11119 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); 11120 return Builder.CreateExtractValue(Result, 0); 11121 }; 11122 11123 auto MakeCircSt = [&](unsigned IntID, bool HasImm) { 11124 // The base pointer is passed by address, so it needs to be loaded. 11125 Address BP = EmitPointerWithAlignment(E->getArg(0)); 11126 BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy), 11127 BP.getAlignment()); 11128 llvm::Value *Base = Builder.CreateLoad(BP); 11129 // Operands are Base, Increment, Modifier, Value, Start. 11130 if (HasImm) 11131 Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), 11132 EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) }; 11133 else 11134 Ops = { Base, EmitScalarExpr(E->getArg(1)), 11135 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) }; 11136 11137 llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); 11138 llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), 11139 NewBase->getType()->getPointerTo()); 11140 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 11141 // The intrinsic generates one result, which is the new value for the base 11142 // pointer. It needs to be stored. 
11143 return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); 11144 }; 11145 11146 // Handle the conversion of bit-reverse load intrinsics to bit code. 11147 // The intrinsic call after this function only reads from memory and the 11148 // write to memory is dealt by the store instruction. 11149 auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) { 11150 // The intrinsic generates one result, which is the new value for the base 11151 // pointer. It needs to be returned. The result of the load instruction is 11152 // passed to intrinsic by address, so the value needs to be stored. 11153 llvm::Value *BaseAddress = 11154 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); 11155 11156 // Expressions like &(*pt++) will be incremented per evaluation. 11157 // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression 11158 // per call. 11159 Address DestAddr = EmitPointerWithAlignment(E->getArg(1)); 11160 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy), 11161 DestAddr.getAlignment()); 11162 llvm::Value *DestAddress = DestAddr.getPointer(); 11163 11164 // Operands are Base, Dest, Modifier. 11165 // The intrinsic format in LLVM IR is defined as 11166 // { ValueType, i8* } (i8*, i32). 11167 Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))}; 11168 11169 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); 11170 // The value needs to be stored as the variable is passed by reference. 11171 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0); 11172 11173 // The store needs to be truncated to fit the destination type. 11174 // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs 11175 // to be handled with stores of respective destination type. 
11176 DestVal = Builder.CreateTrunc(DestVal, DestTy); 11177 11178 llvm::Value *DestForStore = 11179 Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo()); 11180 Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment()); 11181 // The updated value of the base pointer is returned. 11182 return Builder.CreateExtractValue(Result, 1); 11183 }; 11184 11185 switch (BuiltinID) { 11186 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: 11187 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: { 11188 Address Dest = EmitPointerWithAlignment(E->getArg(2)); 11189 unsigned Size; 11190 if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) { 11191 Size = 512; 11192 ID = Intrinsic::hexagon_V6_vaddcarry; 11193 } else { 11194 Size = 1024; 11195 ID = Intrinsic::hexagon_V6_vaddcarry_128B; 11196 } 11197 Dest = Builder.CreateBitCast(Dest, 11198 llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); 11199 LoadInst *QLd = Builder.CreateLoad(Dest); 11200 Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd }; 11201 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 11202 llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); 11203 llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), 11204 Vprd->getType()->getPointerTo(0)); 11205 Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); 11206 return Builder.CreateExtractValue(Result, 0); 11207 } 11208 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry: 11209 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { 11210 Address Dest = EmitPointerWithAlignment(E->getArg(2)); 11211 unsigned Size; 11212 if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) { 11213 Size = 512; 11214 ID = Intrinsic::hexagon_V6_vsubcarry; 11215 } else { 11216 Size = 1024; 11217 ID = Intrinsic::hexagon_V6_vsubcarry_128B; 11218 } 11219 Dest = Builder.CreateBitCast(Dest, 11220 llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); 11221 
LoadInst *QLd = Builder.CreateLoad(Dest); 11222 Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd }; 11223 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 11224 llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); 11225 llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), 11226 Vprd->getType()->getPointerTo(0)); 11227 Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); 11228 return Builder.CreateExtractValue(Result, 0); 11229 } 11230 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: 11231 return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true); 11232 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: 11233 return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true); 11234 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: 11235 return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true); 11236 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci: 11237 return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true); 11238 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci: 11239 return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true); 11240 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci: 11241 return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true); 11242 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr: 11243 return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false); 11244 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr: 11245 return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false); 11246 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr: 11247 return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false); 11248 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr: 11249 return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false); 11250 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr: 11251 return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false); 11252 case 
Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr: 11253 return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false); 11254 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci: 11255 return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true); 11256 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci: 11257 return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true); 11258 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci: 11259 return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true); 11260 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci: 11261 return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true); 11262 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci: 11263 return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true); 11264 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr: 11265 return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false); 11266 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr: 11267 return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false); 11268 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr: 11269 return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false); 11270 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr: 11271 return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false); 11272 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr: 11273 return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false); 11274 case Hexagon::BI__builtin_brev_ldub: 11275 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty); 11276 case Hexagon::BI__builtin_brev_ldb: 11277 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty); 11278 case Hexagon::BI__builtin_brev_lduh: 11279 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty); 11280 case Hexagon::BI__builtin_brev_ldh: 11281 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty); 11282 case Hexagon::BI__builtin_brev_ldw: 11283 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); 
11284 case Hexagon::BI__builtin_brev_ldd: 11285 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); 11286 default: 11287 break; 11288 } // switch 11289 11290 return nullptr; 11291 } 11292