1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenFunction.h" 15 #include "CGCXXABI.h" 16 #include "CGObjCRuntime.h" 17 #include "CodeGenModule.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/ASTContext.h" 20 #include "clang/AST/Decl.h" 21 #include "clang/Basic/TargetBuiltins.h" 22 #include "clang/Basic/TargetInfo.h" 23 #include "clang/CodeGen/CGFunctionInfo.h" 24 #include "llvm/ADT/StringExtras.h" 25 #include "llvm/IR/CallSite.h" 26 #include "llvm/IR/DataLayout.h" 27 #include "llvm/IR/InlineAsm.h" 28 #include "llvm/IR/Intrinsics.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include <sstream> 31 32 using namespace clang; 33 using namespace CodeGen; 34 using namespace llvm; 35 36 /// getBuiltinLibFunction - Given a builtin id for a function like 37 /// "__builtin_fabsf", return a Function* for "fabsf". 38 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 39 unsigned BuiltinID) { 40 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 41 42 // Get the name, skip over the __builtin_ prefix (if necessary). 43 StringRef Name; 44 GlobalDecl D(FD); 45 46 // If the builtin has been declared explicitly with an assembler label, 47 // use the mangled name. This differs from the plain label on platforms 48 // that prefix labels. 
49 if (FD->hasAttr<AsmLabelAttr>()) 50 Name = getMangledName(D); 51 else 52 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 53 54 llvm::FunctionType *Ty = 55 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 56 57 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 58 } 59 60 /// Emit the conversions required to turn the given value into an 61 /// integer of the given size. 62 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 63 QualType T, llvm::IntegerType *IntType) { 64 V = CGF.EmitToMemory(V, T); 65 66 if (V->getType()->isPointerTy()) 67 return CGF.Builder.CreatePtrToInt(V, IntType); 68 69 assert(V->getType() == IntType); 70 return V; 71 } 72 73 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 74 QualType T, llvm::Type *ResultType) { 75 V = CGF.EmitFromMemory(V, T); 76 77 if (ResultType->isPointerTy()) 78 return CGF.Builder.CreateIntToPtr(V, ResultType); 79 80 assert(V->getType() == ResultType); 81 return V; 82 } 83 84 /// Utility to insert an atomic instruction based on Instrinsic::ID 85 /// and the expression node. 
86 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 87 llvm::AtomicRMWInst::BinOp Kind, 88 const CallExpr *E) { 89 QualType T = E->getType(); 90 assert(E->getArg(0)->getType()->isPointerType()); 91 assert(CGF.getContext().hasSameUnqualifiedType(T, 92 E->getArg(0)->getType()->getPointeeType())); 93 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 94 95 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 96 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 97 98 llvm::IntegerType *IntType = 99 llvm::IntegerType::get(CGF.getLLVMContext(), 100 CGF.getContext().getTypeSize(T)); 101 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 102 103 llvm::Value *Args[2]; 104 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 105 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 106 llvm::Type *ValueType = Args[1]->getType(); 107 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 108 109 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 110 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 111 return EmitFromInt(CGF, Result, T, ValueType); 112 } 113 114 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 115 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 116 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 117 118 // Convert the type of the pointer to a pointer to the stored type. 
119 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 120 Value *BC = CGF.Builder.CreateBitCast( 121 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 122 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 123 LV.setNontemporal(true); 124 CGF.EmitStoreOfScalar(Val, LV, false); 125 return nullptr; 126 } 127 128 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 129 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 130 131 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 132 LV.setNontemporal(true); 133 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 134 } 135 136 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 137 llvm::AtomicRMWInst::BinOp Kind, 138 const CallExpr *E) { 139 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 140 } 141 142 /// Utility to insert an atomic instruction based Instrinsic::ID and 143 /// the expression node, where the return value is the result of the 144 /// operation. 
145 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 146 llvm::AtomicRMWInst::BinOp Kind, 147 const CallExpr *E, 148 Instruction::BinaryOps Op, 149 bool Invert = false) { 150 QualType T = E->getType(); 151 assert(E->getArg(0)->getType()->isPointerType()); 152 assert(CGF.getContext().hasSameUnqualifiedType(T, 153 E->getArg(0)->getType()->getPointeeType())); 154 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 155 156 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 157 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 158 159 llvm::IntegerType *IntType = 160 llvm::IntegerType::get(CGF.getLLVMContext(), 161 CGF.getContext().getTypeSize(T)); 162 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 163 164 llvm::Value *Args[2]; 165 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 166 llvm::Type *ValueType = Args[1]->getType(); 167 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 168 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 169 170 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 171 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 172 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 173 if (Invert) 174 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 175 llvm::ConstantInt::get(IntType, -1)); 176 Result = EmitFromInt(CGF, Result, T, ValueType); 177 return RValue::get(Result); 178 } 179 180 /// @brief Utility to insert an atomic cmpxchg instruction. 181 /// 182 /// @param CGF The current codegen function. 183 /// @param E Builtin call expression to convert to cmpxchg. 184 /// arg0 - address to operate on 185 /// arg1 - value to compare with 186 /// arg2 - new value 187 /// @param ReturnBool Specifies whether to return success flag of 188 /// cmpxchg result or the old value. 
189 /// 190 /// @returns result of cmpxchg, according to ReturnBool 191 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 192 bool ReturnBool) { 193 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 194 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 195 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 196 197 llvm::IntegerType *IntType = llvm::IntegerType::get( 198 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 199 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 200 201 Value *Args[3]; 202 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 203 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 204 llvm::Type *ValueType = Args[1]->getType(); 205 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 206 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 207 208 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 209 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 210 llvm::AtomicOrdering::SequentiallyConsistent); 211 if (ReturnBool) 212 // Extract boolean success flag and zext it to int. 213 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 214 CGF.ConvertType(E->getType())); 215 else 216 // Extract old value and emit it using the same type as compare value. 217 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 218 ValueType); 219 } 220 221 // Emit a simple mangled intrinsic that has 1 argument and a return type 222 // matching the argument type. 223 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 224 const CallExpr *E, 225 unsigned IntrinsicID) { 226 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 227 228 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 229 return CGF.Builder.CreateCall(F, Src0); 230 } 231 232 // Emit an intrinsic that has 2 operands of the same type as its result. 
233 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 234 const CallExpr *E, 235 unsigned IntrinsicID) { 236 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 237 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 238 239 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 240 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 241 } 242 243 // Emit an intrinsic that has 3 operands of the same type as its result. 244 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 245 const CallExpr *E, 246 unsigned IntrinsicID) { 247 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 248 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 249 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 250 251 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 252 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 253 } 254 255 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 256 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 257 const CallExpr *E, 258 unsigned IntrinsicID) { 259 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 260 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 261 262 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 263 return CGF.Builder.CreateCall(F, {Src0, Src1}); 264 } 265 266 /// EmitFAbs - Emit a call to @llvm.fabs(). 267 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 268 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 269 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 270 Call->setDoesNotAccessMemory(); 271 return Call; 272 } 273 274 /// Emit the computation of the sign bit for a floating point value. Returns 275 /// the i1 sign bit value. 
276 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 277 LLVMContext &C = CGF.CGM.getLLVMContext(); 278 279 llvm::Type *Ty = V->getType(); 280 int Width = Ty->getPrimitiveSizeInBits(); 281 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 282 V = CGF.Builder.CreateBitCast(V, IntTy); 283 if (Ty->isPPC_FP128Ty()) { 284 // We want the sign bit of the higher-order double. The bitcast we just 285 // did works as if the double-double was stored to memory and then 286 // read as an i128. The "store" will put the higher-order double in the 287 // lower address in both little- and big-Endian modes, but the "load" 288 // will treat those bits as a different part of the i128: the low bits in 289 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 290 // we need to shift the high bits down to the low before truncating. 291 Width >>= 1; 292 if (CGF.getTarget().isBigEndian()) { 293 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 294 V = CGF.Builder.CreateLShr(V, ShiftCst); 295 } 296 // We are truncating value in order to extract the higher-order 297 // double, which we will be using to extract the sign from. 298 IntTy = llvm::IntegerType::get(C, Width); 299 V = CGF.Builder.CreateTrunc(V, IntTy); 300 } 301 Value *Zero = llvm::Constant::getNullValue(IntTy); 302 return CGF.Builder.CreateICmpSLT(V, Zero); 303 } 304 305 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 306 const CallExpr *E, llvm::Value *calleeValue) { 307 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E, 308 ReturnValueSlot(), Fn); 309 } 310 311 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 312 /// depending on IntrinsicID. 313 /// 314 /// \arg CGF The current codegen function. 315 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 316 /// \arg X The first argument to the llvm.*.with.overflow.*. 317 /// \arg Y The second argument to the llvm.*.with.overflow.*. 
318 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 319 /// \returns The result (i.e. sum/product) returned by the intrinsic. 320 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 321 const llvm::Intrinsic::ID IntrinsicID, 322 llvm::Value *X, llvm::Value *Y, 323 llvm::Value *&Carry) { 324 // Make sure we have integers of the same width. 325 assert(X->getType() == Y->getType() && 326 "Arguments must be the same type. (Did you forget to make sure both " 327 "arguments have the same integer width?)"); 328 329 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 330 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 331 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 332 return CGF.Builder.CreateExtractValue(Tmp, 0); 333 } 334 335 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 336 unsigned IntrinsicID, 337 int low, int high) { 338 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 339 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 340 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 341 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 342 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 343 return Call; 344 } 345 346 namespace { 347 struct WidthAndSignedness { 348 unsigned Width; 349 bool Signed; 350 }; 351 } 352 353 static WidthAndSignedness 354 getIntegerWidthAndSignedness(const clang::ASTContext &context, 355 const clang::QualType Type) { 356 assert(Type->isIntegerType() && "Given type is not an integer."); 357 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 358 bool Signed = Type->isSignedIntegerType(); 359 return {Width, Signed}; 360 } 361 362 // Given one or more integer types, this function produces an integer type that 363 // encompasses them: any value in one of the given types could be expressed in 364 // the encompassing type. 
365 static struct WidthAndSignedness 366 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 367 assert(Types.size() > 0 && "Empty list of types."); 368 369 // If any of the given types is signed, we must return a signed type. 370 bool Signed = false; 371 for (const auto &Type : Types) { 372 Signed |= Type.Signed; 373 } 374 375 // The encompassing type must have a width greater than or equal to the width 376 // of the specified types. Aditionally, if the encompassing type is signed, 377 // its width must be strictly greater than the width of any unsigned types 378 // given. 379 unsigned Width = 0; 380 for (const auto &Type : Types) { 381 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 382 if (Width < MinWidth) { 383 Width = MinWidth; 384 } 385 } 386 387 return {Width, Signed}; 388 } 389 390 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 391 llvm::Type *DestType = Int8PtrTy; 392 if (ArgValue->getType() != DestType) 393 ArgValue = 394 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 395 396 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 397 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 398 } 399 400 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 401 /// __builtin_object_size(p, @p To) is correct 402 static bool areBOSTypesCompatible(int From, int To) { 403 // Note: Our __builtin_object_size implementation currently treats Type=0 and 404 // Type=2 identically. Encoding this implementation detail here may make 405 // improving __builtin_object_size difficult in the future, so it's omitted. 406 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 407 } 408 409 static llvm::Value * 410 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 411 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 412 } 413 414 llvm::Value * 415 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 416 llvm::IntegerType *ResType) { 417 uint64_t ObjectSize; 418 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 419 return emitBuiltinObjectSize(E, Type, ResType); 420 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 421 } 422 423 /// Returns a Value corresponding to the size of the given expression. 424 /// This Value may be either of the following: 425 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 426 /// it) 427 /// - A call to the @llvm.objectsize intrinsic 428 llvm::Value * 429 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 430 llvm::IntegerType *ResType) { 431 // We need to reference an argument if the pointer is a parameter with the 432 // pass_object_size attribute. 433 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 434 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 435 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 436 if (Param != nullptr && PS != nullptr && 437 areBOSTypesCompatible(PS->getType(), Type)) { 438 auto Iter = SizeArguments.find(Param); 439 assert(Iter != SizeArguments.end()); 440 441 const ImplicitParamDecl *D = Iter->second; 442 auto DIter = LocalDeclMap.find(D); 443 assert(DIter != LocalDeclMap.end()); 444 445 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 446 getContext().getSizeType(), E->getLocStart()); 447 } 448 } 449 450 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 451 // evaluate E for side-effects. In either case, we shouldn't lower to 452 // @llvm.objectsize. 453 if (Type == 3 || E->HasSideEffects(getContext())) 454 return getDefaultBuiltinObjectSizeResult(Type, ResType); 455 456 // LLVM only supports 0 and 2, make sure that we pass along that 457 // as a boolean. 
458 auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1); 459 // FIXME: Get right address space. 460 llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)}; 461 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); 462 return Builder.CreateCall(F, {EmitScalarExpr(E), CI}); 463 } 464 465 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 466 unsigned BuiltinID, const CallExpr *E, 467 ReturnValueSlot ReturnValue) { 468 // See if we can constant fold this builtin. If so, don't emit it at all. 469 Expr::EvalResult Result; 470 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 471 !Result.hasSideEffects()) { 472 if (Result.Val.isInt()) 473 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 474 Result.Val.getInt())); 475 if (Result.Val.isFloat()) 476 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 477 Result.Val.getFloat())); 478 } 479 480 switch (BuiltinID) { 481 default: break; // Handle intrinsics and libm functions below. 482 case Builtin::BI__builtin___CFStringMakeConstantString: 483 case Builtin::BI__builtin___NSStringMakeConstantString: 484 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 485 case Builtin::BI__builtin_stdarg_start: 486 case Builtin::BI__builtin_va_start: 487 case Builtin::BI__va_start: 488 case Builtin::BI__builtin_va_end: 489 return RValue::get( 490 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 491 ? 
EmitScalarExpr(E->getArg(0)) 492 : EmitVAListRef(E->getArg(0)).getPointer(), 493 BuiltinID != Builtin::BI__builtin_va_end)); 494 case Builtin::BI__builtin_va_copy: { 495 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 496 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 497 498 llvm::Type *Type = Int8PtrTy; 499 500 DstPtr = Builder.CreateBitCast(DstPtr, Type); 501 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 502 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 503 {DstPtr, SrcPtr})); 504 } 505 case Builtin::BI__builtin_abs: 506 case Builtin::BI__builtin_labs: 507 case Builtin::BI__builtin_llabs: { 508 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 509 510 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 511 Value *CmpResult = 512 Builder.CreateICmpSGE(ArgValue, 513 llvm::Constant::getNullValue(ArgValue->getType()), 514 "abscond"); 515 Value *Result = 516 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 517 518 return RValue::get(Result); 519 } 520 case Builtin::BI__builtin_fabs: 521 case Builtin::BI__builtin_fabsf: 522 case Builtin::BI__builtin_fabsl: { 523 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 524 } 525 case Builtin::BI__builtin_fmod: 526 case Builtin::BI__builtin_fmodf: 527 case Builtin::BI__builtin_fmodl: { 528 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 529 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 530 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 531 return RValue::get(Result); 532 } 533 case Builtin::BI__builtin_copysign: 534 case Builtin::BI__builtin_copysignf: 535 case Builtin::BI__builtin_copysignl: { 536 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 537 } 538 case Builtin::BI__builtin_ceil: 539 case Builtin::BI__builtin_ceilf: 540 case Builtin::BI__builtin_ceill: { 541 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 542 } 543 case Builtin::BI__builtin_floor: 544 case Builtin::BI__builtin_floorf: 545 case 
Builtin::BI__builtin_floorl: { 546 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 547 } 548 case Builtin::BI__builtin_trunc: 549 case Builtin::BI__builtin_truncf: 550 case Builtin::BI__builtin_truncl: { 551 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 552 } 553 case Builtin::BI__builtin_rint: 554 case Builtin::BI__builtin_rintf: 555 case Builtin::BI__builtin_rintl: { 556 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 557 } 558 case Builtin::BI__builtin_nearbyint: 559 case Builtin::BI__builtin_nearbyintf: 560 case Builtin::BI__builtin_nearbyintl: { 561 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 562 } 563 case Builtin::BI__builtin_round: 564 case Builtin::BI__builtin_roundf: 565 case Builtin::BI__builtin_roundl: { 566 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 567 } 568 case Builtin::BI__builtin_fmin: 569 case Builtin::BI__builtin_fminf: 570 case Builtin::BI__builtin_fminl: { 571 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 572 } 573 case Builtin::BI__builtin_fmax: 574 case Builtin::BI__builtin_fmaxf: 575 case Builtin::BI__builtin_fmaxl: { 576 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 577 } 578 case Builtin::BI__builtin_conj: 579 case Builtin::BI__builtin_conjf: 580 case Builtin::BI__builtin_conjl: { 581 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 582 Value *Real = ComplexVal.first; 583 Value *Imag = ComplexVal.second; 584 Value *Zero = 585 Imag->getType()->isFPOrFPVectorTy() 586 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 587 : llvm::Constant::getNullValue(Imag->getType()); 588 589 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 590 return RValue::getComplex(std::make_pair(Real, Imag)); 591 } 592 case Builtin::BI__builtin_creal: 593 case Builtin::BI__builtin_crealf: 594 case Builtin::BI__builtin_creall: 595 case Builtin::BIcreal: 596 case Builtin::BIcrealf: 597 case Builtin::BIcreall: { 598 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 599 return RValue::get(ComplexVal.first); 600 } 601 602 case Builtin::BI__builtin_cimag: 603 case Builtin::BI__builtin_cimagf: 604 case Builtin::BI__builtin_cimagl: 605 case Builtin::BIcimag: 606 case Builtin::BIcimagf: 607 case Builtin::BIcimagl: { 608 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 609 return RValue::get(ComplexVal.second); 610 } 611 612 case Builtin::BI__builtin_ctzs: 613 case Builtin::BI__builtin_ctz: 614 case Builtin::BI__builtin_ctzl: 615 case Builtin::BI__builtin_ctzll: { 616 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 617 618 llvm::Type *ArgType = ArgValue->getType(); 619 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 620 621 llvm::Type *ResultType = ConvertType(E->getType()); 622 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 623 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 624 if (Result->getType() != ResultType) 625 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 626 "cast"); 627 return RValue::get(Result); 628 } 629 case Builtin::BI__builtin_clzs: 630 case Builtin::BI__builtin_clz: 631 case Builtin::BI__builtin_clzl: 632 case Builtin::BI__builtin_clzll: { 633 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 634 635 llvm::Type *ArgType = ArgValue->getType(); 636 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 637 638 llvm::Type *ResultType = ConvertType(E->getType()); 639 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 640 Value *Result = 
Builder.CreateCall(F, {ArgValue, ZeroUndef}); 641 if (Result->getType() != ResultType) 642 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 643 "cast"); 644 return RValue::get(Result); 645 } 646 case Builtin::BI__builtin_ffs: 647 case Builtin::BI__builtin_ffsl: 648 case Builtin::BI__builtin_ffsll: { 649 // ffs(x) -> x ? cttz(x) + 1 : 0 650 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 651 652 llvm::Type *ArgType = ArgValue->getType(); 653 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 654 655 llvm::Type *ResultType = ConvertType(E->getType()); 656 Value *Tmp = 657 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 658 llvm::ConstantInt::get(ArgType, 1)); 659 Value *Zero = llvm::Constant::getNullValue(ArgType); 660 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 661 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 662 if (Result->getType() != ResultType) 663 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 664 "cast"); 665 return RValue::get(Result); 666 } 667 case Builtin::BI__builtin_parity: 668 case Builtin::BI__builtin_parityl: 669 case Builtin::BI__builtin_parityll: { 670 // parity(x) -> ctpop(x) & 1 671 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 672 673 llvm::Type *ArgType = ArgValue->getType(); 674 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 675 676 llvm::Type *ResultType = ConvertType(E->getType()); 677 Value *Tmp = Builder.CreateCall(F, ArgValue); 678 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 679 if (Result->getType() != ResultType) 680 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 681 "cast"); 682 return RValue::get(Result); 683 } 684 case Builtin::BI__builtin_popcount: 685 case Builtin::BI__builtin_popcountl: 686 case Builtin::BI__builtin_popcountll: { 687 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 688 689 llvm::Type *ArgType = ArgValue->getType(); 690 Value *F = 
CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 691 692 llvm::Type *ResultType = ConvertType(E->getType()); 693 Value *Result = Builder.CreateCall(F, ArgValue); 694 if (Result->getType() != ResultType) 695 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 696 "cast"); 697 return RValue::get(Result); 698 } 699 case Builtin::BI__builtin_unpredictable: { 700 // Always return the argument of __builtin_unpredictable. LLVM does not 701 // handle this builtin. Metadata for this builtin should be added directly 702 // to instructions such as branches or switches that use it. 703 return RValue::get(EmitScalarExpr(E->getArg(0))); 704 } 705 case Builtin::BI__builtin_expect: { 706 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 707 llvm::Type *ArgType = ArgValue->getType(); 708 709 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 710 // Don't generate llvm.expect on -O0 as the backend won't use it for 711 // anything. 712 // Note, we still IRGen ExpectedValue because it could have side-effects. 713 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 714 return RValue::get(ArgValue); 715 716 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 717 Value *Result = 718 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 719 return RValue::get(Result); 720 } 721 case Builtin::BI__builtin_assume_aligned: { 722 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 723 Value *OffsetValue = 724 (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; 725 726 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 727 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 728 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 729 730 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 731 return RValue::get(PtrValue); 732 } 733 case Builtin::BI__assume: 734 case Builtin::BI__builtin_assume: { 735 if (E->getArg(0)->HasSideEffects(getContext())) 736 return RValue::get(nullptr); 737 738 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 739 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 740 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 741 } 742 case Builtin::BI__builtin_bswap16: 743 case Builtin::BI__builtin_bswap32: 744 case Builtin::BI__builtin_bswap64: { 745 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 746 } 747 case Builtin::BI__builtin_bitreverse8: 748 case Builtin::BI__builtin_bitreverse16: 749 case Builtin::BI__builtin_bitreverse32: 750 case Builtin::BI__builtin_bitreverse64: { 751 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 752 } 753 case Builtin::BI__builtin_object_size: { 754 unsigned Type = 755 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 756 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 757 758 // We pass this builtin onto the optimizer so that it can figure out the 759 // object size in more complex cases. 760 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType)); 761 } 762 case Builtin::BI__builtin_prefetch: { 763 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 764 // FIXME: Technically these constants should of type 'int', yes? 765 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 766 llvm::ConstantInt::get(Int32Ty, 0); 767 Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : 768 llvm::ConstantInt::get(Int32Ty, 3); 769 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 770 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 771 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 772 } 773 case Builtin::BI__builtin_readcyclecounter: { 774 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 775 return RValue::get(Builder.CreateCall(F)); 776 } 777 case Builtin::BI__builtin___clear_cache: { 778 Value *Begin = EmitScalarExpr(E->getArg(0)); 779 Value *End = EmitScalarExpr(E->getArg(1)); 780 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 781 return RValue::get(Builder.CreateCall(F, {Begin, End})); 782 } 783 case Builtin::BI__builtin_trap: 784 return RValue::get(EmitTrapCall(Intrinsic::trap)); 785 case Builtin::BI__debugbreak: 786 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 787 case Builtin::BI__builtin_unreachable: { 788 if (SanOpts.has(SanitizerKind::Unreachable)) { 789 SanitizerScope SanScope(this); 790 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 791 SanitizerKind::Unreachable), 792 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), 793 None); 794 } else 795 Builder.CreateUnreachable(); 796 797 // We do need to preserve an insertion point. 
798 EmitBlock(createBasicBlock("unreachable.cont")); 799 800 return RValue::get(nullptr); 801 } 802 803 case Builtin::BI__builtin_powi: 804 case Builtin::BI__builtin_powif: 805 case Builtin::BI__builtin_powil: { 806 Value *Base = EmitScalarExpr(E->getArg(0)); 807 Value *Exponent = EmitScalarExpr(E->getArg(1)); 808 llvm::Type *ArgType = Base->getType(); 809 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 810 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 811 } 812 813 case Builtin::BI__builtin_isgreater: 814 case Builtin::BI__builtin_isgreaterequal: 815 case Builtin::BI__builtin_isless: 816 case Builtin::BI__builtin_islessequal: 817 case Builtin::BI__builtin_islessgreater: 818 case Builtin::BI__builtin_isunordered: { 819 // Ordered comparisons: we know the arguments to these are matching scalar 820 // floating point values. 821 Value *LHS = EmitScalarExpr(E->getArg(0)); 822 Value *RHS = EmitScalarExpr(E->getArg(1)); 823 824 switch (BuiltinID) { 825 default: llvm_unreachable("Unknown ordered comparison"); 826 case Builtin::BI__builtin_isgreater: 827 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 828 break; 829 case Builtin::BI__builtin_isgreaterequal: 830 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 831 break; 832 case Builtin::BI__builtin_isless: 833 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 834 break; 835 case Builtin::BI__builtin_islessequal: 836 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 837 break; 838 case Builtin::BI__builtin_islessgreater: 839 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 840 break; 841 case Builtin::BI__builtin_isunordered: 842 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 843 break; 844 } 845 // ZExt bool to int type. 
846 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 847 } 848 case Builtin::BI__builtin_isnan: { 849 Value *V = EmitScalarExpr(E->getArg(0)); 850 V = Builder.CreateFCmpUNO(V, V, "cmp"); 851 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 852 } 853 854 case Builtin::BI__builtin_isinf: 855 case Builtin::BI__builtin_isfinite: { 856 // isinf(x) --> fabs(x) == infinity 857 // isfinite(x) --> fabs(x) != infinity 858 // x != NaN via the ordered compare in either case. 859 Value *V = EmitScalarExpr(E->getArg(0)); 860 Value *Fabs = EmitFAbs(*this, V); 861 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 862 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 863 ? CmpInst::FCMP_OEQ 864 : CmpInst::FCMP_ONE; 865 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 866 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 867 } 868 869 case Builtin::BI__builtin_isinf_sign: { 870 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 871 Value *Arg = EmitScalarExpr(E->getArg(0)); 872 Value *AbsArg = EmitFAbs(*this, Arg); 873 Value *IsInf = Builder.CreateFCmpOEQ( 874 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 875 Value *IsNeg = EmitSignBit(*this, Arg); 876 877 llvm::Type *IntTy = ConvertType(E->getType()); 878 Value *Zero = Constant::getNullValue(IntTy); 879 Value *One = ConstantInt::get(IntTy, 1); 880 Value *NegativeOne = ConstantInt::get(IntTy, -1); 881 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 882 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 883 return RValue::get(Result); 884 } 885 886 case Builtin::BI__builtin_isnormal: { 887 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 888 Value *V = EmitScalarExpr(E->getArg(0)); 889 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 890 891 Value *Abs = EmitFAbs(*this, V); 892 Value *IsLessThanInf = 893 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 894 APFloat Smallest = APFloat::getSmallestNormalized( 895 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 896 Value *IsNormal = 897 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 898 "isnormal"); 899 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 900 V = Builder.CreateAnd(V, IsNormal, "and"); 901 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 902 } 903 904 case Builtin::BI__builtin_fpclassify: { 905 Value *V = EmitScalarExpr(E->getArg(5)); 906 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 907 908 // Create Result 909 BasicBlock *Begin = Builder.GetInsertBlock(); 910 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 911 Builder.SetInsertPoint(End); 912 PHINode *Result = 913 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 914 "fpclassify_result"); 915 916 // if (V==0) return FP_ZERO 917 Builder.SetInsertPoint(Begin); 918 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 
919 "iszero"); 920 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 921 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 922 Builder.CreateCondBr(IsZero, End, NotZero); 923 Result->addIncoming(ZeroLiteral, Begin); 924 925 // if (V != V) return FP_NAN 926 Builder.SetInsertPoint(NotZero); 927 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 928 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 929 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 930 Builder.CreateCondBr(IsNan, End, NotNan); 931 Result->addIncoming(NanLiteral, NotZero); 932 933 // if (fabs(V) == infinity) return FP_INFINITY 934 Builder.SetInsertPoint(NotNan); 935 Value *VAbs = EmitFAbs(*this, V); 936 Value *IsInf = 937 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 938 "isinf"); 939 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 940 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 941 Builder.CreateCondBr(IsInf, End, NotInf); 942 Result->addIncoming(InfLiteral, NotNan); 943 944 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 945 Builder.SetInsertPoint(NotInf); 946 APFloat Smallest = APFloat::getSmallestNormalized( 947 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 948 Value *IsNormal = 949 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 950 "isnormal"); 951 Value *NormalResult = 952 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 953 EmitScalarExpr(E->getArg(3))); 954 Builder.CreateBr(End); 955 Result->addIncoming(NormalResult, NotInf); 956 957 // return Result 958 Builder.SetInsertPoint(End); 959 return RValue::get(Result); 960 } 961 962 case Builtin::BIalloca: 963 case Builtin::BI_alloca: 964 case Builtin::BI__builtin_alloca: { 965 Value *Size = EmitScalarExpr(E->getArg(0)); 966 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 967 } 968 case Builtin::BIbzero: 969 case Builtin::BI__builtin_bzero: { 970 Address 
Dest = EmitPointerWithAlignment(E->getArg(0)); 971 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 972 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 973 E->getArg(0)->getExprLoc(), FD, 0); 974 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 975 return RValue::get(Dest.getPointer()); 976 } 977 case Builtin::BImemcpy: 978 case Builtin::BI__builtin_memcpy: { 979 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 980 Address Src = EmitPointerWithAlignment(E->getArg(1)); 981 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 982 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 983 E->getArg(0)->getExprLoc(), FD, 0); 984 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 985 E->getArg(1)->getExprLoc(), FD, 1); 986 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 987 return RValue::get(Dest.getPointer()); 988 } 989 990 case Builtin::BI__builtin___memcpy_chk: { 991 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
992 llvm::APSInt Size, DstSize; 993 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 994 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 995 break; 996 if (Size.ugt(DstSize)) 997 break; 998 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 999 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1000 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1001 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1002 return RValue::get(Dest.getPointer()); 1003 } 1004 1005 case Builtin::BI__builtin_objc_memmove_collectable: { 1006 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1007 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1008 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1009 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1010 DestAddr, SrcAddr, SizeVal); 1011 return RValue::get(DestAddr.getPointer()); 1012 } 1013 1014 case Builtin::BI__builtin___memmove_chk: { 1015 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1016 llvm::APSInt Size, DstSize; 1017 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1018 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1019 break; 1020 if (Size.ugt(DstSize)) 1021 break; 1022 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1023 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1024 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1025 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1026 return RValue::get(Dest.getPointer()); 1027 } 1028 1029 case Builtin::BImemmove: 1030 case Builtin::BI__builtin_memmove: { 1031 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1032 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1033 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1034 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1035 E->getArg(0)->getExprLoc(), FD, 0); 1036 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1037 E->getArg(1)->getExprLoc(), FD, 1); 1038 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1039 return RValue::get(Dest.getPointer()); 1040 } 1041 case Builtin::BImemset: 1042 case Builtin::BI__builtin_memset: { 1043 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1044 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1045 Builder.getInt8Ty()); 1046 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1047 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1048 E->getArg(0)->getExprLoc(), FD, 0); 1049 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1050 return RValue::get(Dest.getPointer()); 1051 } 1052 case Builtin::BI__builtin___memset_chk: { 1053 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1054 llvm::APSInt Size, DstSize; 1055 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1056 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1057 break; 1058 if (Size.ugt(DstSize)) 1059 break; 1060 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1061 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1062 Builder.getInt8Ty()); 1063 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1064 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1065 return RValue::get(Dest.getPointer()); 1066 } 1067 case Builtin::BI__builtin_dwarf_cfa: { 1068 // The offset in bytes from the first argument to the CFA. 1069 // 1070 // Why on earth is this in the frontend? Is there any reason at 1071 // all that the backend can't reasonably determine this while 1072 // lowering llvm.eh.dwarf.cfa()? 1073 // 1074 // TODO: If there's a satisfactory reason, add a target hook for 1075 // this instead of hard-coding 0, which is correct for most targets. 1076 int32_t Offset = 0; 1077 1078 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1079 return RValue::get(Builder.CreateCall(F, 1080 llvm::ConstantInt::get(Int32Ty, Offset))); 1081 } 1082 case Builtin::BI__builtin_return_address: { 1083 Value *Depth = 1084 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1085 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1086 return RValue::get(Builder.CreateCall(F, Depth)); 1087 } 1088 case Builtin::BI__builtin_frame_address: { 1089 Value *Depth = 1090 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1091 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1092 return RValue::get(Builder.CreateCall(F, Depth)); 1093 } 1094 case Builtin::BI__builtin_extract_return_addr: { 1095 Value *Address = EmitScalarExpr(E->getArg(0)); 1096 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1097 return RValue::get(Result); 1098 } 1099 case Builtin::BI__builtin_frob_return_addr: { 1100 Value 
*Address = EmitScalarExpr(E->getArg(0)); 1101 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1102 return RValue::get(Result); 1103 } 1104 case Builtin::BI__builtin_dwarf_sp_column: { 1105 llvm::IntegerType *Ty 1106 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1107 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1108 if (Column == -1) { 1109 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1110 return RValue::get(llvm::UndefValue::get(Ty)); 1111 } 1112 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1113 } 1114 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1115 Value *Address = EmitScalarExpr(E->getArg(0)); 1116 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1117 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1118 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1119 } 1120 case Builtin::BI__builtin_eh_return: { 1121 Value *Int = EmitScalarExpr(E->getArg(0)); 1122 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1123 1124 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1125 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1126 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1127 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1128 ? Intrinsic::eh_return_i32 1129 : Intrinsic::eh_return_i64); 1130 Builder.CreateCall(F, {Int, Ptr}); 1131 Builder.CreateUnreachable(); 1132 1133 // We do need to preserve an insertion point. 1134 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1135 1136 return RValue::get(nullptr); 1137 } 1138 case Builtin::BI__builtin_unwind_init: { 1139 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1140 return RValue::get(Builder.CreateCall(F)); 1141 } 1142 case Builtin::BI__builtin_extend_pointer: { 1143 // Extends a pointer to the size of an _Unwind_Word, which is 1144 // uint64_t on all platforms. 
Generally this gets poked into a 1145 // register and eventually used as an address, so if the 1146 // addressing registers are wider than pointers and the platform 1147 // doesn't implicitly ignore high-order bits when doing 1148 // addressing, we need to make sure we zext / sext based on 1149 // the platform's expectations. 1150 // 1151 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1152 1153 // Cast the pointer to intptr_t. 1154 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1155 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1156 1157 // If that's 64 bits, we're done. 1158 if (IntPtrTy->getBitWidth() == 64) 1159 return RValue::get(Result); 1160 1161 // Otherwise, ask the codegen data what to do. 1162 if (getTargetHooks().extendPointerWithSExt()) 1163 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1164 else 1165 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1166 } 1167 case Builtin::BI__builtin_setjmp: { 1168 // Buffer is a void**. 1169 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1170 1171 // Store the frame pointer to the setjmp buffer. 1172 Value *FrameAddr = 1173 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1174 ConstantInt::get(Int32Ty, 0)); 1175 Builder.CreateStore(FrameAddr, Buf); 1176 1177 // Store the stack pointer to the setjmp buffer. 1178 Value *StackAddr = 1179 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1180 Address StackSaveSlot = 1181 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1182 Builder.CreateStore(StackAddr, StackSaveSlot); 1183 1184 // Call LLVM's EH setjmp, which is lightweight. 
1185 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1186 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1187 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1188 } 1189 case Builtin::BI__builtin_longjmp: { 1190 Value *Buf = EmitScalarExpr(E->getArg(0)); 1191 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1192 1193 // Call LLVM's EH longjmp, which is lightweight. 1194 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1195 1196 // longjmp doesn't return; mark this as unreachable. 1197 Builder.CreateUnreachable(); 1198 1199 // We do need to preserve an insertion point. 1200 EmitBlock(createBasicBlock("longjmp.cont")); 1201 1202 return RValue::get(nullptr); 1203 } 1204 case Builtin::BI__sync_fetch_and_add: 1205 case Builtin::BI__sync_fetch_and_sub: 1206 case Builtin::BI__sync_fetch_and_or: 1207 case Builtin::BI__sync_fetch_and_and: 1208 case Builtin::BI__sync_fetch_and_xor: 1209 case Builtin::BI__sync_fetch_and_nand: 1210 case Builtin::BI__sync_add_and_fetch: 1211 case Builtin::BI__sync_sub_and_fetch: 1212 case Builtin::BI__sync_and_and_fetch: 1213 case Builtin::BI__sync_or_and_fetch: 1214 case Builtin::BI__sync_xor_and_fetch: 1215 case Builtin::BI__sync_nand_and_fetch: 1216 case Builtin::BI__sync_val_compare_and_swap: 1217 case Builtin::BI__sync_bool_compare_and_swap: 1218 case Builtin::BI__sync_lock_test_and_set: 1219 case Builtin::BI__sync_lock_release: 1220 case Builtin::BI__sync_swap: 1221 llvm_unreachable("Shouldn't make it through sema"); 1222 case Builtin::BI__sync_fetch_and_add_1: 1223 case Builtin::BI__sync_fetch_and_add_2: 1224 case Builtin::BI__sync_fetch_and_add_4: 1225 case Builtin::BI__sync_fetch_and_add_8: 1226 case Builtin::BI__sync_fetch_and_add_16: 1227 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1228 case Builtin::BI__sync_fetch_and_sub_1: 1229 case Builtin::BI__sync_fetch_and_sub_2: 1230 case Builtin::BI__sync_fetch_and_sub_4: 1231 case Builtin::BI__sync_fetch_and_sub_8: 1232 case 
Builtin::BI__sync_fetch_and_sub_16: 1233 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1234 case Builtin::BI__sync_fetch_and_or_1: 1235 case Builtin::BI__sync_fetch_and_or_2: 1236 case Builtin::BI__sync_fetch_and_or_4: 1237 case Builtin::BI__sync_fetch_and_or_8: 1238 case Builtin::BI__sync_fetch_and_or_16: 1239 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1240 case Builtin::BI__sync_fetch_and_and_1: 1241 case Builtin::BI__sync_fetch_and_and_2: 1242 case Builtin::BI__sync_fetch_and_and_4: 1243 case Builtin::BI__sync_fetch_and_and_8: 1244 case Builtin::BI__sync_fetch_and_and_16: 1245 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1246 case Builtin::BI__sync_fetch_and_xor_1: 1247 case Builtin::BI__sync_fetch_and_xor_2: 1248 case Builtin::BI__sync_fetch_and_xor_4: 1249 case Builtin::BI__sync_fetch_and_xor_8: 1250 case Builtin::BI__sync_fetch_and_xor_16: 1251 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1252 case Builtin::BI__sync_fetch_and_nand_1: 1253 case Builtin::BI__sync_fetch_and_nand_2: 1254 case Builtin::BI__sync_fetch_and_nand_4: 1255 case Builtin::BI__sync_fetch_and_nand_8: 1256 case Builtin::BI__sync_fetch_and_nand_16: 1257 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1258 1259 // Clang extensions: not overloaded yet. 
1260 case Builtin::BI__sync_fetch_and_min: 1261 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1262 case Builtin::BI__sync_fetch_and_max: 1263 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1264 case Builtin::BI__sync_fetch_and_umin: 1265 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1266 case Builtin::BI__sync_fetch_and_umax: 1267 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1268 1269 case Builtin::BI__sync_add_and_fetch_1: 1270 case Builtin::BI__sync_add_and_fetch_2: 1271 case Builtin::BI__sync_add_and_fetch_4: 1272 case Builtin::BI__sync_add_and_fetch_8: 1273 case Builtin::BI__sync_add_and_fetch_16: 1274 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1275 llvm::Instruction::Add); 1276 case Builtin::BI__sync_sub_and_fetch_1: 1277 case Builtin::BI__sync_sub_and_fetch_2: 1278 case Builtin::BI__sync_sub_and_fetch_4: 1279 case Builtin::BI__sync_sub_and_fetch_8: 1280 case Builtin::BI__sync_sub_and_fetch_16: 1281 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1282 llvm::Instruction::Sub); 1283 case Builtin::BI__sync_and_and_fetch_1: 1284 case Builtin::BI__sync_and_and_fetch_2: 1285 case Builtin::BI__sync_and_and_fetch_4: 1286 case Builtin::BI__sync_and_and_fetch_8: 1287 case Builtin::BI__sync_and_and_fetch_16: 1288 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1289 llvm::Instruction::And); 1290 case Builtin::BI__sync_or_and_fetch_1: 1291 case Builtin::BI__sync_or_and_fetch_2: 1292 case Builtin::BI__sync_or_and_fetch_4: 1293 case Builtin::BI__sync_or_and_fetch_8: 1294 case Builtin::BI__sync_or_and_fetch_16: 1295 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1296 llvm::Instruction::Or); 1297 case Builtin::BI__sync_xor_and_fetch_1: 1298 case Builtin::BI__sync_xor_and_fetch_2: 1299 case Builtin::BI__sync_xor_and_fetch_4: 1300 case Builtin::BI__sync_xor_and_fetch_8: 1301 case Builtin::BI__sync_xor_and_fetch_16: 1302 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1303 llvm::Instruction::Xor); 1304 case Builtin::BI__sync_nand_and_fetch_1: 1305 case Builtin::BI__sync_nand_and_fetch_2: 1306 case Builtin::BI__sync_nand_and_fetch_4: 1307 case Builtin::BI__sync_nand_and_fetch_8: 1308 case Builtin::BI__sync_nand_and_fetch_16: 1309 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1310 llvm::Instruction::And, true); 1311 1312 case Builtin::BI__sync_val_compare_and_swap_1: 1313 case Builtin::BI__sync_val_compare_and_swap_2: 1314 case Builtin::BI__sync_val_compare_and_swap_4: 1315 case Builtin::BI__sync_val_compare_and_swap_8: 1316 case Builtin::BI__sync_val_compare_and_swap_16: 1317 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1318 1319 case Builtin::BI__sync_bool_compare_and_swap_1: 1320 case Builtin::BI__sync_bool_compare_and_swap_2: 1321 case Builtin::BI__sync_bool_compare_and_swap_4: 1322 case Builtin::BI__sync_bool_compare_and_swap_8: 1323 case Builtin::BI__sync_bool_compare_and_swap_16: 1324 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1325 1326 case Builtin::BI__sync_swap_1: 1327 case Builtin::BI__sync_swap_2: 1328 case Builtin::BI__sync_swap_4: 1329 case Builtin::BI__sync_swap_8: 1330 case Builtin::BI__sync_swap_16: 1331 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1332 1333 case Builtin::BI__sync_lock_test_and_set_1: 1334 case Builtin::BI__sync_lock_test_and_set_2: 1335 case Builtin::BI__sync_lock_test_and_set_4: 1336 case Builtin::BI__sync_lock_test_and_set_8: 1337 case Builtin::BI__sync_lock_test_and_set_16: 1338 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1339 1340 case Builtin::BI__sync_lock_release_1: 1341 case Builtin::BI__sync_lock_release_2: 1342 case Builtin::BI__sync_lock_release_4: 1343 case Builtin::BI__sync_lock_release_8: 1344 case Builtin::BI__sync_lock_release_16: { 1345 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1346 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 1347 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1348 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1349 StoreSize.getQuantity() * 8); 1350 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1351 llvm::StoreInst *Store = 1352 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1353 StoreSize); 1354 Store->setAtomic(llvm::AtomicOrdering::Release); 1355 return RValue::get(nullptr); 1356 } 1357 1358 case Builtin::BI__sync_synchronize: { 1359 // We assume this is supposed to correspond to a C++0x-style 1360 // sequentially-consistent fence (i.e. this is only usable for 1361 // synchronization, not device I/O or anything like that). This intrinsic 1362 // is really badly designed in the sense that in theory, there isn't 1363 // any way to safely use it... but in practice, it mostly works 1364 // to use it with non-atomic loads and stores to get acquire/release 1365 // semantics. 1366 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1367 return RValue::get(nullptr); 1368 } 1369 1370 case Builtin::BI__builtin_nontemporal_load: 1371 return RValue::get(EmitNontemporalLoad(*this, E)); 1372 case Builtin::BI__builtin_nontemporal_store: 1373 return RValue::get(EmitNontemporalStore(*this, E)); 1374 case Builtin::BI__c11_atomic_is_lock_free: 1375 case Builtin::BI__atomic_is_lock_free: { 1376 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1377 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1378 // _Atomic(T) is always properly-aligned. 
1379 const char *LibCallName = "__atomic_is_lock_free"; 1380 CallArgList Args; 1381 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1382 getContext().getSizeType()); 1383 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1384 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1385 getContext().VoidPtrTy); 1386 else 1387 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1388 getContext().VoidPtrTy); 1389 const CGFunctionInfo &FuncInfo = 1390 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1391 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1392 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1393 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 1394 } 1395 1396 case Builtin::BI__atomic_test_and_set: { 1397 // Look at the argument type to determine whether this is a volatile 1398 // operation. The parameter type is always volatile. 1399 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1400 bool Volatile = 1401 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1402 1403 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1404 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1405 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1406 Value *NewVal = Builder.getInt8(1); 1407 Value *Order = EmitScalarExpr(E->getArg(1)); 1408 if (isa<llvm::ConstantInt>(Order)) { 1409 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1410 AtomicRMWInst *Result = nullptr; 1411 switch (ord) { 1412 case 0: // memory_order_relaxed 1413 default: // invalid order 1414 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1415 llvm::AtomicOrdering::Monotonic); 1416 break; 1417 case 1: // memory_order_consume 1418 case 2: // memory_order_acquire 1419 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1420 llvm::AtomicOrdering::Acquire); 1421 break; 1422 case 3: // memory_order_release 1423 Result = 
Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1424 llvm::AtomicOrdering::Release); 1425 break; 1426 case 4: // memory_order_acq_rel 1427 1428 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1429 llvm::AtomicOrdering::AcquireRelease); 1430 break; 1431 case 5: // memory_order_seq_cst 1432 Result = Builder.CreateAtomicRMW( 1433 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1434 llvm::AtomicOrdering::SequentiallyConsistent); 1435 break; 1436 } 1437 Result->setVolatile(Volatile); 1438 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1439 } 1440 1441 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1442 1443 llvm::BasicBlock *BBs[5] = { 1444 createBasicBlock("monotonic", CurFn), 1445 createBasicBlock("acquire", CurFn), 1446 createBasicBlock("release", CurFn), 1447 createBasicBlock("acqrel", CurFn), 1448 createBasicBlock("seqcst", CurFn) 1449 }; 1450 llvm::AtomicOrdering Orders[5] = { 1451 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1452 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1453 llvm::AtomicOrdering::SequentiallyConsistent}; 1454 1455 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1456 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1457 1458 Builder.SetInsertPoint(ContBB); 1459 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1460 1461 for (unsigned i = 0; i < 5; ++i) { 1462 Builder.SetInsertPoint(BBs[i]); 1463 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1464 Ptr, NewVal, Orders[i]); 1465 RMW->setVolatile(Volatile); 1466 Result->addIncoming(RMW, BBs[i]); 1467 Builder.CreateBr(ContBB); 1468 } 1469 1470 SI->addCase(Builder.getInt32(0), BBs[0]); 1471 SI->addCase(Builder.getInt32(1), BBs[1]); 1472 SI->addCase(Builder.getInt32(2), BBs[1]); 1473 SI->addCase(Builder.getInt32(3), BBs[2]); 1474 SI->addCase(Builder.getInt32(4), BBs[3]); 1475 SI->addCase(Builder.getInt32(5), BBs[4]); 1476 
1477 Builder.SetInsertPoint(ContBB); 1478 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1479 } 1480 1481 case Builtin::BI__atomic_clear: { 1482 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1483 bool Volatile = 1484 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1485 1486 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1487 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1488 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1489 Value *NewVal = Builder.getInt8(0); 1490 Value *Order = EmitScalarExpr(E->getArg(1)); 1491 if (isa<llvm::ConstantInt>(Order)) { 1492 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1493 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1494 switch (ord) { 1495 case 0: // memory_order_relaxed 1496 default: // invalid order 1497 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1498 break; 1499 case 3: // memory_order_release 1500 Store->setOrdering(llvm::AtomicOrdering::Release); 1501 break; 1502 case 5: // memory_order_seq_cst 1503 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1504 break; 1505 } 1506 return RValue::get(nullptr); 1507 } 1508 1509 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1510 1511 llvm::BasicBlock *BBs[3] = { 1512 createBasicBlock("monotonic", CurFn), 1513 createBasicBlock("release", CurFn), 1514 createBasicBlock("seqcst", CurFn) 1515 }; 1516 llvm::AtomicOrdering Orders[3] = { 1517 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1518 llvm::AtomicOrdering::SequentiallyConsistent}; 1519 1520 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1521 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1522 1523 for (unsigned i = 0; i < 3; ++i) { 1524 Builder.SetInsertPoint(BBs[i]); 1525 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1526 Store->setOrdering(Orders[i]); 1527 Builder.CreateBr(ContBB); 
1528 } 1529 1530 SI->addCase(Builder.getInt32(0), BBs[0]); 1531 SI->addCase(Builder.getInt32(3), BBs[1]); 1532 SI->addCase(Builder.getInt32(5), BBs[2]); 1533 1534 Builder.SetInsertPoint(ContBB); 1535 return RValue::get(nullptr); 1536 } 1537 1538 case Builtin::BI__atomic_thread_fence: 1539 case Builtin::BI__atomic_signal_fence: 1540 case Builtin::BI__c11_atomic_thread_fence: 1541 case Builtin::BI__c11_atomic_signal_fence: { 1542 llvm::SynchronizationScope Scope; 1543 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1544 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1545 Scope = llvm::SingleThread; 1546 else 1547 Scope = llvm::CrossThread; 1548 Value *Order = EmitScalarExpr(E->getArg(0)); 1549 if (isa<llvm::ConstantInt>(Order)) { 1550 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1551 switch (ord) { 1552 case 0: // memory_order_relaxed 1553 default: // invalid order 1554 break; 1555 case 1: // memory_order_consume 1556 case 2: // memory_order_acquire 1557 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1558 break; 1559 case 3: // memory_order_release 1560 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1561 break; 1562 case 4: // memory_order_acq_rel 1563 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1564 break; 1565 case 5: // memory_order_seq_cst 1566 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 1567 Scope); 1568 break; 1569 } 1570 return RValue::get(nullptr); 1571 } 1572 1573 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1574 AcquireBB = createBasicBlock("acquire", CurFn); 1575 ReleaseBB = createBasicBlock("release", CurFn); 1576 AcqRelBB = createBasicBlock("acqrel", CurFn); 1577 SeqCstBB = createBasicBlock("seqcst", CurFn); 1578 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1579 1580 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1581 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1582 1583 
Builder.SetInsertPoint(AcquireBB); 1584 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1585 Builder.CreateBr(ContBB); 1586 SI->addCase(Builder.getInt32(1), AcquireBB); 1587 SI->addCase(Builder.getInt32(2), AcquireBB); 1588 1589 Builder.SetInsertPoint(ReleaseBB); 1590 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1591 Builder.CreateBr(ContBB); 1592 SI->addCase(Builder.getInt32(3), ReleaseBB); 1593 1594 Builder.SetInsertPoint(AcqRelBB); 1595 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1596 Builder.CreateBr(ContBB); 1597 SI->addCase(Builder.getInt32(4), AcqRelBB); 1598 1599 Builder.SetInsertPoint(SeqCstBB); 1600 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); 1601 Builder.CreateBr(ContBB); 1602 SI->addCase(Builder.getInt32(5), SeqCstBB); 1603 1604 Builder.SetInsertPoint(ContBB); 1605 return RValue::get(nullptr); 1606 } 1607 1608 // Library functions with special handling. 1609 case Builtin::BIsqrt: 1610 case Builtin::BIsqrtf: 1611 case Builtin::BIsqrtl: { 1612 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1613 // in finite- or unsafe-math mode (the intrinsic has different semantics 1614 // for handling negative numbers compared to the library function, so 1615 // -fmath-errno=0 is not enough). 1616 if (!FD->hasAttr<ConstAttr>()) 1617 break; 1618 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1619 CGM.getCodeGenOpts().NoNaNsFPMath)) 1620 break; 1621 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1622 llvm::Type *ArgType = Arg0->getType(); 1623 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1624 return RValue::get(Builder.CreateCall(F, Arg0)); 1625 } 1626 1627 case Builtin::BI__builtin_pow: 1628 case Builtin::BI__builtin_powf: 1629 case Builtin::BI__builtin_powl: 1630 case Builtin::BIpow: 1631 case Builtin::BIpowf: 1632 case Builtin::BIpowl: { 1633 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
1634 if (!FD->hasAttr<ConstAttr>()) 1635 break; 1636 Value *Base = EmitScalarExpr(E->getArg(0)); 1637 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1638 llvm::Type *ArgType = Base->getType(); 1639 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1640 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1641 } 1642 1643 case Builtin::BIfma: 1644 case Builtin::BIfmaf: 1645 case Builtin::BIfmal: 1646 case Builtin::BI__builtin_fma: 1647 case Builtin::BI__builtin_fmaf: 1648 case Builtin::BI__builtin_fmal: { 1649 // Rewrite fma to intrinsic. 1650 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1651 llvm::Type *ArgType = FirstArg->getType(); 1652 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1653 return RValue::get( 1654 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1655 EmitScalarExpr(E->getArg(2))})); 1656 } 1657 1658 case Builtin::BI__builtin_signbit: 1659 case Builtin::BI__builtin_signbitf: 1660 case Builtin::BI__builtin_signbitl: { 1661 return RValue::get( 1662 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1663 ConvertType(E->getType()))); 1664 } 1665 case Builtin::BI__builtin_annotation: { 1666 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1667 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1668 AnnVal->getType()); 1669 1670 // Get the annotation string, go through casts. Sema requires this to be a 1671 // non-wide string literal, potentially casted, so the cast<> is safe. 
1672 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1673 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1674 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1675 } 1676 case Builtin::BI__builtin_addcb: 1677 case Builtin::BI__builtin_addcs: 1678 case Builtin::BI__builtin_addc: 1679 case Builtin::BI__builtin_addcl: 1680 case Builtin::BI__builtin_addcll: 1681 case Builtin::BI__builtin_subcb: 1682 case Builtin::BI__builtin_subcs: 1683 case Builtin::BI__builtin_subc: 1684 case Builtin::BI__builtin_subcl: 1685 case Builtin::BI__builtin_subcll: { 1686 1687 // We translate all of these builtins from expressions of the form: 1688 // int x = ..., y = ..., carryin = ..., carryout, result; 1689 // result = __builtin_addc(x, y, carryin, &carryout); 1690 // 1691 // to LLVM IR of the form: 1692 // 1693 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1694 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1695 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1696 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1697 // i32 %carryin) 1698 // %result = extractvalue {i32, i1} %tmp2, 0 1699 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1700 // %tmp3 = or i1 %carry1, %carry2 1701 // %tmp4 = zext i1 %tmp3 to i32 1702 // store i32 %tmp4, i32* %carryout 1703 1704 // Scalarize our inputs. 1705 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1706 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1707 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1708 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 1709 1710 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1711 llvm::Intrinsic::ID IntrinsicId; 1712 switch (BuiltinID) { 1713 default: llvm_unreachable("Unknown multiprecision builtin id."); 1714 case Builtin::BI__builtin_addcb: 1715 case Builtin::BI__builtin_addcs: 1716 case Builtin::BI__builtin_addc: 1717 case Builtin::BI__builtin_addcl: 1718 case Builtin::BI__builtin_addcll: 1719 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1720 break; 1721 case Builtin::BI__builtin_subcb: 1722 case Builtin::BI__builtin_subcs: 1723 case Builtin::BI__builtin_subc: 1724 case Builtin::BI__builtin_subcl: 1725 case Builtin::BI__builtin_subcll: 1726 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1727 break; 1728 } 1729 1730 // Construct our resulting LLVM IR expression. 1731 llvm::Value *Carry1; 1732 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1733 X, Y, Carry1); 1734 llvm::Value *Carry2; 1735 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1736 Sum1, Carryin, Carry2); 1737 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1738 X->getType()); 1739 Builder.CreateStore(CarryOut, CarryOutPtr); 1740 return RValue::get(Sum2); 1741 } 1742 1743 case Builtin::BI__builtin_add_overflow: 1744 case Builtin::BI__builtin_sub_overflow: 1745 case Builtin::BI__builtin_mul_overflow: { 1746 const clang::Expr *LeftArg = E->getArg(0); 1747 const clang::Expr *RightArg = E->getArg(1); 1748 const clang::Expr *ResultArg = E->getArg(2); 1749 1750 clang::QualType ResultQTy = 1751 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 1752 1753 WidthAndSignedness LeftInfo = 1754 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 1755 WidthAndSignedness RightInfo = 1756 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 1757 WidthAndSignedness ResultInfo = 1758 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 1759 WidthAndSignedness EncompassingInfo = 1760 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 1761 1762 llvm::Type 
*EncompassingLLVMTy = 1763 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 1764 1765 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 1766 1767 llvm::Intrinsic::ID IntrinsicId; 1768 switch (BuiltinID) { 1769 default: 1770 llvm_unreachable("Unknown overflow builtin id."); 1771 case Builtin::BI__builtin_add_overflow: 1772 IntrinsicId = EncompassingInfo.Signed 1773 ? llvm::Intrinsic::sadd_with_overflow 1774 : llvm::Intrinsic::uadd_with_overflow; 1775 break; 1776 case Builtin::BI__builtin_sub_overflow: 1777 IntrinsicId = EncompassingInfo.Signed 1778 ? llvm::Intrinsic::ssub_with_overflow 1779 : llvm::Intrinsic::usub_with_overflow; 1780 break; 1781 case Builtin::BI__builtin_mul_overflow: 1782 IntrinsicId = EncompassingInfo.Signed 1783 ? llvm::Intrinsic::smul_with_overflow 1784 : llvm::Intrinsic::umul_with_overflow; 1785 break; 1786 } 1787 1788 llvm::Value *Left = EmitScalarExpr(LeftArg); 1789 llvm::Value *Right = EmitScalarExpr(RightArg); 1790 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 1791 1792 // Extend each operand to the encompassing type. 1793 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 1794 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 1795 1796 // Perform the operation on the extended values. 1797 llvm::Value *Overflow, *Result; 1798 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 1799 1800 if (EncompassingInfo.Width > ResultInfo.Width) { 1801 // The encompassing type is wider than the result type, so we need to 1802 // truncate it. 1803 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 1804 1805 // To see if the truncation caused an overflow, we will extend 1806 // the result and then compare it to the original result. 
1807 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 1808 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 1809 llvm::Value *TruncationOverflow = 1810 Builder.CreateICmpNE(Result, ResultTruncExt); 1811 1812 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 1813 Result = ResultTrunc; 1814 } 1815 1816 // Finally, store the result using the pointer. 1817 bool isVolatile = 1818 ResultArg->getType()->getPointeeType().isVolatileQualified(); 1819 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 1820 1821 return RValue::get(Overflow); 1822 } 1823 1824 case Builtin::BI__builtin_uadd_overflow: 1825 case Builtin::BI__builtin_uaddl_overflow: 1826 case Builtin::BI__builtin_uaddll_overflow: 1827 case Builtin::BI__builtin_usub_overflow: 1828 case Builtin::BI__builtin_usubl_overflow: 1829 case Builtin::BI__builtin_usubll_overflow: 1830 case Builtin::BI__builtin_umul_overflow: 1831 case Builtin::BI__builtin_umull_overflow: 1832 case Builtin::BI__builtin_umulll_overflow: 1833 case Builtin::BI__builtin_sadd_overflow: 1834 case Builtin::BI__builtin_saddl_overflow: 1835 case Builtin::BI__builtin_saddll_overflow: 1836 case Builtin::BI__builtin_ssub_overflow: 1837 case Builtin::BI__builtin_ssubl_overflow: 1838 case Builtin::BI__builtin_ssubll_overflow: 1839 case Builtin::BI__builtin_smul_overflow: 1840 case Builtin::BI__builtin_smull_overflow: 1841 case Builtin::BI__builtin_smulll_overflow: { 1842 1843 // We translate all of these builtins directly to the relevant llvm IR node. 1844 1845 // Scalarize our inputs. 
1846 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1847 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1848 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 1849 1850 // Decide which of the overflow intrinsics we are lowering to: 1851 llvm::Intrinsic::ID IntrinsicId; 1852 switch (BuiltinID) { 1853 default: llvm_unreachable("Unknown overflow builtin id."); 1854 case Builtin::BI__builtin_uadd_overflow: 1855 case Builtin::BI__builtin_uaddl_overflow: 1856 case Builtin::BI__builtin_uaddll_overflow: 1857 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1858 break; 1859 case Builtin::BI__builtin_usub_overflow: 1860 case Builtin::BI__builtin_usubl_overflow: 1861 case Builtin::BI__builtin_usubll_overflow: 1862 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1863 break; 1864 case Builtin::BI__builtin_umul_overflow: 1865 case Builtin::BI__builtin_umull_overflow: 1866 case Builtin::BI__builtin_umulll_overflow: 1867 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1868 break; 1869 case Builtin::BI__builtin_sadd_overflow: 1870 case Builtin::BI__builtin_saddl_overflow: 1871 case Builtin::BI__builtin_saddll_overflow: 1872 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1873 break; 1874 case Builtin::BI__builtin_ssub_overflow: 1875 case Builtin::BI__builtin_ssubl_overflow: 1876 case Builtin::BI__builtin_ssubll_overflow: 1877 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1878 break; 1879 case Builtin::BI__builtin_smul_overflow: 1880 case Builtin::BI__builtin_smull_overflow: 1881 case Builtin::BI__builtin_smulll_overflow: 1882 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1883 break; 1884 } 1885 1886 1887 llvm::Value *Carry; 1888 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 1889 Builder.CreateStore(Sum, SumOutPtr); 1890 1891 return RValue::get(Carry); 1892 } 1893 case Builtin::BI__builtin_addressof: 1894 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 1895 case Builtin::BI__builtin_operator_new: 1896 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1897 E->getArg(0), false); 1898 case Builtin::BI__builtin_operator_delete: 1899 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1900 E->getArg(0), true); 1901 case Builtin::BI__noop: 1902 // __noop always evaluates to an integer literal zero. 1903 return RValue::get(ConstantInt::get(IntTy, 0)); 1904 case Builtin::BI__builtin_call_with_static_chain: { 1905 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 1906 const Expr *Chain = E->getArg(1); 1907 return EmitCall(Call->getCallee()->getType(), 1908 EmitScalarExpr(Call->getCallee()), Call, ReturnValue, 1909 Call->getCalleeDecl(), EmitScalarExpr(Chain)); 1910 } 1911 case Builtin::BI_InterlockedExchange: 1912 case Builtin::BI_InterlockedExchangePointer: 1913 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1914 case Builtin::BI_InterlockedCompareExchangePointer: { 1915 llvm::Type *RTy; 1916 llvm::IntegerType *IntType = 1917 IntegerType::get(getLLVMContext(), 1918 getContext().getTypeSize(E->getType())); 1919 llvm::Type *IntPtrType = IntType->getPointerTo(); 1920 1921 llvm::Value *Destination = 1922 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 1923 1924 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 1925 RTy = Exchange->getType(); 1926 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 1927 1928 llvm::Value *Comparand = 1929 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 1930 1931 auto Result = 1932 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 1933 AtomicOrdering::SequentiallyConsistent, 1934 AtomicOrdering::SequentiallyConsistent); 1935 Result->setVolatile(true); 1936 1937 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 1938 0), 1939 RTy)); 1940 } 1941 case Builtin::BI_InterlockedCompareExchange: { 1942 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 1943 EmitScalarExpr(E->getArg(0)), 1944 
EmitScalarExpr(E->getArg(2)), 1945 EmitScalarExpr(E->getArg(1)), 1946 AtomicOrdering::SequentiallyConsistent, 1947 AtomicOrdering::SequentiallyConsistent); 1948 CXI->setVolatile(true); 1949 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 1950 } 1951 case Builtin::BI_InterlockedIncrement: { 1952 llvm::Type *IntTy = ConvertType(E->getType()); 1953 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1954 AtomicRMWInst::Add, 1955 EmitScalarExpr(E->getArg(0)), 1956 ConstantInt::get(IntTy, 1), 1957 llvm::AtomicOrdering::SequentiallyConsistent); 1958 RMWI->setVolatile(true); 1959 return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1))); 1960 } 1961 case Builtin::BI_InterlockedDecrement: { 1962 llvm::Type *IntTy = ConvertType(E->getType()); 1963 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1964 AtomicRMWInst::Sub, 1965 EmitScalarExpr(E->getArg(0)), 1966 ConstantInt::get(IntTy, 1), 1967 llvm::AtomicOrdering::SequentiallyConsistent); 1968 RMWI->setVolatile(true); 1969 return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1))); 1970 } 1971 case Builtin::BI_InterlockedExchangeAdd: { 1972 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1973 AtomicRMWInst::Add, 1974 EmitScalarExpr(E->getArg(0)), 1975 EmitScalarExpr(E->getArg(1)), 1976 llvm::AtomicOrdering::SequentiallyConsistent); 1977 RMWI->setVolatile(true); 1978 return RValue::get(RMWI); 1979 } 1980 case Builtin::BI__readfsdword: { 1981 llvm::Type *IntTy = ConvertType(E->getType()); 1982 Value *IntToPtr = 1983 Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 1984 llvm::PointerType::get(IntTy, 257)); 1985 LoadInst *Load = 1986 Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true); 1987 return RValue::get(Load); 1988 } 1989 1990 case Builtin::BI__exception_code: 1991 case Builtin::BI_exception_code: 1992 return RValue::get(EmitSEHExceptionCode()); 1993 case Builtin::BI__exception_info: 1994 case Builtin::BI_exception_info: 1995 return RValue::get(EmitSEHExceptionInfo()); 1996 case 
Builtin::BI__abnormal_termination: 1997 case Builtin::BI_abnormal_termination: 1998 return RValue::get(EmitSEHAbnormalTermination()); 1999 case Builtin::BI_setjmpex: { 2000 if (getTarget().getTriple().isOSMSVCRT()) { 2001 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2002 llvm::AttributeSet ReturnsTwiceAttr = 2003 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2004 llvm::Attribute::ReturnsTwice); 2005 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2006 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2007 "_setjmpex", ReturnsTwiceAttr); 2008 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2009 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2010 llvm::Value *FrameAddr = 2011 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2012 ConstantInt::get(Int32Ty, 0)); 2013 llvm::Value *Args[] = {Buf, FrameAddr}; 2014 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2015 CS.setAttributes(ReturnsTwiceAttr); 2016 return RValue::get(CS.getInstruction()); 2017 } 2018 break; 2019 } 2020 case Builtin::BI_setjmp: { 2021 if (getTarget().getTriple().isOSMSVCRT()) { 2022 llvm::AttributeSet ReturnsTwiceAttr = 2023 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2024 llvm::Attribute::ReturnsTwice); 2025 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2026 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2027 llvm::CallSite CS; 2028 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2029 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2030 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2031 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), 2032 "_setjmp3", ReturnsTwiceAttr); 2033 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2034 llvm::Value *Args[] = {Buf, Count}; 2035 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2036 } else { 2037 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2038 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2039 llvm::FunctionType::get(IntTy, 
ArgTypes, /*isVarArg=*/false), 2040 "_setjmp", ReturnsTwiceAttr); 2041 llvm::Value *FrameAddr = 2042 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2043 ConstantInt::get(Int32Ty, 0)); 2044 llvm::Value *Args[] = {Buf, FrameAddr}; 2045 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2046 } 2047 CS.setAttributes(ReturnsTwiceAttr); 2048 return RValue::get(CS.getInstruction()); 2049 } 2050 break; 2051 } 2052 2053 case Builtin::BI__GetExceptionInfo: { 2054 if (llvm::GlobalVariable *GV = 2055 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2056 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2057 break; 2058 } 2059 2060 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2061 case Builtin::BIread_pipe: 2062 case Builtin::BIwrite_pipe: { 2063 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2064 *Arg1 = EmitScalarExpr(E->getArg(1)); 2065 2066 // Type of the generic packet parameter. 2067 unsigned GenericAS = 2068 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2069 llvm::Type *I8PTy = llvm::PointerType::get( 2070 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2071 2072 // Testing which overloaded version we should generate the call for. 2073 if (2U == E->getNumArgs()) { 2074 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2075 : "__write_pipe_2"; 2076 // Creating a generic function type to be able to call with any builtin or 2077 // user defined type. 2078 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy}; 2079 llvm::FunctionType *FTy = llvm::FunctionType::get( 2080 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2081 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2082 return RValue::get(Builder.CreateCall( 2083 CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast})); 2084 } else { 2085 assert(4 == E->getNumArgs() && 2086 "Illegal number of parameters to pipe function"); 2087 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? 
"__read_pipe_4" 2088 : "__write_pipe_4"; 2089 2090 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy}; 2091 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2092 *Arg3 = EmitScalarExpr(E->getArg(3)); 2093 llvm::FunctionType *FTy = llvm::FunctionType::get( 2094 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2095 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2096 // We know the third argument is an integer type, but we may need to cast 2097 // it to i32. 2098 if (Arg2->getType() != Int32Ty) 2099 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2100 return RValue::get(Builder.CreateCall( 2101 CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast})); 2102 } 2103 } 2104 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2105 // functions 2106 case Builtin::BIreserve_read_pipe: 2107 case Builtin::BIreserve_write_pipe: 2108 case Builtin::BIwork_group_reserve_read_pipe: 2109 case Builtin::BIwork_group_reserve_write_pipe: 2110 case Builtin::BIsub_group_reserve_read_pipe: 2111 case Builtin::BIsub_group_reserve_write_pipe: { 2112 // Composing the mangled name for the function. 2113 const char *Name; 2114 if (BuiltinID == Builtin::BIreserve_read_pipe) 2115 Name = "__reserve_read_pipe"; 2116 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2117 Name = "__reserve_write_pipe"; 2118 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2119 Name = "__work_group_reserve_read_pipe"; 2120 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2121 Name = "__work_group_reserve_write_pipe"; 2122 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2123 Name = "__sub_group_reserve_read_pipe"; 2124 else 2125 Name = "__sub_group_reserve_write_pipe"; 2126 2127 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2128 *Arg1 = EmitScalarExpr(E->getArg(1)); 2129 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2130 2131 // Building the generic function prototype. 
2132 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty}; 2133 llvm::FunctionType *FTy = llvm::FunctionType::get( 2134 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2135 // We know the second argument is an integer type, but we may need to cast 2136 // it to i32. 2137 if (Arg1->getType() != Int32Ty) 2138 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2139 return RValue::get( 2140 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); 2141 } 2142 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2143 // functions 2144 case Builtin::BIcommit_read_pipe: 2145 case Builtin::BIcommit_write_pipe: 2146 case Builtin::BIwork_group_commit_read_pipe: 2147 case Builtin::BIwork_group_commit_write_pipe: 2148 case Builtin::BIsub_group_commit_read_pipe: 2149 case Builtin::BIsub_group_commit_write_pipe: { 2150 const char *Name; 2151 if (BuiltinID == Builtin::BIcommit_read_pipe) 2152 Name = "__commit_read_pipe"; 2153 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2154 Name = "__commit_write_pipe"; 2155 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2156 Name = "__work_group_commit_read_pipe"; 2157 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2158 Name = "__work_group_commit_write_pipe"; 2159 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2160 Name = "__sub_group_commit_read_pipe"; 2161 else 2162 Name = "__sub_group_commit_write_pipe"; 2163 2164 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2165 *Arg1 = EmitScalarExpr(E->getArg(1)); 2166 2167 // Building the generic function prototype. 
2168 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()}; 2169 llvm::FunctionType *FTy = 2170 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2171 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2172 2173 return RValue::get( 2174 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); 2175 } 2176 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2177 case Builtin::BIget_pipe_num_packets: 2178 case Builtin::BIget_pipe_max_packets: { 2179 const char *Name; 2180 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2181 Name = "__get_pipe_num_packets"; 2182 else 2183 Name = "__get_pipe_max_packets"; 2184 2185 // Building the generic function prototype. 2186 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2187 llvm::Type *ArgTys[] = {Arg0->getType()}; 2188 llvm::FunctionType *FTy = llvm::FunctionType::get( 2189 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2190 2191 return RValue::get( 2192 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0})); 2193 } 2194 2195 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2196 case Builtin::BIto_global: 2197 case Builtin::BIto_local: 2198 case Builtin::BIto_private: { 2199 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2200 auto NewArgT = llvm::PointerType::get(Int8Ty, 2201 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2202 auto NewRetT = llvm::PointerType::get(Int8Ty, 2203 CGM.getContext().getTargetAddressSpace( 2204 E->getType()->getPointeeType().getAddressSpace())); 2205 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2206 llvm::Value *NewArg; 2207 if (Arg0->getType()->getPointerAddressSpace() != 2208 NewArgT->getPointerAddressSpace()) 2209 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2210 else 2211 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2212 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2213 auto NewCall = 2214 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2215 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2216 ConvertType(E->getType()))); 2217 } 2218 2219 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2220 // It contains four different overload formats specified in Table 6.13.17.1. 
2221 case Builtin::BIenqueue_kernel: { 2222 StringRef Name; // Generated function call name 2223 unsigned NumArgs = E->getNumArgs(); 2224 2225 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2226 llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); 2227 2228 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2229 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2230 llvm::Value *Range = EmitScalarExpr(E->getArg(2)); 2231 2232 if (NumArgs == 4) { 2233 // The most basic form of the call with parameters: 2234 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2235 Name = "__enqueue_kernel_basic"; 2236 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; 2237 llvm::FunctionType *FTy = llvm::FunctionType::get( 2238 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2239 2240 llvm::Value *Block = 2241 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2242 2243 return RValue::get(Builder.CreateCall( 2244 CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); 2245 } 2246 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2247 2248 // Could have events and/or vaargs. 2249 if (E->getArg(3)->getType()->isBlockPointerType()) { 2250 // No events passed, but has variadic arguments. 2251 Name = "__enqueue_kernel_vaargs"; 2252 llvm::Value *Block = 2253 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2254 // Create a vector of the arguments, as well as a constant value to 2255 // express to the runtime the number of variadic arguments. 2256 std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, 2257 ConstantInt::get(IntTy, NumArgs - 4)}; 2258 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, 2259 IntTy}; 2260 2261 // Add the variadics. 
2262 for (unsigned I = 4; I < NumArgs; ++I) { 2263 llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); 2264 unsigned TypeSizeInBytes = 2265 getContext() 2266 .getTypeSizeInChars(E->getArg(I)->getType()) 2267 .getQuantity(); 2268 Args.push_back(TypeSizeInBytes < 4 2269 ? Builder.CreateZExt(ArgSize, Int32Ty) 2270 : ArgSize); 2271 } 2272 2273 llvm::FunctionType *FTy = llvm::FunctionType::get( 2274 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2275 return RValue::get( 2276 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2277 llvm::ArrayRef<llvm::Value *>(Args))); 2278 } 2279 // Any calls now have event arguments passed. 2280 if (NumArgs >= 7) { 2281 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2282 unsigned AS4 = 2283 E->getArg(4)->getType()->isArrayType() 2284 ? E->getArg(4)->getType().getAddressSpace() 2285 : E->getArg(4)->getType()->getPointeeType().getAddressSpace(); 2286 llvm::Type *EventPtrAS4Ty = 2287 EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4)); 2288 unsigned AS5 = 2289 E->getArg(5)->getType()->getPointeeType().getAddressSpace(); 2290 llvm::Type *EventPtrAS5Ty = 2291 EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5)); 2292 2293 llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3)); 2294 llvm::Value *EventList = 2295 E->getArg(4)->getType()->isArrayType() 2296 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2297 : EmitScalarExpr(E->getArg(4)); 2298 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2299 llvm::Value *Block = 2300 Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); 2301 2302 std::vector<llvm::Type *> ArgTys = { 2303 QueueTy, Int32Ty, RangeTy, Int32Ty, 2304 EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy}; 2305 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2306 EventList, ClkEvent, Block}; 2307 2308 if (NumArgs == 7) { 2309 // Has events but no variadics. 
2310 Name = "__enqueue_kernel_basic_events"; 2311 llvm::FunctionType *FTy = llvm::FunctionType::get( 2312 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2313 return RValue::get( 2314 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2315 llvm::ArrayRef<llvm::Value *>(Args))); 2316 } 2317 // Has event info and variadics 2318 // Pass the number of variadics to the runtime function too. 2319 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2320 ArgTys.push_back(Int32Ty); 2321 Name = "__enqueue_kernel_events_vaargs"; 2322 2323 // Add the variadics. 2324 for (unsigned I = 7; I < NumArgs; ++I) { 2325 llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); 2326 unsigned TypeSizeInBytes = 2327 getContext() 2328 .getTypeSizeInChars(E->getArg(I)->getType()) 2329 .getQuantity(); 2330 Args.push_back(TypeSizeInBytes < 4 2331 ? Builder.CreateZExt(ArgSize, Int32Ty) 2332 : ArgSize); 2333 } 2334 llvm::FunctionType *FTy = llvm::FunctionType::get( 2335 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2336 return RValue::get( 2337 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2338 llvm::ArrayRef<llvm::Value *>(Args))); 2339 } 2340 } 2341 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2342 // parameter. 
2343 case Builtin::BIget_kernel_work_group_size: { 2344 Value *Arg = EmitScalarExpr(E->getArg(0)); 2345 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2346 return RValue::get( 2347 Builder.CreateCall(CGM.CreateRuntimeFunction( 2348 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2349 "__get_kernel_work_group_size_impl"), 2350 Arg)); 2351 } 2352 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2353 Value *Arg = EmitScalarExpr(E->getArg(0)); 2354 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2355 return RValue::get(Builder.CreateCall( 2356 CGM.CreateRuntimeFunction( 2357 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2358 "__get_kernel_preferred_work_group_multiple_impl"), 2359 Arg)); 2360 } 2361 case Builtin::BIprintf: 2362 if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) 2363 return EmitCUDADevicePrintfCallExpr(E, ReturnValue); 2364 break; 2365 case Builtin::BI__builtin_canonicalize: 2366 case Builtin::BI__builtin_canonicalizef: 2367 case Builtin::BI__builtin_canonicalizel: 2368 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2369 2370 case Builtin::BI__builtin_thread_pointer: { 2371 if (!getContext().getTargetInfo().isTLSSupported()) 2372 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2373 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 2374 break; 2375 } 2376 } 2377 2378 // If this is an alias for a lib function (e.g. __builtin_sin), emit 2379 // the call using the normal call path, but using the unmangled 2380 // version of the function name. 2381 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2382 return emitLibraryCall(*this, FD, E, 2383 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2384 2385 // If this is a predefined lib function (e.g. malloc), emit the call 2386 // using exactly the normal call path. 
2387 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2388 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 2389 2390 // Check that a call to a target specific builtin has the correct target 2391 // features. 2392 // This is down here to avoid non-target specific builtins, however, if 2393 // generic builtins start to require generic target features then we 2394 // can move this up to the beginning of the function. 2395 checkTargetFeatures(E, FD); 2396 2397 // See if we have a target specific intrinsic. 2398 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2399 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2400 if (const char *Prefix = 2401 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { 2402 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 2403 // NOTE we dont need to perform a compatibility flag check here since the 2404 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 2405 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 2406 if (IntrinsicID == Intrinsic::not_intrinsic) 2407 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name); 2408 } 2409 2410 if (IntrinsicID != Intrinsic::not_intrinsic) { 2411 SmallVector<Value*, 16> Args; 2412 2413 // Find out if any arguments are required to be integer constant 2414 // expressions. 2415 unsigned ICEArguments = 0; 2416 ASTContext::GetBuiltinTypeError Error; 2417 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2418 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2419 2420 Function *F = CGM.getIntrinsic(IntrinsicID); 2421 llvm::FunctionType *FTy = F->getFunctionType(); 2422 2423 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2424 Value *ArgValue; 2425 // If this is a normal argument, just emit it as a scalar. 
2426 if ((ICEArguments & (1 << i)) == 0) { 2427 ArgValue = EmitScalarExpr(E->getArg(i)); 2428 } else { 2429 // If this is required to be a constant, constant fold it so that we 2430 // know that the generated intrinsic gets a ConstantInt. 2431 llvm::APSInt Result; 2432 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 2433 assert(IsConst && "Constant arg isn't actually constant?"); 2434 (void)IsConst; 2435 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2436 } 2437 2438 // If the intrinsic arg type is different from the builtin arg type 2439 // we need to do a bit cast. 2440 llvm::Type *PTy = FTy->getParamType(i); 2441 if (PTy != ArgValue->getType()) { 2442 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 2443 "Must be able to losslessly bit cast to param"); 2444 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2445 } 2446 2447 Args.push_back(ArgValue); 2448 } 2449 2450 Value *V = Builder.CreateCall(F, Args); 2451 QualType BuiltinRetType = E->getType(); 2452 2453 llvm::Type *RetTy = VoidTy; 2454 if (!BuiltinRetType->isVoidType()) 2455 RetTy = ConvertType(BuiltinRetType); 2456 2457 if (RetTy != V->getType()) { 2458 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 2459 "Must be able to losslessly bit cast result type"); 2460 V = Builder.CreateBitCast(V, RetTy); 2461 } 2462 2463 return RValue::get(V); 2464 } 2465 2466 // See if we have a target specific builtin that needs to be lowered. 2467 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 2468 return RValue::get(V); 2469 2470 ErrorUnsupported(E, "builtin function"); 2471 2472 // Unknown builtin, for now just dump it out and return undef. 
2473 return GetUndefRValue(E->getType()); 2474 } 2475 2476 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 2477 unsigned BuiltinID, const CallExpr *E, 2478 llvm::Triple::ArchType Arch) { 2479 switch (Arch) { 2480 case llvm::Triple::arm: 2481 case llvm::Triple::armeb: 2482 case llvm::Triple::thumb: 2483 case llvm::Triple::thumbeb: 2484 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 2485 case llvm::Triple::aarch64: 2486 case llvm::Triple::aarch64_be: 2487 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 2488 case llvm::Triple::x86: 2489 case llvm::Triple::x86_64: 2490 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 2491 case llvm::Triple::ppc: 2492 case llvm::Triple::ppc64: 2493 case llvm::Triple::ppc64le: 2494 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 2495 case llvm::Triple::r600: 2496 case llvm::Triple::amdgcn: 2497 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 2498 case llvm::Triple::systemz: 2499 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 2500 case llvm::Triple::nvptx: 2501 case llvm::Triple::nvptx64: 2502 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 2503 case llvm::Triple::wasm32: 2504 case llvm::Triple::wasm64: 2505 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 2506 default: 2507 return nullptr; 2508 } 2509 } 2510 2511 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 2512 const CallExpr *E) { 2513 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 2514 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 2515 return EmitTargetArchBuiltinExpr( 2516 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 2517 getContext().getAuxTargetInfo()->getTriple().getArch()); 2518 } 2519 2520 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 2521 getTarget().getTriple().getArch()); 2522 } 2523 2524 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 2525 NeonTypeFlags TypeFlags, 2526 bool V1Ty=false) { 2527 int IsQuad = TypeFlags.isQuad(); 2528 switch (TypeFlags.getEltType()) { 
2529 case NeonTypeFlags::Int8: 2530 case NeonTypeFlags::Poly8: 2531 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 2532 case NeonTypeFlags::Int16: 2533 case NeonTypeFlags::Poly16: 2534 case NeonTypeFlags::Float16: 2535 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 2536 case NeonTypeFlags::Int32: 2537 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 2538 case NeonTypeFlags::Int64: 2539 case NeonTypeFlags::Poly64: 2540 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 2541 case NeonTypeFlags::Poly128: 2542 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 2543 // There is a lot of i128 and f128 API missing. 2544 // so we use v16i8 to represent poly128 and get pattern matched. 2545 return llvm::VectorType::get(CGF->Int8Ty, 16); 2546 case NeonTypeFlags::Float32: 2547 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 2548 case NeonTypeFlags::Float64: 2549 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 2550 } 2551 llvm_unreachable("Unknown vector element type!"); 2552 } 2553 2554 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 2555 NeonTypeFlags IntTypeFlags) { 2556 int IsQuad = IntTypeFlags.isQuad(); 2557 switch (IntTypeFlags.getEltType()) { 2558 case NeonTypeFlags::Int32: 2559 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 2560 case NeonTypeFlags::Int64: 2561 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 2562 default: 2563 llvm_unreachable("Type can't be converted to floating-point!"); 2564 } 2565 } 2566 2567 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 2568 unsigned nElts = V->getType()->getVectorNumElements(); 2569 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 2570 return Builder.CreateShuffleVector(V, V, SV, "lane"); 2571 } 2572 2573 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 2574 const char *name, 2575 unsigned shift, bool rightshift) { 2576 unsigned j = 0; 2577 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 2578 ai != ae; ++ai, ++j) 2579 if (shift > 0 && shift == j) 2580 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 2581 else 2582 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 2583 2584 return Builder.CreateCall(F, Ops, name); 2585 } 2586 2587 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 2588 bool neg) { 2589 int SV = cast<ConstantInt>(V)->getSExtValue(); 2590 return ConstantInt::get(Ty, neg ? -SV : SV); 2591 } 2592 2593 // \brief Right-shift a vector by a constant. 
2594 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 2595 llvm::Type *Ty, bool usgn, 2596 const char *name) { 2597 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 2598 2599 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 2600 int EltSize = VTy->getScalarSizeInBits(); 2601 2602 Vec = Builder.CreateBitCast(Vec, Ty); 2603 2604 // lshr/ashr are undefined when the shift amount is equal to the vector 2605 // element size. 2606 if (ShiftAmt == EltSize) { 2607 if (usgn) { 2608 // Right-shifting an unsigned value by its size yields 0. 2609 return llvm::ConstantAggregateZero::get(VTy); 2610 } else { 2611 // Right-shifting a signed value by its size is equivalent 2612 // to a shift of size-1. 2613 --ShiftAmt; 2614 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 2615 } 2616 } 2617 2618 Shift = EmitNeonShiftVector(Shift, Ty, false); 2619 if (usgn) 2620 return Builder.CreateLShr(Vec, Shift, name); 2621 else 2622 return Builder.CreateAShr(Vec, Shift, name); 2623 } 2624 2625 enum { 2626 AddRetType = (1 << 0), 2627 Add1ArgType = (1 << 1), 2628 Add2ArgTypes = (1 << 2), 2629 2630 VectorizeRetType = (1 << 3), 2631 VectorizeArgTypes = (1 << 4), 2632 2633 InventFloatType = (1 << 5), 2634 UnsignedAlts = (1 << 6), 2635 2636 Use64BitVectors = (1 << 7), 2637 Use128BitVectors = (1 << 8), 2638 2639 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 2640 VectorRet = AddRetType | VectorizeRetType, 2641 VectorRetGetArgs01 = 2642 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 2643 FpCmpzModifiers = 2644 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 2645 }; 2646 2647 namespace { 2648 struct NeonIntrinsicInfo { 2649 const char *NameHint; 2650 unsigned BuiltinID; 2651 unsigned LLVMIntrinsic; 2652 unsigned AltLLVMIntrinsic; 2653 unsigned TypeModifier; 2654 2655 bool operator<(unsigned RHSBuiltinID) const { 2656 return BuiltinID < RHSBuiltinID; 2657 } 2658 bool operator<(const NeonIntrinsicInfo &TE) const { 2659 return 
BuiltinID < TE.BuiltinID; 2660 } 2661 }; 2662 } // end anonymous namespace 2663 2664 #define NEONMAP0(NameBase) \ 2665 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 2666 2667 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 2668 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 2669 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 2670 2671 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 2672 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 2673 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 2674 TypeModifier } 2675 2676 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 2677 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2678 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2679 NEONMAP1(vabs_v, arm_neon_vabs, 0), 2680 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 2681 NEONMAP0(vaddhn_v), 2682 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 2683 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 2684 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 2685 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 2686 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 2687 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 2688 NEONMAP1(vcage_v, arm_neon_vacge, 0), 2689 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 2690 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 2691 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 2692 NEONMAP1(vcale_v, arm_neon_vacge, 0), 2693 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 2694 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 2695 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 2696 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 2697 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 2698 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2699 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2700 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2701 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2702 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 2703 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 2704 NEONMAP0(vcvt_f32_v), 2705 NEONMAP2(vcvt_n_f32_v, 
arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2706 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2707 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2708 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2709 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2710 NEONMAP0(vcvt_s32_v), 2711 NEONMAP0(vcvt_s64_v), 2712 NEONMAP0(vcvt_u32_v), 2713 NEONMAP0(vcvt_u64_v), 2714 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 2715 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 2716 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 2717 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 2718 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 2719 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 2720 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 2721 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 2722 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 2723 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 2724 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 2725 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 2726 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 2727 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 2728 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 2729 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 2730 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 2731 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 2732 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 2733 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 2734 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 2735 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 2736 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 2737 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 2738 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 2739 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 2740 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 2741 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 2742 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 2743 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 2744 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 2745 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 2746 NEONMAP0(vcvtq_f32_v), 2747 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, 
arm_neon_vcvtfxs2fp, 0), 2748 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2749 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2750 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2751 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2752 NEONMAP0(vcvtq_s32_v), 2753 NEONMAP0(vcvtq_s64_v), 2754 NEONMAP0(vcvtq_u32_v), 2755 NEONMAP0(vcvtq_u64_v), 2756 NEONMAP0(vext_v), 2757 NEONMAP0(vextq_v), 2758 NEONMAP0(vfma_v), 2759 NEONMAP0(vfmaq_v), 2760 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2761 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2762 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2763 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2764 NEONMAP0(vld1_dup_v), 2765 NEONMAP1(vld1_v, arm_neon_vld1, 0), 2766 NEONMAP0(vld1q_dup_v), 2767 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 2768 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 2769 NEONMAP1(vld2_v, arm_neon_vld2, 0), 2770 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 2771 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 2772 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 2773 NEONMAP1(vld3_v, arm_neon_vld3, 0), 2774 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 2775 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 2776 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 2777 NEONMAP1(vld4_v, arm_neon_vld4, 0), 2778 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 2779 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 2780 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2781 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 2782 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 2783 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2784 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 2785 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 2786 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 2787 NEONMAP2(vminq_v, arm_neon_vminu, 
arm_neon_vmins, Add1ArgType | UnsignedAlts), 2788 NEONMAP0(vmovl_v), 2789 NEONMAP0(vmovn_v), 2790 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 2791 NEONMAP0(vmull_v), 2792 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 2793 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2794 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2795 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 2796 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2797 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2798 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 2799 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 2800 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 2801 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 2802 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 2803 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2804 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2805 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 2806 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 2807 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 2808 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 2809 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 2810 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 2811 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 2812 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 2813 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 2814 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 2815 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 2816 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2817 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2818 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, 
arm_neon_vqshifts, UnsignedAlts), 2819 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2820 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 2821 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2822 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 2823 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 2824 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2825 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2826 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 2827 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 2828 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 2829 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 2830 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 2831 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2832 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2833 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 2834 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 2835 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 2836 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 2837 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 2838 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 2839 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 2840 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 2841 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 2842 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 2843 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 2844 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 2845 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2846 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2847 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 2848 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, 
arm_neon_vrshifts, UnsignedAlts), 2849 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2850 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2851 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 2852 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 2853 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 2854 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 2855 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 2856 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 2857 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 2858 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 2859 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 2860 NEONMAP0(vshl_n_v), 2861 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2862 NEONMAP0(vshll_n_v), 2863 NEONMAP0(vshlq_n_v), 2864 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2865 NEONMAP0(vshr_n_v), 2866 NEONMAP0(vshrn_n_v), 2867 NEONMAP0(vshrq_n_v), 2868 NEONMAP1(vst1_v, arm_neon_vst1, 0), 2869 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 2870 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 2871 NEONMAP1(vst2_v, arm_neon_vst2, 0), 2872 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 2873 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 2874 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 2875 NEONMAP1(vst3_v, arm_neon_vst3, 0), 2876 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 2877 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 2878 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 2879 NEONMAP1(vst4_v, arm_neon_vst4, 0), 2880 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 2881 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 2882 NEONMAP0(vsubhn_v), 2883 NEONMAP0(vtrn_v), 2884 NEONMAP0(vtrnq_v), 2885 NEONMAP0(vtst_v), 2886 NEONMAP0(vtstq_v), 2887 NEONMAP0(vuzp_v), 2888 NEONMAP0(vuzpq_v), 2889 NEONMAP0(vzip_v), 2890 NEONMAP0(vzipq_v) 2891 }; 2892 2893 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 2894 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 2895 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 2896 
NEONMAP0(vaddhn_v), 2897 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 2898 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 2899 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 2900 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 2901 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 2902 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 2903 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 2904 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 2905 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 2906 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 2907 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 2908 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 2909 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 2910 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 2911 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2912 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2913 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2914 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2915 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 2916 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 2917 NEONMAP0(vcvt_f32_v), 2918 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2919 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2920 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2921 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2922 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2923 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2924 NEONMAP0(vcvtq_f32_v), 2925 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2926 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2927 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2928 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2929 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2930 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2931 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 2932 NEONMAP0(vext_v), 2933 NEONMAP0(vextq_v), 2934 NEONMAP0(vfma_v), 2935 
NEONMAP0(vfmaq_v), 2936 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2937 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2938 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2939 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2940 NEONMAP0(vmovl_v), 2941 NEONMAP0(vmovn_v), 2942 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 2943 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 2944 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 2945 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2946 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2947 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 2948 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 2949 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 2950 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2951 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2952 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 2953 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 2954 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 2955 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 2956 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 2957 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 2958 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 2959 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 2960 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 2961 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 2962 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 2963 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2964 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, 
aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2965 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 2966 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2967 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 2968 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2969 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 2970 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 2971 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2972 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2973 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 2974 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2975 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2976 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 2977 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 2978 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2979 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2980 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2981 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2982 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2983 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2984 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2985 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2986 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 2987 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 2988 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 2989 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 2990 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 2991 
NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 2992 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 2993 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 2994 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 2995 NEONMAP0(vshl_n_v), 2996 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 2997 NEONMAP0(vshll_n_v), 2998 NEONMAP0(vshlq_n_v), 2999 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 3000 NEONMAP0(vshr_n_v), 3001 NEONMAP0(vshrn_n_v), 3002 NEONMAP0(vshrq_n_v), 3003 NEONMAP0(vsubhn_v), 3004 NEONMAP0(vtst_v), 3005 NEONMAP0(vtstq_v), 3006 }; 3007 3008 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 3009 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 3010 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 3011 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 3012 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3013 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3014 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3015 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3016 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3017 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3018 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3019 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3020 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 3021 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3022 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 3023 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3024 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3025 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3026 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3027 
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3028 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3029 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3030 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3031 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3032 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3033 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3034 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3035 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3036 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3037 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3038 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3039 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3040 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3041 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3042 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3043 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3044 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3045 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3046 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3047 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3048 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3049 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3050 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 3051 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3052 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | 
Add1ArgType), 3053 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3054 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3055 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3056 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3057 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 3058 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3059 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3060 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3061 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3062 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3063 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3064 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3065 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3066 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3067 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3068 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3069 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3070 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3071 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3072 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3073 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3074 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3075 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3076 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3077 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3078 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 3079 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 3080 
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 3081 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3082 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3083 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3084 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3085 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3086 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3087 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3088 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3089 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3090 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3091 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3092 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 3093 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3094 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 3095 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3096 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3097 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 3098 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 3099 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3100 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3101 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 3102 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 3103 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 3104 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 3105 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 3106 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 3107 NEONMAP1(vqmovnd_s64, 
aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 3108 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 3109 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3110 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3111 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3112 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3113 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 3114 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3115 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3116 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3117 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 3118 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3119 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 3120 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 3121 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 3122 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3123 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3124 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 3125 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 3126 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3127 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3128 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 3129 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 3130 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 3131 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 3132 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3133 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | 
Use64BitVectors), 3134 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3135 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 3136 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 3137 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3138 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3139 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3140 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3141 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3142 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3143 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 3144 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 3145 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3146 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3147 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3148 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3149 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 3150 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 3151 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 3152 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 3153 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3154 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3155 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 3156 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 3157 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 3158 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3159 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3160 
NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3161 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3162 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 3163 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3164 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3165 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3166 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3167 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 3168 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 3169 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3170 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3171 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 3172 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 3173 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 3174 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 3175 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 3176 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 3177 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 3178 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 3179 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 3180 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 3181 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 3182 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 3183 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 3184 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 3185 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 3186 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 3187 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 3188 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 3189 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 3190 
NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

// Bookkeeping for findNeonIntrinsicInMap: a flag is flipped to true the first
// time the table it is passed alongside has been verified as sorted, so the
// std::is_sorted check runs at most once per flag (assert-enabled builds
// only).
// NOTE(review): presumably one flag per intrinsic table (ARM NEON SIMD,
// AArch64 SIMD, AArch64 SISD); the call sites are outside this chunk - confirm.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;


/// Look up \p BuiltinID in \p IntrinsicMap with a binary search
/// (std::lower_bound). The map therefore has to be sorted consistently with
/// the comparison between a NeonIntrinsicInfo and a builtin ID, which is
/// declared outside this function.
///
/// When NDEBUG is not defined, sortedness is asserted the first time the
/// function runs with \p MapProvenSorted still false; the flag is then set so
/// the check is not repeated. With NDEBUG defined the flag is left untouched.
///
/// \returns a pointer to the entry whose BuiltinID equals \p BuiltinID, or
/// nullptr if there is no such entry.
static const NeonIntrinsicInfo *
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
                       unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
    MapProvenSorted = true;
  }
#endif

  const NeonIntrinsicInfo *Builtin =
      std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);

  // lower_bound only finds the first entry that is not less than BuiltinID;
  // an exact match still has to be confirmed.
  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}

/// Build the list of overload types for the LLVM intrinsic \p IntrinsicID as
/// directed by the flags in \p Modifier, and return the intrinsic declaration
/// obtained from CGM.getIntrinsic.
///
/// \param IntrinsicID  the LLVM intrinsic to look up.
/// \param Modifier     bit set of type-modifier flags. Use64BitVectors /
///                     Use128BitVectors select the vector width used when a
///                     type is vectorized; AddRetType, VectorizeRetType,
///                     VectorizeArgTypes, Add1ArgType, Add2ArgTypes and
///                     InventFloatType control which types are appended.
/// \param ArgType      the argument type to append (after optional
///                     vectorization).
/// \param E            the builtin call; only its return type is consulted,
///                     and only when AddRetType is set.
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  // 0 means "no width requested"; vectorized types then get a single element.
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::VectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::VectorType::get(ArgType, Elts);
  }

  // ArgType is appended once for either flag ...
  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  // ... and a second time when two argument types are requested.
  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

/// Emit the call for a scalar NEON builtin described by the table entry
/// \p SISDInfo.
///
/// The intrinsic is looked up via LookupNeonLLVMIntrinsic using the type of
/// the call's first argument. Any operand whose bit width differs from the
/// corresponding intrinsic parameter is placed in lane 0 of an undef vector
/// of the parameter type. If the builtin's result type is narrower than the
/// intrinsic's result, lane 0 of the result is extracted; otherwise the result
/// is bitcast to the builtin's result type.
///
/// \param Ops  the already-emitted operands; modified in place.
/// \param E    the builtin call expression.
static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
                                            const NeonIntrinsicInfo &SISDInfo,
                                            SmallVectorImpl<Value *> &Ops,
                                            const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparison actually exists; cmle is actually a
    // cmge with swapped operands. The table gives us the right intrinsic but
    // we still need to do the swap.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  // Walk the intrinsic's formal parameters in step with Ops (index j) and
  // wrap every operand whose width does not match its parameter.
  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    // Note: this ArgTy is the formal parameter type and shadows the outer
    // ArgTy declared above.
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
             ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
    // it before inserting.
    Ops[j] =
        CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  // A wider intrinsic result means the value of interest sits in lane 0.
  if (ResultType->getPrimitiveSizeInBits() <
      Result->getType()->getPrimitiveSizeInBits())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

/// Emit IR for a NEON builtin whose last call argument is an integer constant
/// encoding the NeonTypeFlags of the vector type it operates on.
///
/// Builtins that need special treatment are handled by the switch below and
/// mostly return directly. Builtins that are not listed, or whose case ends in
/// `break`, take the generic path after the switch: the intrinsic is looked up
/// with LookupNeonLLVMIntrinsic, called with \p Ops, and the result is bitcast
/// to the builtin's result type.
///
/// \param BuiltinID         the builtin being emitted.
/// \param LLVMIntrinsic     primary LLVM intrinsic ID for the builtin.
/// \param AltLLVMIntrinsic  alternate intrinsic ID; by default it is chosen
///                          when \p Modifier has UnsignedAlts set and the
///                          vector type is not unsigned. Some cases pick
///                          between the two IDs by their own rule.
/// \param NameHint          name used for the emitted call in most cases.
/// \param Modifier          type-modifier flags (see LookupNeonLLVMIntrinsic).
/// \param E                 the builtin call expression.
/// \param Ops               already-emitted operands; modified in place.
/// \param PtrOp0, PtrOp1    addresses whose alignment is passed to the
///                          load/store intrinsics; PtrOp0 is also the address
///                          loaded from by the vld1_dup cases.
/// \returns the emitted value, or nullptr if the last argument is not an
/// integer constant expression or GetNeonType yields no type for it.
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  // Ty starts out as VTy but is reassigned by some cases; VTy stays fixed.
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Materialize the alignment of an address as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Default intrinsic selection; individual cases may override Int.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the target-independent fabs intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    // Intentional fall through: after swapping the operands the le/lt forms
    // are emitted exactly like the ge/gt forms below.
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on VTy and on a float/double vector type
    // with the same lane count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer vector -> float vector, signedness taken from the type flags.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    // Overrides the default selection made before the switch: here the
    // primary intrinsic is the unsigned one and the alternate the signed one.
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float vector -> integer vector using plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Shuffle mask CV, CV+1, ...: a window of consecutive lanes, starting at
    // the constant in Ops[2], over the concatenation of Ops[0] and Ops[1].
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    // The intrinsic takes the alignment of the pointer as a trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the load intrinsic on Ops[1] (with PtrOp1's alignment) and store
    // its result through Ops[0], cast to a pointer to the result type.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element through PtrOp0, insert it into lane 0 of an undef
    // vector and hand that to EmitNeonSplat (defined elsewhere; presumably it
    // broadcasts lane CI to every lane - confirm).
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Cast every operand from index 2 up to, but excluding, the last one.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Everything except Ops[0] is an intrinsic argument; the call's result is
    // then stored through Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // View the operand as the truncated-element vector type, then
    // zero-/sign-extend it to Ty.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // View the operand as the extended-element vector type, then truncate it
    // to Ty.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    // Polynomial types take precedence over the signedness choice.
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two-step emission: first call LLVMIntrinsic on all operands after
    // Ops[0], then call AltLLVMIntrinsic on Ops[0] and that result. The first
    // call is named "vqdmlal" for both builtins.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  // NOTE(review): in the three EmitNeonCall uses below that pass extra
  // trailing arguments (1 and a bool), those are presumably the index of the
  // shift operand and a right-shift flag; EmitNeonCall is declared outside
  // this chunk - confirm.
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Here the primary intrinsic is used for floating-point vectors and the
    // alternate one for all other types.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Emitted as a plain IR shl by the shift-amount vector.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend from the truncated-element vector type to VTy, then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the extended-element vector type (logical for unsigned,
    // arithmetic otherwise), then truncate to Ty.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    // All store forms append PtrOp0's alignment as the trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is the destination, treated as a pointer to two consecutive
    // vectors of type Ty; Ops[1] and Ops[2] are the inputs. One shuffle is
    // stored per result vector; the last store is returned.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Mask for result vi: lane i+vi of the first input followed by lane
      // i+vi of the second input, for every even i.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per lane, sign-extended so that a true lane is all ones.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same destination/input layout as the vtrn case above.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Result 0 takes the even lanes of the concatenated inputs, result 1
      // the odd lanes.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same destination/input layout as the vtrn case above.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Result 0 interleaves the low halves of the two inputs, result 1 the
      // high halves.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by builtins without a dedicated case and by cases
  // that end in `break`.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}

/// Emit a comparison of \p Op against zero and sign-extend the result to
/// \p Ty.
///
/// \param Op    the value to compare. If it is a bitcast instruction, the
///              comparison is done in the type of the bitcast's source
///              operand instead of Op's own type.
/// \param Ty    the type the comparison result is sign-extended to.
/// \param Fp    predicate used when the compared type is floating point.
/// \param Ip    predicate used otherwise.
/// \param Name  name given to the final sext.
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
    const CmpInst::Predicate Ip, const Twine &Name) {
  llvm::Type *OTy = Op->getType();

  // FIXME: this is utterly horrific. We should not be looking at previous
  // codegen context to find out what needs doing. Unfortunately TableGen
  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
  // (etc).
  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
    OTy = BI->getOperand(0)->getType();

  Op = Builder.CreateBitCast(Op, OTy);
  if (OTy->getScalarType()->isFloatingPointTy()) {
    Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
  } else {
    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
  }
  return Builder.CreateSExt(Op, Ty, Name);
}

/// Pack the table vectors in \p Ops pairwise into double-width vectors and
/// call the intrinsic \p IntID (overloaded on \p ResTy) with them.
///
/// The call's operands are, in order: \p ExtOp if it is non-null, one
/// shuffled vector per pair of entries in \p Ops (a trailing unpaired entry is
/// paired with a zero vector), and finally \p IndexOp.
///
/// \param Name  name used for the shuffles and for the emitted call.
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Value *ExtOp, Value *IndexOp,
                                 llvm::Type *ResTy, unsigned IntID,
                                 const char *Name) {
  SmallVector<Value *, 2> TblOps;
  if (ExtOp)
    TblOps.push_back(ExtOp);

  // Build a vector containing sequential number like (0, 1, 2, ..., 15)
  SmallVector<uint32_t, 16> Indices;
  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
    Indices.push_back(2*i);
    Indices.push_back(2*i+1);
  }

  // Concatenate Ops two at a time; End is the index of the last entry, so the
  // loop stops before an unpaired final entry.
  int PairPos = 0, End = Ops.size() - 1;
  while (PairPos < End) {
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     Ops[PairPos+1], Indices,
                                                     Name));
    PairPos += 2;
  }

  // If there's an odd number of 64-bit lookup table, fill the high 64-bit
  // of the 128-bit lookup table with zero.
  if (PairPos == End) {
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     ZeroTbl, Indices, Name));
  }

  Function *TblF;
  TblOps.push_back(IndexOp);
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);

  return CGF.EmitNeonCall(TblF, TblOps, Name);
}

/// If \p BuiltinID is one of the ARM hint builtins, emit a call to the
/// arm_hint intrinsic with the matching i32 immediate and return it.
///
/// Immediates used: nop = 0, yield = 1, wfe = 2, wfi = 3, sev = 4, sevl = 5.
///
/// \returns the emitted call, or nullptr if \p BuiltinID is not a hint
/// builtin handled here.
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  // Note: this local is named like the llvm::Value type; here it is just the
  // hint immediate.
  unsigned Value;
  switch (BuiltinID) {
  default:
    return nullptr;
  case ARM::BI__builtin_arm_nop:
    Value = 0;
    break;
  case ARM::BI__builtin_arm_yield:
  case ARM::BI__yield:
    Value = 1;
    break;
  case ARM::BI__builtin_arm_wfe:
  case ARM::BI__wfe:
    Value = 2;
    break;
  case ARM::BI__builtin_arm_wfi:
  case ARM::BI__wfi:
    Value = 3;
    break;
  case ARM::BI__builtin_arm_sev:
  case ARM::BI__sev:
    Value = 4;
    break;
  case ARM::BI__builtin_arm_sevl:
  case ARM::BI__sevl:
    Value = 5;
    break;
  }

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                            llvm::ConstantInt::get(Int32Ty, Value));
}

// Generates the IR for the read/write special register builtin,
// ValueType is the type of the value that is to be written or read,
// RegisterType is the type of the register being written to or read from.
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                         const CallExpr *E,
                                         llvm::Type *RegisterType,
                                         llvm::Type *ValueType,
                                         bool IsRead,
                                         StringRef SysReg = "") {
  // write and register intrinsics only support 32 and 64 bit operations.
3865 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 3866 && "Unsupported size for register."); 3867 3868 CodeGen::CGBuilderTy &Builder = CGF.Builder; 3869 CodeGen::CodeGenModule &CGM = CGF.CGM; 3870 LLVMContext &Context = CGM.getLLVMContext(); 3871 3872 if (SysReg.empty()) { 3873 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 3874 SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); 3875 } 3876 3877 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 3878 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 3879 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 3880 3881 llvm::Type *Types[] = { RegisterType }; 3882 3883 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 3884 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 3885 && "Can't fit 64-bit value in 32-bit register"); 3886 3887 if (IsRead) { 3888 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 3889 llvm::Value *Call = Builder.CreateCall(F, Metadata); 3890 3891 if (MixedTypes) 3892 // Read into 64 bit register and then truncate result to 32 bit. 3893 return Builder.CreateTrunc(Call, ValueType); 3894 3895 if (ValueType->isPointerTy()) 3896 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 3897 return Builder.CreateIntToPtr(Call, ValueType); 3898 3899 return Call; 3900 } 3901 3902 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 3903 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 3904 if (MixedTypes) { 3905 // Extend 32 bit write value to 64 bit to pass to write. 3906 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 3907 return Builder.CreateCall(F, { Metadata, ArgValue }); 3908 } 3909 3910 if (ValueType->isPointerTy()) { 3911 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
3912 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 3913 return Builder.CreateCall(F, { Metadata, ArgValue }); 3914 } 3915 3916 return Builder.CreateCall(F, { Metadata, ArgValue }); 3917 } 3918 3919 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 3920 /// argument that specifies the vector type. 3921 static bool HasExtraNeonArgument(unsigned BuiltinID) { 3922 switch (BuiltinID) { 3923 default: break; 3924 case NEON::BI__builtin_neon_vget_lane_i8: 3925 case NEON::BI__builtin_neon_vget_lane_i16: 3926 case NEON::BI__builtin_neon_vget_lane_i32: 3927 case NEON::BI__builtin_neon_vget_lane_i64: 3928 case NEON::BI__builtin_neon_vget_lane_f32: 3929 case NEON::BI__builtin_neon_vgetq_lane_i8: 3930 case NEON::BI__builtin_neon_vgetq_lane_i16: 3931 case NEON::BI__builtin_neon_vgetq_lane_i32: 3932 case NEON::BI__builtin_neon_vgetq_lane_i64: 3933 case NEON::BI__builtin_neon_vgetq_lane_f32: 3934 case NEON::BI__builtin_neon_vset_lane_i8: 3935 case NEON::BI__builtin_neon_vset_lane_i16: 3936 case NEON::BI__builtin_neon_vset_lane_i32: 3937 case NEON::BI__builtin_neon_vset_lane_i64: 3938 case NEON::BI__builtin_neon_vset_lane_f32: 3939 case NEON::BI__builtin_neon_vsetq_lane_i8: 3940 case NEON::BI__builtin_neon_vsetq_lane_i16: 3941 case NEON::BI__builtin_neon_vsetq_lane_i32: 3942 case NEON::BI__builtin_neon_vsetq_lane_i64: 3943 case NEON::BI__builtin_neon_vsetq_lane_f32: 3944 case NEON::BI__builtin_neon_vsha1h_u32: 3945 case NEON::BI__builtin_neon_vsha1cq_u32: 3946 case NEON::BI__builtin_neon_vsha1pq_u32: 3947 case NEON::BI__builtin_neon_vsha1mq_u32: 3948 case ARM::BI_MoveToCoprocessor: 3949 case ARM::BI_MoveToCoprocessor2: 3950 return false; 3951 } 3952 return true; 3953 } 3954 3955 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 3956 const CallExpr *E) { 3957 if (auto Hint = GetValueForARMHint(BuiltinID)) 3958 return Hint; 3959 3960 if (BuiltinID == ARM::BI__emit) { 3961 bool IsThumb = getTarget().getTriple().getArch() == 
llvm::Triple::thumb;
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);

    APSInt Value;
    if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    // The constant is resized to 16 bits for Thumb and 32 bits otherwise,
    // then emitted through a side-effecting inline-asm ".inst" directive.
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();

    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true)
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true);

    return Builder.CreateCall(Emit);
  }

  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
    Value *Option = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  }

  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW      = EmitScalarExpr(E->getArg(1));
    Value *IsData  = EmitScalarExpr(E->getArg(2));

    // Locality is not supported on ARM target
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);

    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  }

  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
                              EmitScalarExpr(E->getArg(0)),
                              "rbit");
  }

  // __clear_cache is emitted as a nounwind runtime call to the callee's own
  // name and type.
  if (BuiltinID == ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
      BuiltinID == ARM::BI__builtin_arm_mcrr2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_mcrr:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
      break;
    case ARM::BI__builtin_arm_mcrr2:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
      break;
    }

    // The builtin call carries 4 arguments because Rt and Rt2 are passed
    // as a single 64 bit integer (argument 2). The intrinsic call emitted
    // below takes 5 operands, so that integer is split into two 32 bit
    // halves: Rt is the low half and Rt2 the high half.

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
    Value *CRm = EmitScalarExpr(E->getArg(3));

    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);

    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  }

  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
      BuiltinID == ARM::BI__builtin_arm_mrrc2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_mrrc:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
      break;
    case ARM::BI__builtin_arm_mrrc2:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
      break;
    }

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *CRm  = EmitScalarExpr(E->getArg(2));
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});

    // Returns an unsigned 64 bit integer, represented
    // as two 32 bit integers.
    // Element 1 of the call result becomes the high 32 bits and element 0
    // the low 32 bits of the value returned here.

    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
    Rt = Builder.CreateZExt(Rt, Int64Ty);
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);

    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);

    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  }

  // 64-bit exclusive loads: explicit ldrexd builtins, plus ldrex/ldaex when
  // the result type is 64 bits wide.
  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
        BuiltinID == ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == ARM::BI__ldrexd) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
      break;
    case ARM::BI__builtin_arm_ldrexd:
    case ARM::BI__builtin_arm_ldrex:
    case ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
      break;
    }

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldrexd");

    // Rebuild the 64-bit value: element 1 is the high word, element 0 the
    // low word.
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }

  // Remaining (non 64-bit) exclusive loads.
  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
      BuiltinID == ARM::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
                                                  getContext().getTypeSize(Ty));
    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());

    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
                                       ? Intrinsic::arm_ldaex
                                       : Intrinsic::arm_ldrex,
                                   LoadAddr->getType());
    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");

    // Convert the intrinsic's integer result back to the builtin's type.
    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    else {
      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
      return Builder.CreateBitCast(Val, RealResTy);
    }
  }

  // 64-bit exclusive stores: strexd, plus strex/stlex when the stored value
  // is 64 bits wide.
  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == ARM::BI__builtin_arm_stlex ||
        BuiltinID == ARM::BI__builtin_arm_strex) &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlexd
                                       : Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);

    // Store the value to a temporary and reload it as a {i32, i32} struct
    // to obtain the two 32-bit operands the intrinsic takes.
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  }

  // Remaining (non 64-bit) exclusive stores.
  if (BuiltinID == ARM::BI__builtin_arm_strex ||
      BuiltinID == ARM::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    // The intrinsic is called with an i32 value operand: pointers are
    // converted with ptrtoint, everything else is bitcast to the integer
    // store type and then zero-extended.
    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
    else {
      StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
    }

    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlex
                                       : Intrinsic::arm_strex,
                                   StoreAddr->getType());
    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
  }

  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
    return Builder.CreateCall(F);
  }

  // CRC32
  // Builtins not listed here leave CRCIntrinsicID as not_intrinsic and skip
  // the CRC handling below.
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case ARM::BI__builtin_arm_crc32w:
  case ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case ARM::BI__builtin_arm_crc32cw:
  case ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));

    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
      // Feed the low 32 bits of the data first, then the high 32 bits,
      // chaining the first result into the second call.
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
      return Builder.CreateCall(F, {Res, Arg1b});
    } else {
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      return Builder.CreateCall(F, {Arg0, Arg1});
    }
  }

  // Special register reads (rsr*) and writes (wsr*).
  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
      BuiltinID == ARM::BI__builtin_arm_rsr64 ||
      BuiltinID == ARM::BI__builtin_arm_rsrp ||
      BuiltinID == ARM::BI__builtin_arm_wsr ||
      BuiltinID == ARM::BI__builtin_arm_wsr64 ||
      BuiltinID == ARM::BI__builtin_arm_wsrp) {

    bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
                  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
                  BuiltinID == ARM::BI__builtin_arm_rsrp;

    bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
                            BuiltinID == ARM::BI__builtin_arm_wsrp;

    bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
                   BuiltinID == ARM::BI__builtin_arm_wsr64;

    // Pointer variants use a 32-bit register with a void* value; the other
    // variants use the same integer type for both register and value.
    llvm::Type *ValueType;
    llvm::Type *RegisterType;
    if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
      RegisterType = Int32Ty;
    } else if (Is64Bit) {
      ValueType = RegisterType = Int64Ty;
    } else {
      ValueType = RegisterType = Int32Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
  }

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  // Helper: the alignment of an Address as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Emit the call arguments into Ops. When the builtin has the trailing
  // type-specifier argument it is not emitted here.
  Address PtrOp0 = Address::invalid();
  Address PtrOp1 = Address::invalid();
  SmallVector<Value*, 4> Ops;
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst2_v:
      case NEON::BI__builtin_neon_vst2q_v:
      case NEON::BI__builtin_neon_vst2_lane_v:
      case NEON::BI__builtin_neon_vst2q_lane_v:
      case NEON::BI__builtin_neon_vst3_v:
      case NEON::BI__builtin_neon_vst3q_v:
      case NEON::BI__builtin_neon_vst3_lane_v:
      case NEON::BI__builtin_neon_vst3q_lane_v:
      case NEON::BI__builtin_neon_vst4_v:
      case NEON::BI__builtin_neon_vst4q_v:
      case NEON::BI__builtin_neon_vst4_lane_v:
      case NEON::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.getPointer());
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_v:
      case NEON::BI__builtin_neon_vld2q_v:
      case NEON::BI__builtin_neon_vld3_v:
      case NEON::BI__builtin_neon_vld3q_v:
      case NEON::BI__builtin_neon_vld4_v:
      case NEON::BI__builtin_neon_vld4q_v:
      case NEON::BI__builtin_neon_vld2_lane_v:
      case NEON::BI__builtin_neon_vld2q_lane_v:
      case NEON::BI__builtin_neon_vld3_lane_v:
      case NEON::BI__builtin_neon_vld3q_lane_v:
      case NEON::BI__builtin_neon_vld4_lane_v:
      case NEON::BI__builtin_neon_vld4q_lane_v:
      case NEON::BI__builtin_neon_vld2_dup_v:
      case NEON::BI__builtin_neon_vld3_dup_v:
      case NEON::BI__builtin_neon_vld4_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(PtrOp1.getPointer());
        continue;
      }
    }

    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    } else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      llvm::APSInt Result;
      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
      assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
    }
  }

  // Builtins handled here are the ones HasExtraNeonArgument() reports as
  // having no trailing type argument.
  switch (BuiltinID) {
  default: break;

  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");

  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  // NOTE(review): the sha1c/sha1p/sha1m cases below reuse the "vsha1h" name
  // hint; this looks like copy-paste and presumably only affects the name of
  // the emitted IR value -- confirm before changing.
  case NEON::BI__builtin_neon_vsha1h_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1cq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1h");

  // The ARM _MoveToCoprocessor builtins put the input register value as
  // the first argument, but the LLVM intrinsic expects it as the third one.
  case ARM::BI_MoveToCoprocessor:
  case ARM::BI_MoveToCoprocessor2: {
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
                                   Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
                                  Ops[3], Ops[4], Ops[5]});
  }
  }

  // Get the last argument, which specifies the vector type.
  assert(HasExtraArg);
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return nullptr;

  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(Result.getZExtValue());
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Many NEON builtins have identical semantics and uses in ARM and
  // AArch64. Emit these in a single function.
  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);

  // Builtins not found in the shared map are handled case by case below;
  // anything unrecognised returns nullptr.
  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
      Value *Align = getAlignmentValue32(PtrOp0);
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
      // Combine them.
      uint32_t Indices[] = {1 - Lane, Lane};
      SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
    }
    // fall through
  case NEON::BI__builtin_neon_vld1_lane_v: {
    // Load one element through the pointer and insert it at lane Ops[2].
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
    Value *Ld = Builder.CreateLoad(PtrOp0);
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v: {
    // Handle 64-bit elements as a special-case.  There is no "dup" needed.
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_dup_v:
        Int = Intrinsic::arm_neon_vld2;
        break;
      case NEON::BI__builtin_neon_vld3_dup_v:
        Int = Intrinsic::arm_neon_vld3;
        break;
      case NEON::BI__builtin_neon_vld4_dup_v:
        Int = Intrinsic::arm_neon_vld4;
        break;
      default: llvm_unreachable("unknown vld_dup intrinsic?");
      }
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
      Function *F = CGM.getIntrinsic(Int, Tys);
      llvm::Value *Align = getAlignmentValue32(PtrOp1);
      Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
      // Store the loaded aggregate through Ops[0].
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
      return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
    }
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case NEON::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case NEON::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: llvm_unreachable("unknown vld_dup intrinsic?");
    }
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    // Arguments: the pointer, one undef vector per struct element, lane
    // index 0, and the alignment.
    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);
    Args.push_back(getAlignmentValue32(PtrOp1));

    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    // splat lane 0 to all elts in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    // Shift Ops[1] with the vrshift intrinsic, then add the result to
    // Ops[0].
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
    // fall through: vsri shares the vsli code path, differing only in the
    // rightShift flag passed to EmitNeonShiftVector.
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // The lane operand slot is reused for the alignment operand.
      Ops[2] = getAlignmentValue32(PtrOp0);
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Tys), Ops);
    }
    // fall through
  case NEON::BI__builtin_neon_vst1_lane_v: {
    // Extract the selected lane and store it through the pointer.
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
    return St;
  }
  case NEON::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case NEON::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case NEON::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case NEON::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case NEON::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case NEON::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case NEON::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case NEON::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  }
}

static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                        const CallExpr *E,
                                        SmallVectorImpl<Value *> &Ops) {
4632 unsigned int Int = 0; 4633 const char *s = nullptr; 4634 4635 switch (BuiltinID) { 4636 default: 4637 return nullptr; 4638 case NEON::BI__builtin_neon_vtbl1_v: 4639 case NEON::BI__builtin_neon_vqtbl1_v: 4640 case NEON::BI__builtin_neon_vqtbl1q_v: 4641 case NEON::BI__builtin_neon_vtbl2_v: 4642 case NEON::BI__builtin_neon_vqtbl2_v: 4643 case NEON::BI__builtin_neon_vqtbl2q_v: 4644 case NEON::BI__builtin_neon_vtbl3_v: 4645 case NEON::BI__builtin_neon_vqtbl3_v: 4646 case NEON::BI__builtin_neon_vqtbl3q_v: 4647 case NEON::BI__builtin_neon_vtbl4_v: 4648 case NEON::BI__builtin_neon_vqtbl4_v: 4649 case NEON::BI__builtin_neon_vqtbl4q_v: 4650 break; 4651 case NEON::BI__builtin_neon_vtbx1_v: 4652 case NEON::BI__builtin_neon_vqtbx1_v: 4653 case NEON::BI__builtin_neon_vqtbx1q_v: 4654 case NEON::BI__builtin_neon_vtbx2_v: 4655 case NEON::BI__builtin_neon_vqtbx2_v: 4656 case NEON::BI__builtin_neon_vqtbx2q_v: 4657 case NEON::BI__builtin_neon_vtbx3_v: 4658 case NEON::BI__builtin_neon_vqtbx3_v: 4659 case NEON::BI__builtin_neon_vqtbx3q_v: 4660 case NEON::BI__builtin_neon_vtbx4_v: 4661 case NEON::BI__builtin_neon_vqtbx4_v: 4662 case NEON::BI__builtin_neon_vqtbx4q_v: 4663 break; 4664 } 4665 4666 assert(E->getNumArgs() >= 3); 4667 4668 // Get the last argument, which specifies the vector type. 4669 llvm::APSInt Result; 4670 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 4671 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 4672 return nullptr; 4673 4674 // Determine the type of this overloaded NEON intrinsic. 4675 NeonTypeFlags Type(Result.getZExtValue()); 4676 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 4677 if (!Ty) 4678 return nullptr; 4679 4680 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4681 4682 // AArch64 scalar builtins are not overloaded, they do not have an extra 4683 // argument that specifies the vector type, need to handle each case. 
4684 switch (BuiltinID) { 4685 case NEON::BI__builtin_neon_vtbl1_v: { 4686 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 4687 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 4688 "vtbl1"); 4689 } 4690 case NEON::BI__builtin_neon_vtbl2_v: { 4691 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 4692 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 4693 "vtbl1"); 4694 } 4695 case NEON::BI__builtin_neon_vtbl3_v: { 4696 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 4697 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 4698 "vtbl2"); 4699 } 4700 case NEON::BI__builtin_neon_vtbl4_v: { 4701 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 4702 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 4703 "vtbl2"); 4704 } 4705 case NEON::BI__builtin_neon_vtbx1_v: { 4706 Value *TblRes = 4707 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 4708 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 4709 4710 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 4711 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 4712 CmpRes = Builder.CreateSExt(CmpRes, Ty); 4713 4714 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 4715 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 4716 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 4717 } 4718 case NEON::BI__builtin_neon_vtbx2_v: { 4719 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 4720 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 4721 "vtbx1"); 4722 } 4723 case NEON::BI__builtin_neon_vtbx3_v: { 4724 Value *TblRes = 4725 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 4726 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 4727 4728 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 4729 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 4730 TwentyFourV); 4731 CmpRes = Builder.CreateSExt(CmpRes, Ty); 4732 4733 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 4734 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 4735 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 4736 } 4737 case NEON::BI__builtin_neon_vtbx4_v: { 4738 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 4739 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 4740 "vtbx2"); 4741 } 4742 case NEON::BI__builtin_neon_vqtbl1_v: 4743 case NEON::BI__builtin_neon_vqtbl1q_v: 4744 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 4745 case NEON::BI__builtin_neon_vqtbl2_v: 4746 case NEON::BI__builtin_neon_vqtbl2q_v: { 4747 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 4748 case NEON::BI__builtin_neon_vqtbl3_v: 4749 case NEON::BI__builtin_neon_vqtbl3q_v: 4750 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 4751 case NEON::BI__builtin_neon_vqtbl4_v: 4752 case NEON::BI__builtin_neon_vqtbl4q_v: 4753 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 4754 case NEON::BI__builtin_neon_vqtbx1_v: 4755 case NEON::BI__builtin_neon_vqtbx1q_v: 4756 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 4757 case NEON::BI__builtin_neon_vqtbx2_v: 4758 case NEON::BI__builtin_neon_vqtbx2q_v: 4759 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 4760 case NEON::BI__builtin_neon_vqtbx3_v: 4761 case NEON::BI__builtin_neon_vqtbx3q_v: 4762 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 4763 case NEON::BI__builtin_neon_vqtbx4_v: 4764 case NEON::BI__builtin_neon_vqtbx4q_v: 4765 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 4766 } 4767 } 4768 4769 if (!Int) 4770 return nullptr; 4771 4772 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 4773 return CGF.EmitNeonCall(F, Ops, s); 4774 } 4775 4776 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 4777 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 4778 Op = Builder.CreateBitCast(Op, Int16Ty); 4779 Value *V = UndefValue::get(VTy); 4780 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 4781 Op = 
Builder.CreateInsertElement(V, Op, CI); 4782 return Op; 4783 } 4784 4785 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 4786 const CallExpr *E) { 4787 unsigned HintID = static_cast<unsigned>(-1); 4788 switch (BuiltinID) { 4789 default: break; 4790 case AArch64::BI__builtin_arm_nop: 4791 HintID = 0; 4792 break; 4793 case AArch64::BI__builtin_arm_yield: 4794 HintID = 1; 4795 break; 4796 case AArch64::BI__builtin_arm_wfe: 4797 HintID = 2; 4798 break; 4799 case AArch64::BI__builtin_arm_wfi: 4800 HintID = 3; 4801 break; 4802 case AArch64::BI__builtin_arm_sev: 4803 HintID = 4; 4804 break; 4805 case AArch64::BI__builtin_arm_sevl: 4806 HintID = 5; 4807 break; 4808 } 4809 4810 if (HintID != static_cast<unsigned>(-1)) { 4811 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 4812 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 4813 } 4814 4815 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 4816 Value *Address = EmitScalarExpr(E->getArg(0)); 4817 Value *RW = EmitScalarExpr(E->getArg(1)); 4818 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 4819 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 4820 Value *IsData = EmitScalarExpr(E->getArg(4)); 4821 4822 Value *Locality = nullptr; 4823 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 4824 // Temporal fetch, needs to convert cache level to locality. 4825 Locality = llvm::ConstantInt::get(Int32Ty, 4826 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 4827 } else { 4828 // Streaming fetch. 4829 Locality = llvm::ConstantInt::get(Int32Ty, 0); 4830 } 4831 4832 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 4833 // PLDL3STRM or PLDL2STRM. 
4834 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4835 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4836 } 4837 4838 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 4839 assert((getContext().getTypeSize(E->getType()) == 32) && 4840 "rbit of unusual size!"); 4841 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4842 return Builder.CreateCall( 4843 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 4844 } 4845 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 4846 assert((getContext().getTypeSize(E->getType()) == 64) && 4847 "rbit of unusual size!"); 4848 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4849 return Builder.CreateCall( 4850 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 4851 } 4852 4853 if (BuiltinID == AArch64::BI__clear_cache) { 4854 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4855 const FunctionDecl *FD = E->getDirectCallee(); 4856 Value *Ops[2]; 4857 for (unsigned i = 0; i < 2; i++) 4858 Ops[i] = EmitScalarExpr(E->getArg(i)); 4859 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4860 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4861 StringRef Name = FD->getName(); 4862 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4863 } 4864 4865 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 4866 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 4867 getContext().getTypeSize(E->getType()) == 128) { 4868 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 4869 ? 
Intrinsic::aarch64_ldaxp 4870 : Intrinsic::aarch64_ldxp); 4871 4872 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4873 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4874 "ldxp"); 4875 4876 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4877 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4878 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 4879 Val0 = Builder.CreateZExt(Val0, Int128Ty); 4880 Val1 = Builder.CreateZExt(Val1, Int128Ty); 4881 4882 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 4883 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4884 Val = Builder.CreateOr(Val, Val1); 4885 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4886 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 4887 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 4888 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4889 4890 QualType Ty = E->getType(); 4891 llvm::Type *RealResTy = ConvertType(Ty); 4892 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 4893 getContext().getTypeSize(Ty)); 4894 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 4895 4896 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 4897 ? Intrinsic::aarch64_ldaxr 4898 : Intrinsic::aarch64_ldxr, 4899 LoadAddr->getType()); 4900 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 4901 4902 if (RealResTy->isPointerTy()) 4903 return Builder.CreateIntToPtr(Val, RealResTy); 4904 4905 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4906 return Builder.CreateBitCast(Val, RealResTy); 4907 } 4908 4909 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 4910 BuiltinID == AArch64::BI__builtin_arm_stlex) && 4911 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 4912 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4913 ? 
Intrinsic::aarch64_stlxp 4914 : Intrinsic::aarch64_stxp); 4915 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); 4916 4917 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4918 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 4919 4920 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 4921 llvm::Value *Val = Builder.CreateLoad(Tmp); 4922 4923 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4924 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4925 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 4926 Int8PtrTy); 4927 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 4928 } 4929 4930 if (BuiltinID == AArch64::BI__builtin_arm_strex || 4931 BuiltinID == AArch64::BI__builtin_arm_stlex) { 4932 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4933 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4934 4935 QualType Ty = E->getArg(0)->getType(); 4936 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4937 getContext().getTypeSize(Ty)); 4938 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4939 4940 if (StoreVal->getType()->isPointerTy()) 4941 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 4942 else { 4943 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 4944 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 4945 } 4946 4947 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4948 ? 
Intrinsic::aarch64_stlxr 4949 : Intrinsic::aarch64_stxr, 4950 StoreAddr->getType()); 4951 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 4952 } 4953 4954 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 4955 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 4956 return Builder.CreateCall(F); 4957 } 4958 4959 // CRC32 4960 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4961 switch (BuiltinID) { 4962 case AArch64::BI__builtin_arm_crc32b: 4963 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 4964 case AArch64::BI__builtin_arm_crc32cb: 4965 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 4966 case AArch64::BI__builtin_arm_crc32h: 4967 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 4968 case AArch64::BI__builtin_arm_crc32ch: 4969 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 4970 case AArch64::BI__builtin_arm_crc32w: 4971 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 4972 case AArch64::BI__builtin_arm_crc32cw: 4973 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 4974 case AArch64::BI__builtin_arm_crc32d: 4975 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 4976 case AArch64::BI__builtin_arm_crc32cd: 4977 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 4978 } 4979 4980 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4981 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4982 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4983 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4984 4985 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 4986 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 4987 4988 return Builder.CreateCall(F, {Arg0, Arg1}); 4989 } 4990 4991 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 4992 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 4993 BuiltinID == AArch64::BI__builtin_arm_rsrp || 4994 BuiltinID == AArch64::BI__builtin_arm_wsr || 4995 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 4996 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 4997 4998 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 4999 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5000 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5001 5002 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5003 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5004 5005 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5006 BuiltinID != AArch64::BI__builtin_arm_wsr; 5007 5008 llvm::Type *ValueType; 5009 llvm::Type *RegisterType = Int64Ty; 5010 if (IsPointerBuiltin) { 5011 ValueType = VoidPtrTy; 5012 } else if (Is64Bit) { 5013 ValueType = Int64Ty; 5014 } else { 5015 ValueType = Int32Ty; 5016 } 5017 5018 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5019 } 5020 5021 // Find out if any arguments are required to be integer constant 5022 // expressions. 5023 unsigned ICEArguments = 0; 5024 ASTContext::GetBuiltinTypeError Error; 5025 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5026 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5027 5028 llvm::SmallVector<Value*, 4> Ops; 5029 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5030 if ((ICEArguments & (1 << i)) == 0) { 5031 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5032 } else { 5033 // If this is required to be a constant, constant fold it so that we know 5034 // that the generated intrinsic gets a ConstantInt. 
5035 llvm::APSInt Result; 5036 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5037 assert(IsConst && "Constant arg isn't actually constant?"); 5038 (void)IsConst; 5039 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5040 } 5041 } 5042 5043 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5044 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5045 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5046 5047 if (Builtin) { 5048 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5049 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5050 assert(Result && "SISD intrinsic should have been handled"); 5051 return Result; 5052 } 5053 5054 llvm::APSInt Result; 5055 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5056 NeonTypeFlags Type(0); 5057 if (Arg->isIntegerConstantExpr(Result, getContext())) 5058 // Determine the type of this overloaded NEON intrinsic. 5059 Type = NeonTypeFlags(Result.getZExtValue()); 5060 5061 bool usgn = Type.isUnsigned(); 5062 bool quad = Type.isQuad(); 5063 5064 // Handle non-overloaded intrinsics first. 
5065 switch (BuiltinID) { 5066 default: break; 5067 case NEON::BI__builtin_neon_vldrq_p128: { 5068 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5069 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5070 return Builder.CreateDefaultAlignedLoad(Ptr); 5071 } 5072 case NEON::BI__builtin_neon_vstrq_p128: { 5073 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5074 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5075 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5076 } 5077 case NEON::BI__builtin_neon_vcvts_u32_f32: 5078 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5079 usgn = true; 5080 // FALL THROUGH 5081 case NEON::BI__builtin_neon_vcvts_s32_f32: 5082 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5083 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5084 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5085 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5086 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5087 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5088 if (usgn) 5089 return Builder.CreateFPToUI(Ops[0], InTy); 5090 return Builder.CreateFPToSI(Ops[0], InTy); 5091 } 5092 case NEON::BI__builtin_neon_vcvts_f32_u32: 5093 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5094 usgn = true; 5095 // FALL THROUGH 5096 case NEON::BI__builtin_neon_vcvts_f32_s32: 5097 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5098 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5099 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5100 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5101 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5102 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5103 if (usgn) 5104 return Builder.CreateUIToFP(Ops[0], FTy); 5105 return Builder.CreateSIToFP(Ops[0], FTy); 5106 } 5107 case NEON::BI__builtin_neon_vpaddd_s64: { 5108 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5109 Value *Vec = EmitScalarExpr(E->getArg(0)); 5110 // The vector is v2f64, so make sure it's bitcast to that. 5111 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5112 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5113 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5114 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5115 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5116 // Pairwise addition of a v2f64 into a scalar f64. 5117 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5118 } 5119 case NEON::BI__builtin_neon_vpaddd_f64: { 5120 llvm::Type *Ty = 5121 llvm::VectorType::get(DoubleTy, 2); 5122 Value *Vec = EmitScalarExpr(E->getArg(0)); 5123 // The vector is v2f64, so make sure it's bitcast to that. 5124 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5125 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5126 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5127 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5128 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5129 // Pairwise addition of a v2f64 into a scalar f64. 5130 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5131 } 5132 case NEON::BI__builtin_neon_vpadds_f32: { 5133 llvm::Type *Ty = 5134 llvm::VectorType::get(FloatTy, 2); 5135 Value *Vec = EmitScalarExpr(E->getArg(0)); 5136 // The vector is v2f32, so make sure it's bitcast to that. 
5137 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5138 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5139 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5140 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5141 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5142 // Pairwise addition of a v2f32 into a scalar f32. 5143 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5144 } 5145 case NEON::BI__builtin_neon_vceqzd_s64: 5146 case NEON::BI__builtin_neon_vceqzd_f64: 5147 case NEON::BI__builtin_neon_vceqzs_f32: 5148 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5149 return EmitAArch64CompareBuiltinExpr( 5150 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5151 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5152 case NEON::BI__builtin_neon_vcgezd_s64: 5153 case NEON::BI__builtin_neon_vcgezd_f64: 5154 case NEON::BI__builtin_neon_vcgezs_f32: 5155 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5156 return EmitAArch64CompareBuiltinExpr( 5157 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5158 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5159 case NEON::BI__builtin_neon_vclezd_s64: 5160 case NEON::BI__builtin_neon_vclezd_f64: 5161 case NEON::BI__builtin_neon_vclezs_f32: 5162 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5163 return EmitAArch64CompareBuiltinExpr( 5164 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5165 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5166 case NEON::BI__builtin_neon_vcgtzd_s64: 5167 case NEON::BI__builtin_neon_vcgtzd_f64: 5168 case NEON::BI__builtin_neon_vcgtzs_f32: 5169 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5170 return EmitAArch64CompareBuiltinExpr( 5171 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5172 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5173 case NEON::BI__builtin_neon_vcltzd_s64: 5174 case NEON::BI__builtin_neon_vcltzd_f64: 5175 case NEON::BI__builtin_neon_vcltzs_f32: 5176 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5177 return 
EmitAArch64CompareBuiltinExpr( 5178 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5179 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5180 5181 case NEON::BI__builtin_neon_vceqzd_u64: { 5182 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5183 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5184 Ops[0] = 5185 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5186 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5187 } 5188 case NEON::BI__builtin_neon_vceqd_f64: 5189 case NEON::BI__builtin_neon_vcled_f64: 5190 case NEON::BI__builtin_neon_vcltd_f64: 5191 case NEON::BI__builtin_neon_vcged_f64: 5192 case NEON::BI__builtin_neon_vcgtd_f64: { 5193 llvm::CmpInst::Predicate P; 5194 switch (BuiltinID) { 5195 default: llvm_unreachable("missing builtin ID in switch!"); 5196 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5197 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5198 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5199 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5200 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5201 } 5202 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5203 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5204 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5205 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5206 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5207 } 5208 case NEON::BI__builtin_neon_vceqs_f32: 5209 case NEON::BI__builtin_neon_vcles_f32: 5210 case NEON::BI__builtin_neon_vclts_f32: 5211 case NEON::BI__builtin_neon_vcges_f32: 5212 case NEON::BI__builtin_neon_vcgts_f32: { 5213 llvm::CmpInst::Predicate P; 5214 switch (BuiltinID) { 5215 default: llvm_unreachable("missing builtin ID in switch!"); 5216 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 5217 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 5218 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5219 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5220 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5221 } 5222 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5223 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5224 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5225 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5226 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5227 } 5228 case NEON::BI__builtin_neon_vceqd_s64: 5229 case NEON::BI__builtin_neon_vceqd_u64: 5230 case NEON::BI__builtin_neon_vcgtd_s64: 5231 case NEON::BI__builtin_neon_vcgtd_u64: 5232 case NEON::BI__builtin_neon_vcltd_s64: 5233 case NEON::BI__builtin_neon_vcltd_u64: 5234 case NEON::BI__builtin_neon_vcged_u64: 5235 case NEON::BI__builtin_neon_vcged_s64: 5236 case NEON::BI__builtin_neon_vcled_u64: 5237 case NEON::BI__builtin_neon_vcled_s64: { 5238 llvm::CmpInst::Predicate P; 5239 switch (BuiltinID) { 5240 default: llvm_unreachable("missing builtin ID in switch!"); 5241 case NEON::BI__builtin_neon_vceqd_s64: 5242 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5243 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5244 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5245 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5246 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5247 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5248 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5249 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5250 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5251 } 5252 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5253 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5254 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 5255 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5256 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5257 } 5258 case NEON::BI__builtin_neon_vtstd_s64: 5259 case NEON::BI__builtin_neon_vtstd_u64: { 5260 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5261 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5262 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5263 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 5264 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 5265 llvm::Constant::getNullValue(Int64Ty)); 5266 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 5267 } 5268 case NEON::BI__builtin_neon_vset_lane_i8: 5269 case NEON::BI__builtin_neon_vset_lane_i16: 5270 case NEON::BI__builtin_neon_vset_lane_i32: 5271 case NEON::BI__builtin_neon_vset_lane_i64: 5272 case NEON::BI__builtin_neon_vset_lane_f32: 5273 case NEON::BI__builtin_neon_vsetq_lane_i8: 5274 case NEON::BI__builtin_neon_vsetq_lane_i16: 5275 case NEON::BI__builtin_neon_vsetq_lane_i32: 5276 case NEON::BI__builtin_neon_vsetq_lane_i64: 5277 case NEON::BI__builtin_neon_vsetq_lane_f32: 5278 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5279 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5280 case NEON::BI__builtin_neon_vset_lane_f64: 5281 // The vector type needs a cast for the v1f64 variant. 5282 Ops[1] = Builder.CreateBitCast(Ops[1], 5283 llvm::VectorType::get(DoubleTy, 1)); 5284 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5285 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5286 case NEON::BI__builtin_neon_vsetq_lane_f64: 5287 // The vector type needs a cast for the v2f64 variant. 
5288 Ops[1] = Builder.CreateBitCast(Ops[1], 5289 llvm::VectorType::get(DoubleTy, 2)); 5290 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5291 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5292 5293 case NEON::BI__builtin_neon_vget_lane_i8: 5294 case NEON::BI__builtin_neon_vdupb_lane_i8: 5295 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 5296 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5297 "vget_lane"); 5298 case NEON::BI__builtin_neon_vgetq_lane_i8: 5299 case NEON::BI__builtin_neon_vdupb_laneq_i8: 5300 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 5301 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5302 "vgetq_lane"); 5303 case NEON::BI__builtin_neon_vget_lane_i16: 5304 case NEON::BI__builtin_neon_vduph_lane_i16: 5305 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 5306 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5307 "vget_lane"); 5308 case NEON::BI__builtin_neon_vgetq_lane_i16: 5309 case NEON::BI__builtin_neon_vduph_laneq_i16: 5310 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 5311 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5312 "vgetq_lane"); 5313 case NEON::BI__builtin_neon_vget_lane_i32: 5314 case NEON::BI__builtin_neon_vdups_lane_i32: 5315 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 5316 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5317 "vget_lane"); 5318 case NEON::BI__builtin_neon_vdups_lane_f32: 5319 Ops[0] = Builder.CreateBitCast(Ops[0], 5320 llvm::VectorType::get(FloatTy, 2)); 5321 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5322 "vdups_lane"); 5323 case NEON::BI__builtin_neon_vgetq_lane_i32: 5324 case NEON::BI__builtin_neon_vdups_laneq_i32: 5325 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 5326 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5327 "vgetq_lane"); 5328 case NEON::BI__builtin_neon_vget_lane_i64: 5329 case NEON::BI__builtin_neon_vdupd_lane_i64: 5330 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 5331 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5332 "vget_lane"); 5333 case NEON::BI__builtin_neon_vdupd_lane_f64: 5334 Ops[0] = Builder.CreateBitCast(Ops[0], 5335 llvm::VectorType::get(DoubleTy, 1)); 5336 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5337 "vdupd_lane"); 5338 case NEON::BI__builtin_neon_vgetq_lane_i64: 5339 case NEON::BI__builtin_neon_vdupd_laneq_i64: 5340 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 5341 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5342 "vgetq_lane"); 5343 case NEON::BI__builtin_neon_vget_lane_f32: 5344 Ops[0] = Builder.CreateBitCast(Ops[0], 5345 llvm::VectorType::get(FloatTy, 2)); 5346 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5347 "vget_lane"); 5348 case NEON::BI__builtin_neon_vget_lane_f64: 5349 Ops[0] = Builder.CreateBitCast(Ops[0], 5350 llvm::VectorType::get(DoubleTy, 1)); 5351 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5352 "vget_lane"); 5353 case NEON::BI__builtin_neon_vgetq_lane_f32: 5354 case NEON::BI__builtin_neon_vdups_laneq_f32: 5355 Ops[0] = Builder.CreateBitCast(Ops[0], 5356 llvm::VectorType::get(FloatTy, 4)); 5357 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5358 "vgetq_lane"); 5359 case NEON::BI__builtin_neon_vgetq_lane_f64: 5360 case NEON::BI__builtin_neon_vdupd_laneq_f64: 5361 Ops[0] = Builder.CreateBitCast(Ops[0], 5362 llvm::VectorType::get(DoubleTy, 2)); 5363 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5364 "vgetq_lane"); 5365 case NEON::BI__builtin_neon_vaddd_s64: 
5366 case NEON::BI__builtin_neon_vaddd_u64: 5367 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 5368 case NEON::BI__builtin_neon_vsubd_s64: 5369 case NEON::BI__builtin_neon_vsubd_u64: 5370 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 5371 case NEON::BI__builtin_neon_vqdmlalh_s16: 5372 case NEON::BI__builtin_neon_vqdmlslh_s16: { 5373 SmallVector<Value *, 2> ProductOps; 5374 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5375 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 5376 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5377 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5378 ProductOps, "vqdmlXl"); 5379 Constant *CI = ConstantInt::get(SizeTy, 0); 5380 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5381 5382 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 5383 ? Intrinsic::aarch64_neon_sqadd 5384 : Intrinsic::aarch64_neon_sqsub; 5385 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 5386 } 5387 case NEON::BI__builtin_neon_vqshlud_n_s64: { 5388 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5389 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5390 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 5391 Ops, "vqshlu_n"); 5392 } 5393 case NEON::BI__builtin_neon_vqshld_n_u64: 5394 case NEON::BI__builtin_neon_vqshld_n_s64: { 5395 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 5396 ? Intrinsic::aarch64_neon_uqshl 5397 : Intrinsic::aarch64_neon_sqshl; 5398 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5399 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5400 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 5401 } 5402 case NEON::BI__builtin_neon_vrshrd_n_u64: 5403 case NEON::BI__builtin_neon_vrshrd_n_s64: { 5404 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 5405 ? 
Intrinsic::aarch64_neon_urshl 5406 : Intrinsic::aarch64_neon_srshl; 5407 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5408 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 5409 Ops[1] = ConstantInt::get(Int64Ty, -SV); 5410 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 5411 } 5412 case NEON::BI__builtin_neon_vrsrad_n_u64: 5413 case NEON::BI__builtin_neon_vrsrad_n_s64: { 5414 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 5415 ? Intrinsic::aarch64_neon_urshl 5416 : Intrinsic::aarch64_neon_srshl; 5417 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5418 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 5419 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 5420 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 5421 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 5422 } 5423 case NEON::BI__builtin_neon_vshld_n_s64: 5424 case NEON::BI__builtin_neon_vshld_n_u64: { 5425 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5426 return Builder.CreateShl( 5427 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 5428 } 5429 case NEON::BI__builtin_neon_vshrd_n_s64: { 5430 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5431 return Builder.CreateAShr( 5432 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5433 Amt->getZExtValue())), 5434 "shrd_n"); 5435 } 5436 case NEON::BI__builtin_neon_vshrd_n_u64: { 5437 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5438 uint64_t ShiftAmt = Amt->getZExtValue(); 5439 // Right-shifting an unsigned value by its size yields 0. 
5440 if (ShiftAmt == 64) 5441 return ConstantInt::get(Int64Ty, 0); 5442 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 5443 "shrd_n"); 5444 } 5445 case NEON::BI__builtin_neon_vsrad_n_s64: { 5446 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5447 Ops[1] = Builder.CreateAShr( 5448 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5449 Amt->getZExtValue())), 5450 "shrd_n"); 5451 return Builder.CreateAdd(Ops[0], Ops[1]); 5452 } 5453 case NEON::BI__builtin_neon_vsrad_n_u64: { 5454 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5455 uint64_t ShiftAmt = Amt->getZExtValue(); 5456 // Right-shifting an unsigned value by its size yields 0. 5457 // As Op + 0 = Op, return Ops[0] directly. 5458 if (ShiftAmt == 64) 5459 return Ops[0]; 5460 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 5461 "shrd_n"); 5462 return Builder.CreateAdd(Ops[0], Ops[1]); 5463 } 5464 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 5465 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 5466 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 5467 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 5468 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5469 "lane"); 5470 SmallVector<Value *, 2> ProductOps; 5471 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5472 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 5473 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5474 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5475 ProductOps, "vqdmlXl"); 5476 Constant *CI = ConstantInt::get(SizeTy, 0); 5477 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5478 Ops.pop_back(); 5479 5480 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 5481 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 5482 ? 
Intrinsic::aarch64_neon_sqadd 5483 : Intrinsic::aarch64_neon_sqsub; 5484 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 5485 } 5486 case NEON::BI__builtin_neon_vqdmlals_s32: 5487 case NEON::BI__builtin_neon_vqdmlsls_s32: { 5488 SmallVector<Value *, 2> ProductOps; 5489 ProductOps.push_back(Ops[1]); 5490 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 5491 Ops[1] = 5492 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 5493 ProductOps, "vqdmlXl"); 5494 5495 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 5496 ? Intrinsic::aarch64_neon_sqadd 5497 : Intrinsic::aarch64_neon_sqsub; 5498 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 5499 } 5500 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 5501 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 5502 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 5503 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 5504 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5505 "lane"); 5506 SmallVector<Value *, 2> ProductOps; 5507 ProductOps.push_back(Ops[1]); 5508 ProductOps.push_back(Ops[2]); 5509 Ops[1] = 5510 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 5511 ProductOps, "vqdmlXl"); 5512 Ops.pop_back(); 5513 5514 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 5515 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 5516 ? Intrinsic::aarch64_neon_sqadd 5517 : Intrinsic::aarch64_neon_sqsub; 5518 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 5519 } 5520 } 5521 5522 llvm::VectorType *VTy = GetNeonType(this, Type); 5523 llvm::Type *Ty = VTy; 5524 if (!Ty) 5525 return nullptr; 5526 5527 // Not all intrinsics handled by the common case work for AArch64 yet, so only 5528 // defer to common code if it's been added to our special map. 
5529 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 5530 AArch64SIMDIntrinsicsProvenSorted); 5531 5532 if (Builtin) 5533 return EmitCommonNeonBuiltinExpr( 5534 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5535 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 5536 /*never use addresses*/ Address::invalid(), Address::invalid()); 5537 5538 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 5539 return V; 5540 5541 unsigned Int; 5542 switch (BuiltinID) { 5543 default: return nullptr; 5544 case NEON::BI__builtin_neon_vbsl_v: 5545 case NEON::BI__builtin_neon_vbslq_v: { 5546 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 5547 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 5548 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 5549 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 5550 5551 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 5552 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 5553 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 5554 return Builder.CreateBitCast(Ops[0], Ty); 5555 } 5556 case NEON::BI__builtin_neon_vfma_lane_v: 5557 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 5558 // The ARM builtins (and instructions) have the addend as the first 5559 // operand, but the 'fma' intrinsics have it last. Swap it around here. 5560 Value *Addend = Ops[0]; 5561 Value *Multiplicand = Ops[1]; 5562 Value *LaneSource = Ops[2]; 5563 Ops[0] = Multiplicand; 5564 Ops[1] = LaneSource; 5565 Ops[2] = Addend; 5566 5567 // Now adjust things to handle the lane access. 5568 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
5569 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 5570 VTy; 5571 llvm::Constant *cst = cast<Constant>(Ops[3]); 5572 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 5573 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 5574 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 5575 5576 Ops.pop_back(); 5577 Int = Intrinsic::fma; 5578 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 5579 } 5580 case NEON::BI__builtin_neon_vfma_laneq_v: { 5581 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 5582 // v1f64 fma should be mapped to Neon scalar f64 fma 5583 if (VTy && VTy->getElementType() == DoubleTy) { 5584 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5585 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5586 llvm::Type *VTy = GetNeonType(this, 5587 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 5588 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 5589 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 5590 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 5591 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 5592 return Builder.CreateBitCast(Result, Ty); 5593 } 5594 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5595 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5596 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5597 5598 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 5599 VTy->getNumElements() * 2); 5600 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 5601 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 5602 cast<ConstantInt>(Ops[3])); 5603 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 5604 5605 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 5606 } 5607 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 5608 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5609 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5610 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5611 5612 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 5613 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 5614 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 5615 } 5616 case NEON::BI__builtin_neon_vfmas_lane_f32: 5617 case NEON::BI__builtin_neon_vfmas_laneq_f32: 5618 case NEON::BI__builtin_neon_vfmad_lane_f64: 5619 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 5620 Ops.push_back(EmitScalarExpr(E->getArg(3))); 5621 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 5622 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5623 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 5624 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 5625 } 5626 case NEON::BI__builtin_neon_vmull_v: 5627 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5628 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 5629 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 5630 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 5631 case NEON::BI__builtin_neon_vmax_v: 5632 case NEON::BI__builtin_neon_vmaxq_v: 5633 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5634 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 5635 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 5636 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 5637 case NEON::BI__builtin_neon_vmin_v: 5638 case NEON::BI__builtin_neon_vminq_v: 5639 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5640 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 5641 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 5642 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 5643 case NEON::BI__builtin_neon_vabd_v: 5644 case NEON::BI__builtin_neon_vabdq_v: 5645 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5646 Int = usgn ? 
Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 5647 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 5648 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 5649 case NEON::BI__builtin_neon_vpadal_v: 5650 case NEON::BI__builtin_neon_vpadalq_v: { 5651 unsigned ArgElts = VTy->getNumElements(); 5652 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 5653 unsigned BitWidth = EltTy->getBitWidth(); 5654 llvm::Type *ArgTy = llvm::VectorType::get( 5655 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 5656 llvm::Type* Tys[2] = { VTy, ArgTy }; 5657 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 5658 SmallVector<llvm::Value*, 1> TmpOps; 5659 TmpOps.push_back(Ops[1]); 5660 Function *F = CGM.getIntrinsic(Int, Tys); 5661 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 5662 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 5663 return Builder.CreateAdd(tmp, addend); 5664 } 5665 case NEON::BI__builtin_neon_vpmin_v: 5666 case NEON::BI__builtin_neon_vpminq_v: 5667 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5668 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 5669 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 5670 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 5671 case NEON::BI__builtin_neon_vpmax_v: 5672 case NEON::BI__builtin_neon_vpmaxq_v: 5673 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5674 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 5675 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 5676 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 5677 case NEON::BI__builtin_neon_vminnm_v: 5678 case NEON::BI__builtin_neon_vminnmq_v: 5679 Int = Intrinsic::aarch64_neon_fminnm; 5680 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 5681 case NEON::BI__builtin_neon_vmaxnm_v: 5682 case NEON::BI__builtin_neon_vmaxnmq_v: 5683 Int = Intrinsic::aarch64_neon_fmaxnm; 5684 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 5685 case NEON::BI__builtin_neon_vrecpss_f32: { 5686 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5687 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 5688 Ops, "vrecps"); 5689 } 5690 case NEON::BI__builtin_neon_vrecpsd_f64: { 5691 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5692 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 5693 Ops, "vrecps"); 5694 } 5695 case NEON::BI__builtin_neon_vqshrun_n_v: 5696 Int = Intrinsic::aarch64_neon_sqshrun; 5697 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 5698 case NEON::BI__builtin_neon_vqrshrun_n_v: 5699 Int = Intrinsic::aarch64_neon_sqrshrun; 5700 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 5701 case NEON::BI__builtin_neon_vqshrn_n_v: 5702 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 5703 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 5704 case NEON::BI__builtin_neon_vrshrn_n_v: 5705 Int = Intrinsic::aarch64_neon_rshrn; 5706 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 5707 case NEON::BI__builtin_neon_vqrshrn_n_v: 5708 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 5709 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 5710 case NEON::BI__builtin_neon_vrnda_v: 5711 case NEON::BI__builtin_neon_vrndaq_v: { 5712 Int = Intrinsic::round; 5713 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 5714 } 5715 case NEON::BI__builtin_neon_vrndi_v: 5716 case NEON::BI__builtin_neon_vrndiq_v: { 5717 Int = Intrinsic::nearbyint; 5718 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 5719 } 5720 case NEON::BI__builtin_neon_vrndm_v: 5721 case NEON::BI__builtin_neon_vrndmq_v: { 5722 Int = Intrinsic::floor; 5723 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 5724 } 5725 case NEON::BI__builtin_neon_vrndn_v: 5726 case NEON::BI__builtin_neon_vrndnq_v: { 5727 Int = Intrinsic::aarch64_neon_frintn; 5728 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 5729 } 5730 case NEON::BI__builtin_neon_vrndp_v: 5731 case NEON::BI__builtin_neon_vrndpq_v: { 5732 Int = Intrinsic::ceil; 5733 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 5734 } 5735 case NEON::BI__builtin_neon_vrndx_v: 5736 case NEON::BI__builtin_neon_vrndxq_v: { 5737 Int = Intrinsic::rint; 5738 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 5739 } 5740 case NEON::BI__builtin_neon_vrnd_v: 5741 case NEON::BI__builtin_neon_vrndq_v: { 5742 Int = Intrinsic::trunc; 5743 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 5744 } 5745 case NEON::BI__builtin_neon_vceqz_v: 5746 case NEON::BI__builtin_neon_vceqzq_v: 5747 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 5748 ICmpInst::ICMP_EQ, "vceqz"); 5749 case NEON::BI__builtin_neon_vcgez_v: 5750 case NEON::BI__builtin_neon_vcgezq_v: 5751 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 5752 ICmpInst::ICMP_SGE, "vcgez"); 5753 case NEON::BI__builtin_neon_vclez_v: 5754 case NEON::BI__builtin_neon_vclezq_v: 5755 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 5756 ICmpInst::ICMP_SLE, "vclez"); 5757 case NEON::BI__builtin_neon_vcgtz_v: 5758 case NEON::BI__builtin_neon_vcgtzq_v: 5759 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 5760 ICmpInst::ICMP_SGT, "vcgtz"); 5761 case NEON::BI__builtin_neon_vcltz_v: 5762 case NEON::BI__builtin_neon_vcltzq_v: 5763 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 5764 ICmpInst::ICMP_SLT, "vcltz"); 5765 case NEON::BI__builtin_neon_vcvt_f64_v: 5766 case NEON::BI__builtin_neon_vcvtq_f64_v: 5767 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5768 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 5769 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 5770 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 5771 case NEON::BI__builtin_neon_vcvt_f64_f32: { 5772 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 5773 "unexpected vcvt_f64_f32 builtin"); 5774 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 5775 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 5776 5777 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 5778 } 5779 case NEON::BI__builtin_neon_vcvt_f32_f64: { 5780 assert(Type.getEltType() == NeonTypeFlags::Float32 && 5781 "unexpected vcvt_f32_f64 builtin"); 5782 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 5783 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 5784 5785 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 5786 } 5787 case NEON::BI__builtin_neon_vcvt_s32_v: 5788 case NEON::BI__builtin_neon_vcvt_u32_v: 5789 case NEON::BI__builtin_neon_vcvt_s64_v: 5790 case NEON::BI__builtin_neon_vcvt_u64_v: 5791 case NEON::BI__builtin_neon_vcvtq_s32_v: 5792 case NEON::BI__builtin_neon_vcvtq_u32_v: 5793 case NEON::BI__builtin_neon_vcvtq_s64_v: 5794 case NEON::BI__builtin_neon_vcvtq_u64_v: { 5795 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, 
Type)); 5796 if (usgn) 5797 return Builder.CreateFPToUI(Ops[0], Ty); 5798 return Builder.CreateFPToSI(Ops[0], Ty); 5799 } 5800 case NEON::BI__builtin_neon_vcvta_s32_v: 5801 case NEON::BI__builtin_neon_vcvtaq_s32_v: 5802 case NEON::BI__builtin_neon_vcvta_u32_v: 5803 case NEON::BI__builtin_neon_vcvtaq_u32_v: 5804 case NEON::BI__builtin_neon_vcvta_s64_v: 5805 case NEON::BI__builtin_neon_vcvtaq_s64_v: 5806 case NEON::BI__builtin_neon_vcvta_u64_v: 5807 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 5808 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 5809 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5810 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 5811 } 5812 case NEON::BI__builtin_neon_vcvtm_s32_v: 5813 case NEON::BI__builtin_neon_vcvtmq_s32_v: 5814 case NEON::BI__builtin_neon_vcvtm_u32_v: 5815 case NEON::BI__builtin_neon_vcvtmq_u32_v: 5816 case NEON::BI__builtin_neon_vcvtm_s64_v: 5817 case NEON::BI__builtin_neon_vcvtmq_s64_v: 5818 case NEON::BI__builtin_neon_vcvtm_u64_v: 5819 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 5820 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 5821 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5822 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 5823 } 5824 case NEON::BI__builtin_neon_vcvtn_s32_v: 5825 case NEON::BI__builtin_neon_vcvtnq_s32_v: 5826 case NEON::BI__builtin_neon_vcvtn_u32_v: 5827 case NEON::BI__builtin_neon_vcvtnq_u32_v: 5828 case NEON::BI__builtin_neon_vcvtn_s64_v: 5829 case NEON::BI__builtin_neon_vcvtnq_s64_v: 5830 case NEON::BI__builtin_neon_vcvtn_u64_v: 5831 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 5832 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 5833 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5834 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 5835 } 5836 case NEON::BI__builtin_neon_vcvtp_s32_v: 5837 case NEON::BI__builtin_neon_vcvtpq_s32_v: 5838 case NEON::BI__builtin_neon_vcvtp_u32_v: 5839 case NEON::BI__builtin_neon_vcvtpq_u32_v: 5840 case NEON::BI__builtin_neon_vcvtp_s64_v: 5841 case NEON::BI__builtin_neon_vcvtpq_s64_v: 5842 case NEON::BI__builtin_neon_vcvtp_u64_v: 5843 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 5844 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 5845 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5846 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 5847 } 5848 case NEON::BI__builtin_neon_vmulx_v: 5849 case NEON::BI__builtin_neon_vmulxq_v: { 5850 Int = Intrinsic::aarch64_neon_fmulx; 5851 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 5852 } 5853 case NEON::BI__builtin_neon_vmul_lane_v: 5854 case NEON::BI__builtin_neon_vmul_laneq_v: { 5855 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 5856 bool Quad = false; 5857 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 5858 Quad = true; 5859 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5860 llvm::Type *VTy = GetNeonType(this, 5861 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 5862 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 5863 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 5864 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 5865 return Builder.CreateBitCast(Result, Ty); 5866 } 5867 case NEON::BI__builtin_neon_vnegd_s64: 5868 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 5869 case NEON::BI__builtin_neon_vpmaxnm_v: 5870 case NEON::BI__builtin_neon_vpmaxnmq_v: { 5871 Int = Intrinsic::aarch64_neon_fmaxnmp; 5872 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 5873 } 5874 case 
NEON::BI__builtin_neon_vpminnm_v: 5875 case NEON::BI__builtin_neon_vpminnmq_v: { 5876 Int = Intrinsic::aarch64_neon_fminnmp; 5877 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 5878 } 5879 case NEON::BI__builtin_neon_vsqrt_v: 5880 case NEON::BI__builtin_neon_vsqrtq_v: { 5881 Int = Intrinsic::sqrt; 5882 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5883 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 5884 } 5885 case NEON::BI__builtin_neon_vrbit_v: 5886 case NEON::BI__builtin_neon_vrbitq_v: { 5887 Int = Intrinsic::aarch64_neon_rbit; 5888 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 5889 } 5890 case NEON::BI__builtin_neon_vaddv_u8: 5891 // FIXME: These are handled by the AArch64 scalar code. 5892 usgn = true; 5893 // FALLTHROUGH 5894 case NEON::BI__builtin_neon_vaddv_s8: { 5895 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5896 Ty = Int32Ty; 5897 VTy = llvm::VectorType::get(Int8Ty, 8); 5898 llvm::Type *Tys[2] = { Ty, VTy }; 5899 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5900 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5901 return Builder.CreateTrunc(Ops[0], Int8Ty); 5902 } 5903 case NEON::BI__builtin_neon_vaddv_u16: 5904 usgn = true; 5905 // FALLTHROUGH 5906 case NEON::BI__builtin_neon_vaddv_s16: { 5907 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5908 Ty = Int32Ty; 5909 VTy = llvm::VectorType::get(Int16Ty, 4); 5910 llvm::Type *Tys[2] = { Ty, VTy }; 5911 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5912 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5913 return Builder.CreateTrunc(Ops[0], Int16Ty); 5914 } 5915 case NEON::BI__builtin_neon_vaddvq_u8: 5916 usgn = true; 5917 // FALLTHROUGH 5918 case NEON::BI__builtin_neon_vaddvq_s8: { 5919 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5920 Ty = Int32Ty; 5921 VTy = llvm::VectorType::get(Int8Ty, 16); 5922 llvm::Type *Tys[2] = { Ty, VTy }; 5923 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5924 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5925 return Builder.CreateTrunc(Ops[0], Int8Ty); 5926 } 5927 case NEON::BI__builtin_neon_vaddvq_u16: 5928 usgn = true; 5929 // FALLTHROUGH 5930 case NEON::BI__builtin_neon_vaddvq_s16: { 5931 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5932 Ty = Int32Ty; 5933 VTy = llvm::VectorType::get(Int16Ty, 8); 5934 llvm::Type *Tys[2] = { Ty, VTy }; 5935 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5936 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5937 return Builder.CreateTrunc(Ops[0], Int16Ty); 5938 } 5939 case NEON::BI__builtin_neon_vmaxv_u8: { 5940 Int = Intrinsic::aarch64_neon_umaxv; 5941 Ty = Int32Ty; 5942 VTy = llvm::VectorType::get(Int8Ty, 8); 5943 llvm::Type *Tys[2] = { Ty, VTy }; 5944 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5945 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5946 return Builder.CreateTrunc(Ops[0], Int8Ty); 5947 } 5948 case NEON::BI__builtin_neon_vmaxv_u16: { 5949 Int = Intrinsic::aarch64_neon_umaxv; 5950 Ty = Int32Ty; 5951 VTy = llvm::VectorType::get(Int16Ty, 4); 5952 llvm::Type *Tys[2] = { Ty, VTy }; 5953 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5954 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5955 return Builder.CreateTrunc(Ops[0], Int16Ty); 5956 } 5957 case NEON::BI__builtin_neon_vmaxvq_u8: { 5958 Int = Intrinsic::aarch64_neon_umaxv; 5959 Ty = Int32Ty; 5960 VTy = llvm::VectorType::get(Int8Ty, 16); 5961 llvm::Type *Tys[2] = { Ty, VTy }; 5962 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5963 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5964 return Builder.CreateTrunc(Ops[0], Int8Ty); 5965 } 5966 case NEON::BI__builtin_neon_vmaxvq_u16: { 5967 Int = 
Intrinsic::aarch64_neon_umaxv; 5968 Ty = Int32Ty; 5969 VTy = llvm::VectorType::get(Int16Ty, 8); 5970 llvm::Type *Tys[2] = { Ty, VTy }; 5971 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5972 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5973 return Builder.CreateTrunc(Ops[0], Int16Ty); 5974 } 5975 case NEON::BI__builtin_neon_vmaxv_s8: { 5976 Int = Intrinsic::aarch64_neon_smaxv; 5977 Ty = Int32Ty; 5978 VTy = llvm::VectorType::get(Int8Ty, 8); 5979 llvm::Type *Tys[2] = { Ty, VTy }; 5980 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5981 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5982 return Builder.CreateTrunc(Ops[0], Int8Ty); 5983 } 5984 case NEON::BI__builtin_neon_vmaxv_s16: { 5985 Int = Intrinsic::aarch64_neon_smaxv; 5986 Ty = Int32Ty; 5987 VTy = llvm::VectorType::get(Int16Ty, 4); 5988 llvm::Type *Tys[2] = { Ty, VTy }; 5989 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5990 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5991 return Builder.CreateTrunc(Ops[0], Int16Ty); 5992 } 5993 case NEON::BI__builtin_neon_vmaxvq_s8: { 5994 Int = Intrinsic::aarch64_neon_smaxv; 5995 Ty = Int32Ty; 5996 VTy = llvm::VectorType::get(Int8Ty, 16); 5997 llvm::Type *Tys[2] = { Ty, VTy }; 5998 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5999 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6000 return Builder.CreateTrunc(Ops[0], Int8Ty); 6001 } 6002 case NEON::BI__builtin_neon_vmaxvq_s16: { 6003 Int = Intrinsic::aarch64_neon_smaxv; 6004 Ty = Int32Ty; 6005 VTy = llvm::VectorType::get(Int16Ty, 8); 6006 llvm::Type *Tys[2] = { Ty, VTy }; 6007 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6008 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6009 return Builder.CreateTrunc(Ops[0], Int16Ty); 6010 } 6011 case NEON::BI__builtin_neon_vminv_u8: { 6012 Int = Intrinsic::aarch64_neon_uminv; 6013 Ty = Int32Ty; 6014 VTy = llvm::VectorType::get(Int8Ty, 8); 6015 llvm::Type *Tys[2] = { Ty, VTy }; 6016 
Ops.push_back(EmitScalarExpr(E->getArg(0))); 6017 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6018 return Builder.CreateTrunc(Ops[0], Int8Ty); 6019 } 6020 case NEON::BI__builtin_neon_vminv_u16: { 6021 Int = Intrinsic::aarch64_neon_uminv; 6022 Ty = Int32Ty; 6023 VTy = llvm::VectorType::get(Int16Ty, 4); 6024 llvm::Type *Tys[2] = { Ty, VTy }; 6025 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6026 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6027 return Builder.CreateTrunc(Ops[0], Int16Ty); 6028 } 6029 case NEON::BI__builtin_neon_vminvq_u8: { 6030 Int = Intrinsic::aarch64_neon_uminv; 6031 Ty = Int32Ty; 6032 VTy = llvm::VectorType::get(Int8Ty, 16); 6033 llvm::Type *Tys[2] = { Ty, VTy }; 6034 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6035 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6036 return Builder.CreateTrunc(Ops[0], Int8Ty); 6037 } 6038 case NEON::BI__builtin_neon_vminvq_u16: { 6039 Int = Intrinsic::aarch64_neon_uminv; 6040 Ty = Int32Ty; 6041 VTy = llvm::VectorType::get(Int16Ty, 8); 6042 llvm::Type *Tys[2] = { Ty, VTy }; 6043 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6044 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6045 return Builder.CreateTrunc(Ops[0], Int16Ty); 6046 } 6047 case NEON::BI__builtin_neon_vminv_s8: { 6048 Int = Intrinsic::aarch64_neon_sminv; 6049 Ty = Int32Ty; 6050 VTy = llvm::VectorType::get(Int8Ty, 8); 6051 llvm::Type *Tys[2] = { Ty, VTy }; 6052 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6053 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6054 return Builder.CreateTrunc(Ops[0], Int8Ty); 6055 } 6056 case NEON::BI__builtin_neon_vminv_s16: { 6057 Int = Intrinsic::aarch64_neon_sminv; 6058 Ty = Int32Ty; 6059 VTy = llvm::VectorType::get(Int16Ty, 4); 6060 llvm::Type *Tys[2] = { Ty, VTy }; 6061 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6062 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6063 return Builder.CreateTrunc(Ops[0], 
Int16Ty); 6064 } 6065 case NEON::BI__builtin_neon_vminvq_s8: { 6066 Int = Intrinsic::aarch64_neon_sminv; 6067 Ty = Int32Ty; 6068 VTy = llvm::VectorType::get(Int8Ty, 16); 6069 llvm::Type *Tys[2] = { Ty, VTy }; 6070 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6071 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6072 return Builder.CreateTrunc(Ops[0], Int8Ty); 6073 } 6074 case NEON::BI__builtin_neon_vminvq_s16: { 6075 Int = Intrinsic::aarch64_neon_sminv; 6076 Ty = Int32Ty; 6077 VTy = llvm::VectorType::get(Int16Ty, 8); 6078 llvm::Type *Tys[2] = { Ty, VTy }; 6079 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6080 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6081 return Builder.CreateTrunc(Ops[0], Int16Ty); 6082 } 6083 case NEON::BI__builtin_neon_vmul_n_f64: { 6084 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6085 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6086 return Builder.CreateFMul(Ops[0], RHS); 6087 } 6088 case NEON::BI__builtin_neon_vaddlv_u8: { 6089 Int = Intrinsic::aarch64_neon_uaddlv; 6090 Ty = Int32Ty; 6091 VTy = llvm::VectorType::get(Int8Ty, 8); 6092 llvm::Type *Tys[2] = { Ty, VTy }; 6093 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6094 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6095 return Builder.CreateTrunc(Ops[0], Int16Ty); 6096 } 6097 case NEON::BI__builtin_neon_vaddlv_u16: { 6098 Int = Intrinsic::aarch64_neon_uaddlv; 6099 Ty = Int32Ty; 6100 VTy = llvm::VectorType::get(Int16Ty, 4); 6101 llvm::Type *Tys[2] = { Ty, VTy }; 6102 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6103 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6104 } 6105 case NEON::BI__builtin_neon_vaddlvq_u8: { 6106 Int = Intrinsic::aarch64_neon_uaddlv; 6107 Ty = Int32Ty; 6108 VTy = llvm::VectorType::get(Int8Ty, 16); 6109 llvm::Type *Tys[2] = { Ty, VTy }; 6110 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6111 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6112 
return Builder.CreateTrunc(Ops[0], Int16Ty); 6113 } 6114 case NEON::BI__builtin_neon_vaddlvq_u16: { 6115 Int = Intrinsic::aarch64_neon_uaddlv; 6116 Ty = Int32Ty; 6117 VTy = llvm::VectorType::get(Int16Ty, 8); 6118 llvm::Type *Tys[2] = { Ty, VTy }; 6119 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6120 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6121 } 6122 case NEON::BI__builtin_neon_vaddlv_s8: { 6123 Int = Intrinsic::aarch64_neon_saddlv; 6124 Ty = Int32Ty; 6125 VTy = llvm::VectorType::get(Int8Ty, 8); 6126 llvm::Type *Tys[2] = { Ty, VTy }; 6127 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6128 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6129 return Builder.CreateTrunc(Ops[0], Int16Ty); 6130 } 6131 case NEON::BI__builtin_neon_vaddlv_s16: { 6132 Int = Intrinsic::aarch64_neon_saddlv; 6133 Ty = Int32Ty; 6134 VTy = llvm::VectorType::get(Int16Ty, 4); 6135 llvm::Type *Tys[2] = { Ty, VTy }; 6136 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6137 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6138 } 6139 case NEON::BI__builtin_neon_vaddlvq_s8: { 6140 Int = Intrinsic::aarch64_neon_saddlv; 6141 Ty = Int32Ty; 6142 VTy = llvm::VectorType::get(Int8Ty, 16); 6143 llvm::Type *Tys[2] = { Ty, VTy }; 6144 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6145 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6146 return Builder.CreateTrunc(Ops[0], Int16Ty); 6147 } 6148 case NEON::BI__builtin_neon_vaddlvq_s16: { 6149 Int = Intrinsic::aarch64_neon_saddlv; 6150 Ty = Int32Ty; 6151 VTy = llvm::VectorType::get(Int16Ty, 8); 6152 llvm::Type *Tys[2] = { Ty, VTy }; 6153 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6154 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6155 } 6156 case NEON::BI__builtin_neon_vsri_n_v: 6157 case NEON::BI__builtin_neon_vsriq_n_v: { 6158 Int = Intrinsic::aarch64_neon_vsri; 6159 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6160 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6161 } 
6162 case NEON::BI__builtin_neon_vsli_n_v: 6163 case NEON::BI__builtin_neon_vsliq_n_v: { 6164 Int = Intrinsic::aarch64_neon_vsli; 6165 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6166 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6167 } 6168 case NEON::BI__builtin_neon_vsra_n_v: 6169 case NEON::BI__builtin_neon_vsraq_n_v: 6170 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6171 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6172 return Builder.CreateAdd(Ops[0], Ops[1]); 6173 case NEON::BI__builtin_neon_vrsra_n_v: 6174 case NEON::BI__builtin_neon_vrsraq_n_v: { 6175 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6176 SmallVector<llvm::Value*,2> TmpOps; 6177 TmpOps.push_back(Ops[1]); 6178 TmpOps.push_back(Ops[2]); 6179 Function* F = CGM.getIntrinsic(Int, Ty); 6180 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6181 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6182 return Builder.CreateAdd(Ops[0], tmp); 6183 } 6184 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6185 // of an Align parameter here. 
6186 case NEON::BI__builtin_neon_vld1_x2_v: 6187 case NEON::BI__builtin_neon_vld1q_x2_v: 6188 case NEON::BI__builtin_neon_vld1_x3_v: 6189 case NEON::BI__builtin_neon_vld1q_x3_v: 6190 case NEON::BI__builtin_neon_vld1_x4_v: 6191 case NEON::BI__builtin_neon_vld1q_x4_v: { 6192 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6193 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6194 llvm::Type *Tys[2] = { VTy, PTy }; 6195 unsigned Int; 6196 switch (BuiltinID) { 6197 case NEON::BI__builtin_neon_vld1_x2_v: 6198 case NEON::BI__builtin_neon_vld1q_x2_v: 6199 Int = Intrinsic::aarch64_neon_ld1x2; 6200 break; 6201 case NEON::BI__builtin_neon_vld1_x3_v: 6202 case NEON::BI__builtin_neon_vld1q_x3_v: 6203 Int = Intrinsic::aarch64_neon_ld1x3; 6204 break; 6205 case NEON::BI__builtin_neon_vld1_x4_v: 6206 case NEON::BI__builtin_neon_vld1q_x4_v: 6207 Int = Intrinsic::aarch64_neon_ld1x4; 6208 break; 6209 } 6210 Function *F = CGM.getIntrinsic(Int, Tys); 6211 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6212 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6213 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6214 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6215 } 6216 case NEON::BI__builtin_neon_vst1_x2_v: 6217 case NEON::BI__builtin_neon_vst1q_x2_v: 6218 case NEON::BI__builtin_neon_vst1_x3_v: 6219 case NEON::BI__builtin_neon_vst1q_x3_v: 6220 case NEON::BI__builtin_neon_vst1_x4_v: 6221 case NEON::BI__builtin_neon_vst1q_x4_v: { 6222 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6223 llvm::Type *Tys[2] = { VTy, PTy }; 6224 unsigned Int; 6225 switch (BuiltinID) { 6226 case NEON::BI__builtin_neon_vst1_x2_v: 6227 case NEON::BI__builtin_neon_vst1q_x2_v: 6228 Int = Intrinsic::aarch64_neon_st1x2; 6229 break; 6230 case NEON::BI__builtin_neon_vst1_x3_v: 6231 case NEON::BI__builtin_neon_vst1q_x3_v: 6232 Int = Intrinsic::aarch64_neon_st1x3; 6233 break; 6234 case NEON::BI__builtin_neon_vst1_x4_v: 6235 case 
NEON::BI__builtin_neon_vst1q_x4_v: 6236 Int = Intrinsic::aarch64_neon_st1x4; 6237 break; 6238 } 6239 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6240 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6241 } 6242 case NEON::BI__builtin_neon_vld1_v: 6243 case NEON::BI__builtin_neon_vld1q_v: 6244 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6245 return Builder.CreateDefaultAlignedLoad(Ops[0]); 6246 case NEON::BI__builtin_neon_vst1_v: 6247 case NEON::BI__builtin_neon_vst1q_v: 6248 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6249 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6250 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6251 case NEON::BI__builtin_neon_vld1_lane_v: 6252 case NEON::BI__builtin_neon_vld1q_lane_v: 6253 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6254 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6255 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6256 Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); 6257 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 6258 case NEON::BI__builtin_neon_vld1_dup_v: 6259 case NEON::BI__builtin_neon_vld1q_dup_v: { 6260 Value *V = UndefValue::get(Ty); 6261 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6262 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6263 Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); 6264 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 6265 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 6266 return EmitNeonSplat(Ops[0], CI); 6267 } 6268 case NEON::BI__builtin_neon_vst1_lane_v: 6269 case NEON::BI__builtin_neon_vst1q_lane_v: 6270 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6271 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 6272 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6273 return Builder.CreateDefaultAlignedStore(Ops[1], 6274 Builder.CreateBitCast(Ops[0], Ty)); 6275 case NEON::BI__builtin_neon_vld2_v: 6276 case 
NEON::BI__builtin_neon_vld2q_v: { 6277 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6278 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6279 llvm::Type *Tys[2] = { VTy, PTy }; 6280 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 6281 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6282 Ops[0] = Builder.CreateBitCast(Ops[0], 6283 llvm::PointerType::getUnqual(Ops[1]->getType())); 6284 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6285 } 6286 case NEON::BI__builtin_neon_vld3_v: 6287 case NEON::BI__builtin_neon_vld3q_v: { 6288 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6289 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6290 llvm::Type *Tys[2] = { VTy, PTy }; 6291 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 6292 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6293 Ops[0] = Builder.CreateBitCast(Ops[0], 6294 llvm::PointerType::getUnqual(Ops[1]->getType())); 6295 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6296 } 6297 case NEON::BI__builtin_neon_vld4_v: 6298 case NEON::BI__builtin_neon_vld4q_v: { 6299 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6300 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6301 llvm::Type *Tys[2] = { VTy, PTy }; 6302 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 6303 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6304 Ops[0] = Builder.CreateBitCast(Ops[0], 6305 llvm::PointerType::getUnqual(Ops[1]->getType())); 6306 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6307 } 6308 case NEON::BI__builtin_neon_vld2_dup_v: 6309 case NEON::BI__builtin_neon_vld2q_dup_v: { 6310 llvm::Type *PTy = 6311 llvm::PointerType::getUnqual(VTy->getElementType()); 6312 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6313 llvm::Type *Tys[2] = { VTy, PTy }; 6314 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 6315 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6316 Ops[0] = Builder.CreateBitCast(Ops[0], 6317 
llvm::PointerType::getUnqual(Ops[1]->getType())); 6318 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6319 } 6320 case NEON::BI__builtin_neon_vld3_dup_v: 6321 case NEON::BI__builtin_neon_vld3q_dup_v: { 6322 llvm::Type *PTy = 6323 llvm::PointerType::getUnqual(VTy->getElementType()); 6324 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6325 llvm::Type *Tys[2] = { VTy, PTy }; 6326 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 6327 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6328 Ops[0] = Builder.CreateBitCast(Ops[0], 6329 llvm::PointerType::getUnqual(Ops[1]->getType())); 6330 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6331 } 6332 case NEON::BI__builtin_neon_vld4_dup_v: 6333 case NEON::BI__builtin_neon_vld4q_dup_v: { 6334 llvm::Type *PTy = 6335 llvm::PointerType::getUnqual(VTy->getElementType()); 6336 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6337 llvm::Type *Tys[2] = { VTy, PTy }; 6338 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 6339 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6340 Ops[0] = Builder.CreateBitCast(Ops[0], 6341 llvm::PointerType::getUnqual(Ops[1]->getType())); 6342 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6343 } 6344 case NEON::BI__builtin_neon_vld2_lane_v: 6345 case NEON::BI__builtin_neon_vld2q_lane_v: { 6346 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6347 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 6348 Ops.push_back(Ops[1]); 6349 Ops.erase(Ops.begin()+1); 6350 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6351 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6352 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6353 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 6354 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6355 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6356 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6357 } 6358 case NEON::BI__builtin_neon_vld3_lane_v: 6359 case 
NEON::BI__builtin_neon_vld3q_lane_v: { 6360 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6361 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 6362 Ops.push_back(Ops[1]); 6363 Ops.erase(Ops.begin()+1); 6364 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6365 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6366 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6367 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6368 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 6369 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6370 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6371 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6372 } 6373 case NEON::BI__builtin_neon_vld4_lane_v: 6374 case NEON::BI__builtin_neon_vld4q_lane_v: { 6375 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6376 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 6377 Ops.push_back(Ops[1]); 6378 Ops.erase(Ops.begin()+1); 6379 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6380 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6381 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6382 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 6383 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 6384 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 6385 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6386 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6387 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6388 } 6389 case NEON::BI__builtin_neon_vst2_v: 6390 case NEON::BI__builtin_neon_vst2q_v: { 6391 Ops.push_back(Ops[0]); 6392 Ops.erase(Ops.begin()); 6393 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 6394 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 6395 Ops, ""); 6396 } 6397 case NEON::BI__builtin_neon_vst2_lane_v: 6398 case NEON::BI__builtin_neon_vst2q_lane_v: { 6399 Ops.push_back(Ops[0]); 6400 Ops.erase(Ops.begin()); 6401 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 6402 llvm::Type *Tys[2] = { VTy, 
Ops[3]->getType() }; 6403 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 6404 Ops, ""); 6405 } 6406 case NEON::BI__builtin_neon_vst3_v: 6407 case NEON::BI__builtin_neon_vst3q_v: { 6408 Ops.push_back(Ops[0]); 6409 Ops.erase(Ops.begin()); 6410 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6411 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 6412 Ops, ""); 6413 } 6414 case NEON::BI__builtin_neon_vst3_lane_v: 6415 case NEON::BI__builtin_neon_vst3q_lane_v: { 6416 Ops.push_back(Ops[0]); 6417 Ops.erase(Ops.begin()); 6418 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6419 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6420 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 6421 Ops, ""); 6422 } 6423 case NEON::BI__builtin_neon_vst4_v: 6424 case NEON::BI__builtin_neon_vst4q_v: { 6425 Ops.push_back(Ops[0]); 6426 Ops.erase(Ops.begin()); 6427 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6428 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 6429 Ops, ""); 6430 } 6431 case NEON::BI__builtin_neon_vst4_lane_v: 6432 case NEON::BI__builtin_neon_vst4q_lane_v: { 6433 Ops.push_back(Ops[0]); 6434 Ops.erase(Ops.begin()); 6435 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6436 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 6437 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 6438 Ops, ""); 6439 } 6440 case NEON::BI__builtin_neon_vtrn_v: 6441 case NEON::BI__builtin_neon_vtrnq_v: { 6442 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6443 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6444 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6445 Value *SV = nullptr; 6446 6447 for (unsigned vi = 0; vi != 2; ++vi) { 6448 SmallVector<uint32_t, 16> Indices; 6449 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6450 Indices.push_back(i+vi); 6451 Indices.push_back(i+e+vi); 6452 } 6453 Value *Addr = 
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6454 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 6455 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6456 } 6457 return SV; 6458 } 6459 case NEON::BI__builtin_neon_vuzp_v: 6460 case NEON::BI__builtin_neon_vuzpq_v: { 6461 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6462 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6463 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6464 Value *SV = nullptr; 6465 6466 for (unsigned vi = 0; vi != 2; ++vi) { 6467 SmallVector<uint32_t, 16> Indices; 6468 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 6469 Indices.push_back(2*i+vi); 6470 6471 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6472 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 6473 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6474 } 6475 return SV; 6476 } 6477 case NEON::BI__builtin_neon_vzip_v: 6478 case NEON::BI__builtin_neon_vzipq_v: { 6479 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6480 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6481 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6482 Value *SV = nullptr; 6483 6484 for (unsigned vi = 0; vi != 2; ++vi) { 6485 SmallVector<uint32_t, 16> Indices; 6486 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6487 Indices.push_back((i + vi*e) >> 1); 6488 Indices.push_back(((i + vi*e) >> 1)+e); 6489 } 6490 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6491 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 6492 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6493 } 6494 return SV; 6495 } 6496 case NEON::BI__builtin_neon_vqtbl1q_v: { 6497 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 6498 Ops, "vtbl1"); 6499 } 6500 case NEON::BI__builtin_neon_vqtbl2q_v: { 6501 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 6502 Ops, "vtbl2"); 6503 } 6504 case 
NEON::BI__builtin_neon_vqtbl3q_v: { 6505 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 6506 Ops, "vtbl3"); 6507 } 6508 case NEON::BI__builtin_neon_vqtbl4q_v: { 6509 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 6510 Ops, "vtbl4"); 6511 } 6512 case NEON::BI__builtin_neon_vqtbx1q_v: { 6513 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 6514 Ops, "vtbx1"); 6515 } 6516 case NEON::BI__builtin_neon_vqtbx2q_v: { 6517 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 6518 Ops, "vtbx2"); 6519 } 6520 case NEON::BI__builtin_neon_vqtbx3q_v: { 6521 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 6522 Ops, "vtbx3"); 6523 } 6524 case NEON::BI__builtin_neon_vqtbx4q_v: { 6525 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 6526 Ops, "vtbx4"); 6527 } 6528 case NEON::BI__builtin_neon_vsqadd_v: 6529 case NEON::BI__builtin_neon_vsqaddq_v: { 6530 Int = Intrinsic::aarch64_neon_usqadd; 6531 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 6532 } 6533 case NEON::BI__builtin_neon_vuqadd_v: 6534 case NEON::BI__builtin_neon_vuqaddq_v: { 6535 Int = Intrinsic::aarch64_neon_suqadd; 6536 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 6537 } 6538 } 6539 } 6540 6541 llvm::Value *CodeGenFunction:: 6542 BuildVector(ArrayRef<llvm::Value*> Ops) { 6543 assert((Ops.size() & (Ops.size() - 1)) == 0 && 6544 "Not a power-of-two sized vector!"); 6545 bool AllConstants = true; 6546 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 6547 AllConstants &= isa<Constant>(Ops[i]); 6548 6549 // If this is a constant vector, create a ConstantVector. 
6550 if (AllConstants) { 6551 SmallVector<llvm::Constant*, 16> CstOps; 6552 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 6553 CstOps.push_back(cast<Constant>(Ops[i])); 6554 return llvm::ConstantVector::get(CstOps); 6555 } 6556 6557 // Otherwise, insertelement the values to build the vector. 6558 Value *Result = 6559 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 6560 6561 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 6562 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 6563 6564 return Result; 6565 } 6566 6567 // Convert the mask from an integer type to a vector of i1. 6568 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 6569 unsigned NumElts) { 6570 6571 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 6572 cast<IntegerType>(Mask->getType())->getBitWidth()); 6573 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 6574 6575 // If we have less than 8 elements, then the starting mask was an i8 and 6576 // we need to extract down to the right number of elements. 6577 if (NumElts < 8) { 6578 uint32_t Indices[4]; 6579 for (unsigned i = 0; i != NumElts; ++i) 6580 Indices[i] = i; 6581 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 6582 makeArrayRef(Indices, NumElts), 6583 "extract"); 6584 } 6585 return MaskVec; 6586 } 6587 6588 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 6589 SmallVectorImpl<Value *> &Ops, 6590 unsigned Align) { 6591 // Cast the pointer to right type. 6592 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 6593 llvm::PointerType::getUnqual(Ops[1]->getType())); 6594 6595 // If the mask is all ones just emit a regular store. 
6596 if (const auto *C = dyn_cast<Constant>(Ops[2])) 6597 if (C->isAllOnesValue()) 6598 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 6599 6600 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 6601 Ops[1]->getType()->getVectorNumElements()); 6602 6603 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 6604 } 6605 6606 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 6607 SmallVectorImpl<Value *> &Ops, unsigned Align) { 6608 // Cast the pointer to right type. 6609 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 6610 llvm::PointerType::getUnqual(Ops[1]->getType())); 6611 6612 // If the mask is all ones just emit a regular store. 6613 if (const auto *C = dyn_cast<Constant>(Ops[2])) 6614 if (C->isAllOnesValue()) 6615 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 6616 6617 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 6618 Ops[1]->getType()->getVectorNumElements()); 6619 6620 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 6621 } 6622 6623 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 6624 SmallVectorImpl<Value *> &Ops, 6625 llvm::Type *DstTy, 6626 unsigned SrcSizeInBits, 6627 unsigned Align) { 6628 // Load the subvector. 6629 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 6630 6631 // Create broadcast mask. 6632 unsigned NumDstElts = DstTy->getVectorNumElements(); 6633 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 6634 6635 SmallVector<uint32_t, 8> Mask; 6636 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 6637 for (unsigned j = 0; j != NumSrcElts; ++j) 6638 Mask.push_back(j); 6639 6640 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 6641 } 6642 6643 static Value *EmitX86Select(CodeGenFunction &CGF, 6644 Value *Mask, Value *Op0, Value *Op1) { 6645 6646 // If the mask is all ones just return first argument. 
6647 if (const auto *C = dyn_cast<Constant>(Mask)) 6648 if (C->isAllOnesValue()) 6649 return Op0; 6650 6651 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 6652 6653 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 6654 } 6655 6656 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 6657 bool Signed, SmallVectorImpl<Value *> &Ops) { 6658 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 6659 Value *Cmp; 6660 6661 if (CC == 3) { 6662 Cmp = Constant::getNullValue( 6663 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 6664 } else if (CC == 7) { 6665 Cmp = Constant::getAllOnesValue( 6666 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 6667 } else { 6668 ICmpInst::Predicate Pred; 6669 switch (CC) { 6670 default: llvm_unreachable("Unknown condition code"); 6671 case 0: Pred = ICmpInst::ICMP_EQ; break; 6672 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 6673 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 6674 case 4: Pred = ICmpInst::ICMP_NE; break; 6675 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 6676 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 6677 } 6678 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 6679 } 6680 6681 const auto *C = dyn_cast<Constant>(Ops.back()); 6682 if (!C || !C->isAllOnesValue()) 6683 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 6684 6685 if (NumElts < 8) { 6686 uint32_t Indices[8]; 6687 for (unsigned i = 0; i != NumElts; ++i) 6688 Indices[i] = i; 6689 for (unsigned i = NumElts; i != 8; ++i) 6690 Indices[i] = i % NumElts + NumElts; 6691 Cmp = CGF.Builder.CreateShuffleVector( 6692 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 6693 } 6694 return CGF.Builder.CreateBitCast(Cmp, 6695 IntegerType::get(CGF.getLLVMContext(), 6696 std::max(NumElts, 8U))); 6697 } 6698 6699 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 6700 const CallExpr *E) { 6701 if (BuiltinID == X86::BI__builtin_ms_va_start || 6702 BuiltinID == X86::BI__builtin_ms_va_end) 6703 return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 6704 BuiltinID == X86::BI__builtin_ms_va_start); 6705 if (BuiltinID == X86::BI__builtin_ms_va_copy) { 6706 // Lower this manually. We can't reliably determine whether or not any 6707 // given va_copy() is for a Win64 va_list from the calling convention 6708 // alone, because it's legal to do this from a System V ABI function. 6709 // With opaque pointer types, we won't have enough information in LLVM 6710 // IR to determine this from the argument types, either. Best to do it 6711 // now, while we have enough information. 
6712 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 6713 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 6714 6715 llvm::Type *BPP = Int8PtrPtrTy; 6716 6717 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 6718 DestAddr.getAlignment()); 6719 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 6720 SrcAddr.getAlignment()); 6721 6722 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 6723 return Builder.CreateStore(ArgPtr, DestAddr); 6724 } 6725 6726 SmallVector<Value*, 4> Ops; 6727 6728 // Find out if any arguments are required to be integer constant expressions. 6729 unsigned ICEArguments = 0; 6730 ASTContext::GetBuiltinTypeError Error; 6731 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 6732 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 6733 6734 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 6735 // If this is a normal argument, just emit it as a scalar. 6736 if ((ICEArguments & (1 << i)) == 0) { 6737 Ops.push_back(EmitScalarExpr(E->getArg(i))); 6738 continue; 6739 } 6740 6741 // If this is required to be a constant, constant fold it so that we know 6742 // that the generated intrinsic gets a ConstantInt. 6743 llvm::APSInt Result; 6744 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 6745 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 6746 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 6747 } 6748 6749 // These exist so that the builtin that takes an immediate can be bounds 6750 // checked by clang to avoid passing bad immediates to the backend. Since 6751 // AVX has a larger immediate than SSE we would need separate builtins to 6752 // do the different bounds checking. Rather than create a clang specific 6753 // SSE only builtin, this implements eight separate builtins to match gcc 6754 // implementation. 
6755 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 6756 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 6757 llvm::Function *F = CGM.getIntrinsic(ID); 6758 return Builder.CreateCall(F, Ops); 6759 }; 6760 6761 // For the vector forms of FP comparisons, translate the builtins directly to 6762 // IR. 6763 // TODO: The builtins could be removed if the SSE header files used vector 6764 // extension comparisons directly (vector ordered/unordered may need 6765 // additional support via __builtin_isnan()). 6766 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 6767 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 6768 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 6769 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 6770 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 6771 return Builder.CreateBitCast(Sext, FPVecTy); 6772 }; 6773 6774 switch (BuiltinID) { 6775 default: return nullptr; 6776 case X86::BI__builtin_cpu_supports: { 6777 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 6778 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 6779 6780 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 6781 // based mapping. 6782 // Processor features and mapping to processor feature value. 
6783 enum X86Features { 6784 CMOV = 0, 6785 MMX, 6786 POPCNT, 6787 SSE, 6788 SSE2, 6789 SSE3, 6790 SSSE3, 6791 SSE4_1, 6792 SSE4_2, 6793 AVX, 6794 AVX2, 6795 SSE4_A, 6796 FMA4, 6797 XOP, 6798 FMA, 6799 AVX512F, 6800 BMI, 6801 BMI2, 6802 AES, 6803 PCLMUL, 6804 AVX512VL, 6805 AVX512BW, 6806 AVX512DQ, 6807 AVX512CD, 6808 AVX512ER, 6809 AVX512PF, 6810 AVX512VBMI, 6811 AVX512IFMA, 6812 MAX 6813 }; 6814 6815 X86Features Feature = StringSwitch<X86Features>(FeatureStr) 6816 .Case("cmov", X86Features::CMOV) 6817 .Case("mmx", X86Features::MMX) 6818 .Case("popcnt", X86Features::POPCNT) 6819 .Case("sse", X86Features::SSE) 6820 .Case("sse2", X86Features::SSE2) 6821 .Case("sse3", X86Features::SSE3) 6822 .Case("ssse3", X86Features::SSSE3) 6823 .Case("sse4.1", X86Features::SSE4_1) 6824 .Case("sse4.2", X86Features::SSE4_2) 6825 .Case("avx", X86Features::AVX) 6826 .Case("avx2", X86Features::AVX2) 6827 .Case("sse4a", X86Features::SSE4_A) 6828 .Case("fma4", X86Features::FMA4) 6829 .Case("xop", X86Features::XOP) 6830 .Case("fma", X86Features::FMA) 6831 .Case("avx512f", X86Features::AVX512F) 6832 .Case("bmi", X86Features::BMI) 6833 .Case("bmi2", X86Features::BMI2) 6834 .Case("aes", X86Features::AES) 6835 .Case("pclmul", X86Features::PCLMUL) 6836 .Case("avx512vl", X86Features::AVX512VL) 6837 .Case("avx512bw", X86Features::AVX512BW) 6838 .Case("avx512dq", X86Features::AVX512DQ) 6839 .Case("avx512cd", X86Features::AVX512CD) 6840 .Case("avx512er", X86Features::AVX512ER) 6841 .Case("avx512pf", X86Features::AVX512PF) 6842 .Case("avx512vbmi", X86Features::AVX512VBMI) 6843 .Case("avx512ifma", X86Features::AVX512IFMA) 6844 .Default(X86Features::MAX); 6845 assert(Feature != X86Features::MAX && "Invalid feature!"); 6846 6847 // Matching the struct layout from the compiler-rt/libgcc structure that is 6848 // filled in: 6849 // unsigned int __cpu_vendor; 6850 // unsigned int __cpu_type; 6851 // unsigned int __cpu_subtype; 6852 // unsigned int __cpu_features[1]; 6853 llvm::Type *STy = 
llvm::StructType::get( 6854 Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr); 6855 6856 // Grab the global __cpu_model. 6857 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 6858 6859 // Grab the first (0th) element from the field __cpu_features off of the 6860 // global in the struct STy. 6861 Value *Idxs[] = { 6862 ConstantInt::get(Int32Ty, 0), 6863 ConstantInt::get(Int32Ty, 3), 6864 ConstantInt::get(Int32Ty, 0) 6865 }; 6866 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 6867 Value *Features = Builder.CreateAlignedLoad(CpuFeatures, 6868 CharUnits::fromQuantity(4)); 6869 6870 // Check the value of the bit corresponding to the feature requested. 6871 Value *Bitset = Builder.CreateAnd( 6872 Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); 6873 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 6874 } 6875 case X86::BI_mm_prefetch: { 6876 Value *Address = Ops[0]; 6877 Value *RW = ConstantInt::get(Int32Ty, 0); 6878 Value *Locality = Ops[1]; 6879 Value *Data = ConstantInt::get(Int32Ty, 1); 6880 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 6881 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 6882 } 6883 case X86::BI__builtin_ia32_undef128: 6884 case X86::BI__builtin_ia32_undef256: 6885 case X86::BI__builtin_ia32_undef512: 6886 return UndefValue::get(ConvertType(E->getType())); 6887 case X86::BI__builtin_ia32_vec_init_v8qi: 6888 case X86::BI__builtin_ia32_vec_init_v4hi: 6889 case X86::BI__builtin_ia32_vec_init_v2si: 6890 return Builder.CreateBitCast(BuildVector(Ops), 6891 llvm::Type::getX86_MMXTy(getLLVMContext())); 6892 case X86::BI__builtin_ia32_vec_ext_v2si: 6893 return Builder.CreateExtractElement(Ops[0], 6894 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 6895 case X86::BI__builtin_ia32_ldmxcsr: { 6896 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 6897 Builder.CreateStore(Ops[0], Tmp); 6898 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 6899 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 6900 } 6901 case X86::BI__builtin_ia32_stmxcsr: { 6902 Address Tmp = CreateMemTemp(E->getType()); 6903 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 6904 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 6905 return Builder.CreateLoad(Tmp, "stmxcsr"); 6906 } 6907 case X86::BI__builtin_ia32_xsave: 6908 case X86::BI__builtin_ia32_xsave64: 6909 case X86::BI__builtin_ia32_xrstor: 6910 case X86::BI__builtin_ia32_xrstor64: 6911 case X86::BI__builtin_ia32_xsaveopt: 6912 case X86::BI__builtin_ia32_xsaveopt64: 6913 case X86::BI__builtin_ia32_xrstors: 6914 case X86::BI__builtin_ia32_xrstors64: 6915 case X86::BI__builtin_ia32_xsavec: 6916 case X86::BI__builtin_ia32_xsavec64: 6917 case X86::BI__builtin_ia32_xsaves: 6918 case X86::BI__builtin_ia32_xsaves64: { 6919 Intrinsic::ID ID; 6920 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 6921 case X86::BI__builtin_ia32_##NAME: \ 6922 ID = Intrinsic::x86_##NAME; \ 6923 break 6924 switch (BuiltinID) { 6925 default: llvm_unreachable("Unsupported intrinsic!"); 6926 INTRINSIC_X86_XSAVE_ID(xsave); 6927 INTRINSIC_X86_XSAVE_ID(xsave64); 6928 INTRINSIC_X86_XSAVE_ID(xrstor); 6929 INTRINSIC_X86_XSAVE_ID(xrstor64); 6930 INTRINSIC_X86_XSAVE_ID(xsaveopt); 6931 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 6932 INTRINSIC_X86_XSAVE_ID(xrstors); 6933 INTRINSIC_X86_XSAVE_ID(xrstors64); 6934 INTRINSIC_X86_XSAVE_ID(xsavec); 6935 INTRINSIC_X86_XSAVE_ID(xsavec64); 6936 INTRINSIC_X86_XSAVE_ID(xsaves); 6937 INTRINSIC_X86_XSAVE_ID(xsaves64); 6938 } 6939 #undef INTRINSIC_X86_XSAVE_ID 6940 Value *Mhi = Builder.CreateTrunc( 6941 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 6942 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 6943 Ops[1] = Mhi; 6944 Ops.push_back(Mlo); 6945 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 6946 } 6947 case X86::BI__builtin_ia32_storedqudi128_mask: 6948 case 
X86::BI__builtin_ia32_storedqusi128_mask: 6949 case X86::BI__builtin_ia32_storedquhi128_mask: 6950 case X86::BI__builtin_ia32_storedquqi128_mask: 6951 case X86::BI__builtin_ia32_storeupd128_mask: 6952 case X86::BI__builtin_ia32_storeups128_mask: 6953 case X86::BI__builtin_ia32_storedqudi256_mask: 6954 case X86::BI__builtin_ia32_storedqusi256_mask: 6955 case X86::BI__builtin_ia32_storedquhi256_mask: 6956 case X86::BI__builtin_ia32_storedquqi256_mask: 6957 case X86::BI__builtin_ia32_storeupd256_mask: 6958 case X86::BI__builtin_ia32_storeups256_mask: 6959 case X86::BI__builtin_ia32_storedqudi512_mask: 6960 case X86::BI__builtin_ia32_storedqusi512_mask: 6961 case X86::BI__builtin_ia32_storedquhi512_mask: 6962 case X86::BI__builtin_ia32_storedquqi512_mask: 6963 case X86::BI__builtin_ia32_storeupd512_mask: 6964 case X86::BI__builtin_ia32_storeups512_mask: 6965 return EmitX86MaskedStore(*this, Ops, 1); 6966 6967 case X86::BI__builtin_ia32_movdqa32store128_mask: 6968 case X86::BI__builtin_ia32_movdqa64store128_mask: 6969 case X86::BI__builtin_ia32_storeaps128_mask: 6970 case X86::BI__builtin_ia32_storeapd128_mask: 6971 case X86::BI__builtin_ia32_movdqa32store256_mask: 6972 case X86::BI__builtin_ia32_movdqa64store256_mask: 6973 case X86::BI__builtin_ia32_storeaps256_mask: 6974 case X86::BI__builtin_ia32_storeapd256_mask: 6975 case X86::BI__builtin_ia32_movdqa32store512_mask: 6976 case X86::BI__builtin_ia32_movdqa64store512_mask: 6977 case X86::BI__builtin_ia32_storeaps512_mask: 6978 case X86::BI__builtin_ia32_storeapd512_mask: { 6979 unsigned Align = 6980 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 6981 return EmitX86MaskedStore(*this, Ops, Align); 6982 } 6983 case X86::BI__builtin_ia32_loadups128_mask: 6984 case X86::BI__builtin_ia32_loadups256_mask: 6985 case X86::BI__builtin_ia32_loadups512_mask: 6986 case X86::BI__builtin_ia32_loadupd128_mask: 6987 case X86::BI__builtin_ia32_loadupd256_mask: 6988 case X86::BI__builtin_ia32_loadupd512_mask: 
6989 case X86::BI__builtin_ia32_loaddquqi128_mask: 6990 case X86::BI__builtin_ia32_loaddquqi256_mask: 6991 case X86::BI__builtin_ia32_loaddquqi512_mask: 6992 case X86::BI__builtin_ia32_loaddquhi128_mask: 6993 case X86::BI__builtin_ia32_loaddquhi256_mask: 6994 case X86::BI__builtin_ia32_loaddquhi512_mask: 6995 case X86::BI__builtin_ia32_loaddqusi128_mask: 6996 case X86::BI__builtin_ia32_loaddqusi256_mask: 6997 case X86::BI__builtin_ia32_loaddqusi512_mask: 6998 case X86::BI__builtin_ia32_loaddqudi128_mask: 6999 case X86::BI__builtin_ia32_loaddqudi256_mask: 7000 case X86::BI__builtin_ia32_loaddqudi512_mask: 7001 return EmitX86MaskedLoad(*this, Ops, 1); 7002 7003 case X86::BI__builtin_ia32_loadaps128_mask: 7004 case X86::BI__builtin_ia32_loadaps256_mask: 7005 case X86::BI__builtin_ia32_loadaps512_mask: 7006 case X86::BI__builtin_ia32_loadapd128_mask: 7007 case X86::BI__builtin_ia32_loadapd256_mask: 7008 case X86::BI__builtin_ia32_loadapd512_mask: 7009 case X86::BI__builtin_ia32_movdqa32load128_mask: 7010 case X86::BI__builtin_ia32_movdqa32load256_mask: 7011 case X86::BI__builtin_ia32_movdqa32load512_mask: 7012 case X86::BI__builtin_ia32_movdqa64load128_mask: 7013 case X86::BI__builtin_ia32_movdqa64load256_mask: 7014 case X86::BI__builtin_ia32_movdqa64load512_mask: { 7015 unsigned Align = 7016 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7017 return EmitX86MaskedLoad(*this, Ops, Align); 7018 } 7019 7020 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 7021 case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 7022 llvm::Type *DstTy = ConvertType(E->getType()); 7023 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 7024 } 7025 7026 case X86::BI__builtin_ia32_storehps: 7027 case X86::BI__builtin_ia32_storelps: { 7028 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 7029 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 7030 7031 // cast val v2i64 7032 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 7033 7034 // 
extract (0, 1) 7035 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 7036 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 7037 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 7038 7039 // cast pointer to i64 & store 7040 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 7041 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7042 } 7043 case X86::BI__builtin_ia32_palignr128: 7044 case X86::BI__builtin_ia32_palignr256: 7045 case X86::BI__builtin_ia32_palignr128_mask: 7046 case X86::BI__builtin_ia32_palignr256_mask: 7047 case X86::BI__builtin_ia32_palignr512_mask: { 7048 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7049 7050 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7051 assert(NumElts % 16 == 0); 7052 7053 // If palignr is shifting the pair of vectors more than the size of two 7054 // lanes, emit zero. 7055 if (ShiftVal >= 32) 7056 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7057 7058 // If palignr is shifting the pair of input vectors more than one lane, 7059 // but less than two lanes, convert to shifting in zeroes. 7060 if (ShiftVal > 16) { 7061 ShiftVal -= 16; 7062 Ops[1] = Ops[0]; 7063 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 7064 } 7065 7066 uint32_t Indices[64]; 7067 // 256-bit palignr operates on 128-bit lanes so we need to handle that 7068 for (unsigned l = 0; l != NumElts; l += 16) { 7069 for (unsigned i = 0; i != 16; ++i) { 7070 unsigned Idx = ShiftVal + i; 7071 if (Idx >= 16) 7072 Idx += NumElts - 16; // End of lane, switch operand. 7073 Indices[l + i] = Idx + l; 7074 } 7075 } 7076 7077 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 7078 makeArrayRef(Indices, NumElts), 7079 "palignr"); 7080 7081 // If this isn't a masked builtin, just return the align operation. 
7082 if (Ops.size() == 3) 7083 return Align; 7084 7085 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 7086 } 7087 7088 case X86::BI__builtin_ia32_movnti: 7089 case X86::BI__builtin_ia32_movnti64: { 7090 llvm::MDNode *Node = llvm::MDNode::get( 7091 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7092 7093 // Convert the type of the pointer to a pointer to the stored type. 7094 Value *BC = Builder.CreateBitCast(Ops[0], 7095 llvm::PointerType::getUnqual(Ops[1]->getType()), 7096 "cast"); 7097 StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); 7098 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7099 7100 // No alignment for scalar intrinsic store. 7101 SI->setAlignment(1); 7102 return SI; 7103 } 7104 case X86::BI__builtin_ia32_movntsd: 7105 case X86::BI__builtin_ia32_movntss: { 7106 llvm::MDNode *Node = llvm::MDNode::get( 7107 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7108 7109 // Extract the 0'th element of the source vector. 7110 Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); 7111 7112 // Convert the type of the pointer to a pointer to the stored type. 7113 Value *BC = Builder.CreateBitCast(Ops[0], 7114 llvm::PointerType::getUnqual(Scl->getType()), 7115 "cast"); 7116 7117 // Unaligned nontemporal store of the scalar value. 
7118 StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); 7119 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7120 SI->setAlignment(1); 7121 return SI; 7122 } 7123 7124 case X86::BI__builtin_ia32_selectb_128: 7125 case X86::BI__builtin_ia32_selectb_256: 7126 case X86::BI__builtin_ia32_selectb_512: 7127 case X86::BI__builtin_ia32_selectw_128: 7128 case X86::BI__builtin_ia32_selectw_256: 7129 case X86::BI__builtin_ia32_selectw_512: 7130 case X86::BI__builtin_ia32_selectd_128: 7131 case X86::BI__builtin_ia32_selectd_256: 7132 case X86::BI__builtin_ia32_selectd_512: 7133 case X86::BI__builtin_ia32_selectq_128: 7134 case X86::BI__builtin_ia32_selectq_256: 7135 case X86::BI__builtin_ia32_selectq_512: 7136 case X86::BI__builtin_ia32_selectps_128: 7137 case X86::BI__builtin_ia32_selectps_256: 7138 case X86::BI__builtin_ia32_selectps_512: 7139 case X86::BI__builtin_ia32_selectpd_128: 7140 case X86::BI__builtin_ia32_selectpd_256: 7141 case X86::BI__builtin_ia32_selectpd_512: 7142 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 7143 case X86::BI__builtin_ia32_pcmpeqb128_mask: 7144 case X86::BI__builtin_ia32_pcmpeqb256_mask: 7145 case X86::BI__builtin_ia32_pcmpeqb512_mask: 7146 case X86::BI__builtin_ia32_pcmpeqw128_mask: 7147 case X86::BI__builtin_ia32_pcmpeqw256_mask: 7148 case X86::BI__builtin_ia32_pcmpeqw512_mask: 7149 case X86::BI__builtin_ia32_pcmpeqd128_mask: 7150 case X86::BI__builtin_ia32_pcmpeqd256_mask: 7151 case X86::BI__builtin_ia32_pcmpeqd512_mask: 7152 case X86::BI__builtin_ia32_pcmpeqq128_mask: 7153 case X86::BI__builtin_ia32_pcmpeqq256_mask: 7154 case X86::BI__builtin_ia32_pcmpeqq512_mask: 7155 return EmitX86MaskedCompare(*this, 0, false, Ops); 7156 case X86::BI__builtin_ia32_pcmpgtb128_mask: 7157 case X86::BI__builtin_ia32_pcmpgtb256_mask: 7158 case X86::BI__builtin_ia32_pcmpgtb512_mask: 7159 case X86::BI__builtin_ia32_pcmpgtw128_mask: 7160 case X86::BI__builtin_ia32_pcmpgtw256_mask: 7161 case 
X86::BI__builtin_ia32_pcmpgtw512_mask: 7162 case X86::BI__builtin_ia32_pcmpgtd128_mask: 7163 case X86::BI__builtin_ia32_pcmpgtd256_mask: 7164 case X86::BI__builtin_ia32_pcmpgtd512_mask: 7165 case X86::BI__builtin_ia32_pcmpgtq128_mask: 7166 case X86::BI__builtin_ia32_pcmpgtq256_mask: 7167 case X86::BI__builtin_ia32_pcmpgtq512_mask: 7168 return EmitX86MaskedCompare(*this, 6, true, Ops); 7169 case X86::BI__builtin_ia32_cmpb128_mask: 7170 case X86::BI__builtin_ia32_cmpb256_mask: 7171 case X86::BI__builtin_ia32_cmpb512_mask: 7172 case X86::BI__builtin_ia32_cmpw128_mask: 7173 case X86::BI__builtin_ia32_cmpw256_mask: 7174 case X86::BI__builtin_ia32_cmpw512_mask: 7175 case X86::BI__builtin_ia32_cmpd128_mask: 7176 case X86::BI__builtin_ia32_cmpd256_mask: 7177 case X86::BI__builtin_ia32_cmpd512_mask: 7178 case X86::BI__builtin_ia32_cmpq128_mask: 7179 case X86::BI__builtin_ia32_cmpq256_mask: 7180 case X86::BI__builtin_ia32_cmpq512_mask: { 7181 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7182 return EmitX86MaskedCompare(*this, CC, true, Ops); 7183 } 7184 case X86::BI__builtin_ia32_ucmpb128_mask: 7185 case X86::BI__builtin_ia32_ucmpb256_mask: 7186 case X86::BI__builtin_ia32_ucmpb512_mask: 7187 case X86::BI__builtin_ia32_ucmpw128_mask: 7188 case X86::BI__builtin_ia32_ucmpw256_mask: 7189 case X86::BI__builtin_ia32_ucmpw512_mask: 7190 case X86::BI__builtin_ia32_ucmpd128_mask: 7191 case X86::BI__builtin_ia32_ucmpd256_mask: 7192 case X86::BI__builtin_ia32_ucmpd512_mask: 7193 case X86::BI__builtin_ia32_ucmpq128_mask: 7194 case X86::BI__builtin_ia32_ucmpq256_mask: 7195 case X86::BI__builtin_ia32_ucmpq512_mask: { 7196 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7197 return EmitX86MaskedCompare(*this, CC, false, Ops); 7198 } 7199 7200 case X86::BI__builtin_ia32_vplzcntd_128_mask: 7201 case X86::BI__builtin_ia32_vplzcntd_256_mask: 7202 case X86::BI__builtin_ia32_vplzcntd_512_mask: 7203 case X86::BI__builtin_ia32_vplzcntq_128_mask: 7204 
case X86::BI__builtin_ia32_vplzcntq_256_mask: 7205 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 7206 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 7207 return EmitX86Select(*this, Ops[2], 7208 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 7209 Ops[1]); 7210 } 7211 7212 // TODO: Handle 64/512-bit vector widths of min/max. 7213 case X86::BI__builtin_ia32_pmaxsb128: 7214 case X86::BI__builtin_ia32_pmaxsw128: 7215 case X86::BI__builtin_ia32_pmaxsd128: 7216 case X86::BI__builtin_ia32_pmaxsb256: 7217 case X86::BI__builtin_ia32_pmaxsw256: 7218 case X86::BI__builtin_ia32_pmaxsd256: { 7219 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); 7220 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7221 } 7222 case X86::BI__builtin_ia32_pmaxub128: 7223 case X86::BI__builtin_ia32_pmaxuw128: 7224 case X86::BI__builtin_ia32_pmaxud128: 7225 case X86::BI__builtin_ia32_pmaxub256: 7226 case X86::BI__builtin_ia32_pmaxuw256: 7227 case X86::BI__builtin_ia32_pmaxud256: { 7228 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); 7229 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7230 } 7231 case X86::BI__builtin_ia32_pminsb128: 7232 case X86::BI__builtin_ia32_pminsw128: 7233 case X86::BI__builtin_ia32_pminsd128: 7234 case X86::BI__builtin_ia32_pminsb256: 7235 case X86::BI__builtin_ia32_pminsw256: 7236 case X86::BI__builtin_ia32_pminsd256: { 7237 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); 7238 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7239 } 7240 case X86::BI__builtin_ia32_pminub128: 7241 case X86::BI__builtin_ia32_pminuw128: 7242 case X86::BI__builtin_ia32_pminud128: 7243 case X86::BI__builtin_ia32_pminub256: 7244 case X86::BI__builtin_ia32_pminuw256: 7245 case X86::BI__builtin_ia32_pminud256: { 7246 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); 7247 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7248 } 7249 7250 // 3DNow! 
7251 case X86::BI__builtin_ia32_pswapdsf: 7252 case X86::BI__builtin_ia32_pswapdsi: { 7253 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 7254 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 7255 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 7256 return Builder.CreateCall(F, Ops, "pswapd"); 7257 } 7258 case X86::BI__builtin_ia32_rdrand16_step: 7259 case X86::BI__builtin_ia32_rdrand32_step: 7260 case X86::BI__builtin_ia32_rdrand64_step: 7261 case X86::BI__builtin_ia32_rdseed16_step: 7262 case X86::BI__builtin_ia32_rdseed32_step: 7263 case X86::BI__builtin_ia32_rdseed64_step: { 7264 Intrinsic::ID ID; 7265 switch (BuiltinID) { 7266 default: llvm_unreachable("Unsupported intrinsic!"); 7267 case X86::BI__builtin_ia32_rdrand16_step: 7268 ID = Intrinsic::x86_rdrand_16; 7269 break; 7270 case X86::BI__builtin_ia32_rdrand32_step: 7271 ID = Intrinsic::x86_rdrand_32; 7272 break; 7273 case X86::BI__builtin_ia32_rdrand64_step: 7274 ID = Intrinsic::x86_rdrand_64; 7275 break; 7276 case X86::BI__builtin_ia32_rdseed16_step: 7277 ID = Intrinsic::x86_rdseed_16; 7278 break; 7279 case X86::BI__builtin_ia32_rdseed32_step: 7280 ID = Intrinsic::x86_rdseed_32; 7281 break; 7282 case X86::BI__builtin_ia32_rdseed64_step: 7283 ID = Intrinsic::x86_rdseed_64; 7284 break; 7285 } 7286 7287 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 7288 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 7289 Ops[0]); 7290 return Builder.CreateExtractValue(Call, 1); 7291 } 7292 7293 // SSE packed comparison intrinsics 7294 case X86::BI__builtin_ia32_cmpeqps: 7295 case X86::BI__builtin_ia32_cmpeqpd: 7296 return getVectorFCmpIR(CmpInst::FCMP_OEQ); 7297 case X86::BI__builtin_ia32_cmpltps: 7298 case X86::BI__builtin_ia32_cmpltpd: 7299 return getVectorFCmpIR(CmpInst::FCMP_OLT); 7300 case X86::BI__builtin_ia32_cmpleps: 7301 case X86::BI__builtin_ia32_cmplepd: 7302 return getVectorFCmpIR(CmpInst::FCMP_OLE); 7303 case 
X86::BI__builtin_ia32_cmpunordps: 7304 case X86::BI__builtin_ia32_cmpunordpd: 7305 return getVectorFCmpIR(CmpInst::FCMP_UNO); 7306 case X86::BI__builtin_ia32_cmpneqps: 7307 case X86::BI__builtin_ia32_cmpneqpd: 7308 return getVectorFCmpIR(CmpInst::FCMP_UNE); 7309 case X86::BI__builtin_ia32_cmpnltps: 7310 case X86::BI__builtin_ia32_cmpnltpd: 7311 return getVectorFCmpIR(CmpInst::FCMP_UGE); 7312 case X86::BI__builtin_ia32_cmpnleps: 7313 case X86::BI__builtin_ia32_cmpnlepd: 7314 return getVectorFCmpIR(CmpInst::FCMP_UGT); 7315 case X86::BI__builtin_ia32_cmpordps: 7316 case X86::BI__builtin_ia32_cmpordpd: 7317 return getVectorFCmpIR(CmpInst::FCMP_ORD); 7318 case X86::BI__builtin_ia32_cmpps: 7319 case X86::BI__builtin_ia32_cmpps256: 7320 case X86::BI__builtin_ia32_cmppd: 7321 case X86::BI__builtin_ia32_cmppd256: { 7322 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7323 // If this one of the SSE immediates, we can use native IR. 7324 if (CC < 8) { 7325 FCmpInst::Predicate Pred; 7326 switch (CC) { 7327 case 0: Pred = FCmpInst::FCMP_OEQ; break; 7328 case 1: Pred = FCmpInst::FCMP_OLT; break; 7329 case 2: Pred = FCmpInst::FCMP_OLE; break; 7330 case 3: Pred = FCmpInst::FCMP_UNO; break; 7331 case 4: Pred = FCmpInst::FCMP_UNE; break; 7332 case 5: Pred = FCmpInst::FCMP_UGE; break; 7333 case 6: Pred = FCmpInst::FCMP_UGT; break; 7334 case 7: Pred = FCmpInst::FCMP_ORD; break; 7335 } 7336 return getVectorFCmpIR(Pred); 7337 } 7338 7339 // We can't handle 8-31 immediates with native IR, use the intrinsic. 
7340 Intrinsic::ID ID; 7341 switch (BuiltinID) { 7342 default: llvm_unreachable("Unsupported intrinsic!"); 7343 case X86::BI__builtin_ia32_cmpps: 7344 ID = Intrinsic::x86_sse_cmp_ps; 7345 break; 7346 case X86::BI__builtin_ia32_cmpps256: 7347 ID = Intrinsic::x86_avx_cmp_ps_256; 7348 break; 7349 case X86::BI__builtin_ia32_cmppd: 7350 ID = Intrinsic::x86_sse2_cmp_pd; 7351 break; 7352 case X86::BI__builtin_ia32_cmppd256: 7353 ID = Intrinsic::x86_avx_cmp_pd_256; 7354 break; 7355 } 7356 7357 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7358 } 7359 7360 // SSE scalar comparison intrinsics 7361 case X86::BI__builtin_ia32_cmpeqss: 7362 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 7363 case X86::BI__builtin_ia32_cmpltss: 7364 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 7365 case X86::BI__builtin_ia32_cmpless: 7366 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 7367 case X86::BI__builtin_ia32_cmpunordss: 7368 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 7369 case X86::BI__builtin_ia32_cmpneqss: 7370 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 7371 case X86::BI__builtin_ia32_cmpnltss: 7372 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 7373 case X86::BI__builtin_ia32_cmpnless: 7374 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 7375 case X86::BI__builtin_ia32_cmpordss: 7376 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 7377 case X86::BI__builtin_ia32_cmpeqsd: 7378 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 7379 case X86::BI__builtin_ia32_cmpltsd: 7380 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 7381 case X86::BI__builtin_ia32_cmplesd: 7382 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 7383 case X86::BI__builtin_ia32_cmpunordsd: 7384 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 7385 case X86::BI__builtin_ia32_cmpneqsd: 7386 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 7387 case 
X86::BI__builtin_ia32_cmpnltsd: 7388 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 7389 case X86::BI__builtin_ia32_cmpnlesd: 7390 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 7391 case X86::BI__builtin_ia32_cmpordsd: 7392 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 7393 } 7394 } 7395 7396 7397 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 7398 const CallExpr *E) { 7399 SmallVector<Value*, 4> Ops; 7400 7401 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 7402 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7403 7404 Intrinsic::ID ID = Intrinsic::not_intrinsic; 7405 7406 switch (BuiltinID) { 7407 default: return nullptr; 7408 7409 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 7410 // call __builtin_readcyclecounter. 7411 case PPC::BI__builtin_ppc_get_timebase: 7412 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 7413 7414 // vec_ld, vec_lvsl, vec_lvsr 7415 case PPC::BI__builtin_altivec_lvx: 7416 case PPC::BI__builtin_altivec_lvxl: 7417 case PPC::BI__builtin_altivec_lvebx: 7418 case PPC::BI__builtin_altivec_lvehx: 7419 case PPC::BI__builtin_altivec_lvewx: 7420 case PPC::BI__builtin_altivec_lvsl: 7421 case PPC::BI__builtin_altivec_lvsr: 7422 case PPC::BI__builtin_vsx_lxvd2x: 7423 case PPC::BI__builtin_vsx_lxvw4x: 7424 { 7425 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 7426 7427 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 7428 Ops.pop_back(); 7429 7430 switch (BuiltinID) { 7431 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 7432 case PPC::BI__builtin_altivec_lvx: 7433 ID = Intrinsic::ppc_altivec_lvx; 7434 break; 7435 case PPC::BI__builtin_altivec_lvxl: 7436 ID = Intrinsic::ppc_altivec_lvxl; 7437 break; 7438 case PPC::BI__builtin_altivec_lvebx: 7439 ID = Intrinsic::ppc_altivec_lvebx; 7440 break; 7441 case PPC::BI__builtin_altivec_lvehx: 7442 ID = Intrinsic::ppc_altivec_lvehx; 7443 break; 7444 case 
PPC::BI__builtin_altivec_lvewx: 7445 ID = Intrinsic::ppc_altivec_lvewx; 7446 break; 7447 case PPC::BI__builtin_altivec_lvsl: 7448 ID = Intrinsic::ppc_altivec_lvsl; 7449 break; 7450 case PPC::BI__builtin_altivec_lvsr: 7451 ID = Intrinsic::ppc_altivec_lvsr; 7452 break; 7453 case PPC::BI__builtin_vsx_lxvd2x: 7454 ID = Intrinsic::ppc_vsx_lxvd2x; 7455 break; 7456 case PPC::BI__builtin_vsx_lxvw4x: 7457 ID = Intrinsic::ppc_vsx_lxvw4x; 7458 break; 7459 } 7460 llvm::Function *F = CGM.getIntrinsic(ID); 7461 return Builder.CreateCall(F, Ops, ""); 7462 } 7463 7464 // vec_st 7465 case PPC::BI__builtin_altivec_stvx: 7466 case PPC::BI__builtin_altivec_stvxl: 7467 case PPC::BI__builtin_altivec_stvebx: 7468 case PPC::BI__builtin_altivec_stvehx: 7469 case PPC::BI__builtin_altivec_stvewx: 7470 case PPC::BI__builtin_vsx_stxvd2x: 7471 case PPC::BI__builtin_vsx_stxvw4x: 7472 { 7473 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 7474 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 7475 Ops.pop_back(); 7476 7477 switch (BuiltinID) { 7478 default: llvm_unreachable("Unsupported st intrinsic!"); 7479 case PPC::BI__builtin_altivec_stvx: 7480 ID = Intrinsic::ppc_altivec_stvx; 7481 break; 7482 case PPC::BI__builtin_altivec_stvxl: 7483 ID = Intrinsic::ppc_altivec_stvxl; 7484 break; 7485 case PPC::BI__builtin_altivec_stvebx: 7486 ID = Intrinsic::ppc_altivec_stvebx; 7487 break; 7488 case PPC::BI__builtin_altivec_stvehx: 7489 ID = Intrinsic::ppc_altivec_stvehx; 7490 break; 7491 case PPC::BI__builtin_altivec_stvewx: 7492 ID = Intrinsic::ppc_altivec_stvewx; 7493 break; 7494 case PPC::BI__builtin_vsx_stxvd2x: 7495 ID = Intrinsic::ppc_vsx_stxvd2x; 7496 break; 7497 case PPC::BI__builtin_vsx_stxvw4x: 7498 ID = Intrinsic::ppc_vsx_stxvw4x; 7499 break; 7500 } 7501 llvm::Function *F = CGM.getIntrinsic(ID); 7502 return Builder.CreateCall(F, Ops, ""); 7503 } 7504 // Square root 7505 case PPC::BI__builtin_vsx_xvsqrtsp: 7506 case PPC::BI__builtin_vsx_xvsqrtdp: { 7507 llvm::Type *ResultType = 
ConvertType(E->getType()); 7508 Value *X = EmitScalarExpr(E->getArg(0)); 7509 ID = Intrinsic::sqrt; 7510 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7511 return Builder.CreateCall(F, X); 7512 } 7513 // Count leading zeros 7514 case PPC::BI__builtin_altivec_vclzb: 7515 case PPC::BI__builtin_altivec_vclzh: 7516 case PPC::BI__builtin_altivec_vclzw: 7517 case PPC::BI__builtin_altivec_vclzd: { 7518 llvm::Type *ResultType = ConvertType(E->getType()); 7519 Value *X = EmitScalarExpr(E->getArg(0)); 7520 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7521 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 7522 return Builder.CreateCall(F, {X, Undef}); 7523 } 7524 // Copy sign 7525 case PPC::BI__builtin_vsx_xvcpsgnsp: 7526 case PPC::BI__builtin_vsx_xvcpsgndp: { 7527 llvm::Type *ResultType = ConvertType(E->getType()); 7528 Value *X = EmitScalarExpr(E->getArg(0)); 7529 Value *Y = EmitScalarExpr(E->getArg(1)); 7530 ID = Intrinsic::copysign; 7531 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7532 return Builder.CreateCall(F, {X, Y}); 7533 } 7534 // Rounding/truncation 7535 case PPC::BI__builtin_vsx_xvrspip: 7536 case PPC::BI__builtin_vsx_xvrdpip: 7537 case PPC::BI__builtin_vsx_xvrdpim: 7538 case PPC::BI__builtin_vsx_xvrspim: 7539 case PPC::BI__builtin_vsx_xvrdpi: 7540 case PPC::BI__builtin_vsx_xvrspi: 7541 case PPC::BI__builtin_vsx_xvrdpic: 7542 case PPC::BI__builtin_vsx_xvrspic: 7543 case PPC::BI__builtin_vsx_xvrdpiz: 7544 case PPC::BI__builtin_vsx_xvrspiz: { 7545 llvm::Type *ResultType = ConvertType(E->getType()); 7546 Value *X = EmitScalarExpr(E->getArg(0)); 7547 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 7548 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 7549 ID = Intrinsic::floor; 7550 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 7551 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 7552 ID = Intrinsic::round; 7553 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 7554 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 7555 ID = 
Intrinsic::nearbyint; 7556 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 7557 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 7558 ID = Intrinsic::ceil; 7559 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 7560 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 7561 ID = Intrinsic::trunc; 7562 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7563 return Builder.CreateCall(F, X); 7564 } 7565 7566 // Absolute value 7567 case PPC::BI__builtin_vsx_xvabsdp: 7568 case PPC::BI__builtin_vsx_xvabssp: { 7569 llvm::Type *ResultType = ConvertType(E->getType()); 7570 Value *X = EmitScalarExpr(E->getArg(0)); 7571 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 7572 return Builder.CreateCall(F, X); 7573 } 7574 7575 // FMA variations 7576 case PPC::BI__builtin_vsx_xvmaddadp: 7577 case PPC::BI__builtin_vsx_xvmaddasp: 7578 case PPC::BI__builtin_vsx_xvnmaddadp: 7579 case PPC::BI__builtin_vsx_xvnmaddasp: 7580 case PPC::BI__builtin_vsx_xvmsubadp: 7581 case PPC::BI__builtin_vsx_xvmsubasp: 7582 case PPC::BI__builtin_vsx_xvnmsubadp: 7583 case PPC::BI__builtin_vsx_xvnmsubasp: { 7584 llvm::Type *ResultType = ConvertType(E->getType()); 7585 Value *X = EmitScalarExpr(E->getArg(0)); 7586 Value *Y = EmitScalarExpr(E->getArg(1)); 7587 Value *Z = EmitScalarExpr(E->getArg(2)); 7588 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 7589 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 7590 switch (BuiltinID) { 7591 case PPC::BI__builtin_vsx_xvmaddadp: 7592 case PPC::BI__builtin_vsx_xvmaddasp: 7593 return Builder.CreateCall(F, {X, Y, Z}); 7594 case PPC::BI__builtin_vsx_xvnmaddadp: 7595 case PPC::BI__builtin_vsx_xvnmaddasp: 7596 return Builder.CreateFSub(Zero, 7597 Builder.CreateCall(F, {X, Y, Z}), "sub"); 7598 case PPC::BI__builtin_vsx_xvmsubadp: 7599 case PPC::BI__builtin_vsx_xvmsubasp: 7600 return Builder.CreateCall(F, 7601 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 7602 case PPC::BI__builtin_vsx_xvnmsubadp: 7603 case 
PPC::BI__builtin_vsx_xvnmsubasp: 7604 Value *FsubRes = 7605 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 7606 return Builder.CreateFSub(Zero, FsubRes, "sub"); 7607 } 7608 llvm_unreachable("Unknown FMA operation"); 7609 return nullptr; // Suppress no-return warning 7610 } 7611 } 7612 } 7613 7614 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 7615 const CallExpr *E) { 7616 switch (BuiltinID) { 7617 case AMDGPU::BI__builtin_amdgcn_div_scale: 7618 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 7619 // Translate from the intrinsics's struct return to the builtin's out 7620 // argument. 7621 7622 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 7623 7624 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 7625 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 7626 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 7627 7628 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 7629 X->getType()); 7630 7631 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 7632 7633 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 7634 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 7635 7636 llvm::Type *RealFlagType 7637 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 7638 7639 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 7640 Builder.CreateStore(FlagExt, FlagOutPtr); 7641 return Result; 7642 } 7643 case AMDGPU::BI__builtin_amdgcn_div_fmas: 7644 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 7645 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 7646 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 7647 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 7648 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 7649 7650 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 7651 Src0->getType()); 7652 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 7653 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 7654 } 7655 case 
AMDGPU::BI__builtin_amdgcn_div_fixup: 7656 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 7657 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 7658 case AMDGPU::BI__builtin_amdgcn_trig_preop: 7659 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 7660 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 7661 case AMDGPU::BI__builtin_amdgcn_rcp: 7662 case AMDGPU::BI__builtin_amdgcn_rcpf: 7663 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 7664 case AMDGPU::BI__builtin_amdgcn_rsq: 7665 case AMDGPU::BI__builtin_amdgcn_rsqf: 7666 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 7667 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 7668 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 7669 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 7670 case AMDGPU::BI__builtin_amdgcn_sinf: 7671 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 7672 case AMDGPU::BI__builtin_amdgcn_cosf: 7673 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 7674 case AMDGPU::BI__builtin_amdgcn_log_clampf: 7675 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 7676 case AMDGPU::BI__builtin_amdgcn_ldexp: 7677 case AMDGPU::BI__builtin_amdgcn_ldexpf: 7678 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 7679 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 7680 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: { 7681 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 7682 } 7683 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 7684 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 7685 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp); 7686 } 7687 case AMDGPU::BI__builtin_amdgcn_fract: 7688 case AMDGPU::BI__builtin_amdgcn_fractf: 7689 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 7690 case AMDGPU::BI__builtin_amdgcn_lerp: 7691 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 7692 case AMDGPU::BI__builtin_amdgcn_uicmp: 7693 case AMDGPU::BI__builtin_amdgcn_uicmpl: 7694 case 
AMDGPU::BI__builtin_amdgcn_sicmp: 7695 case AMDGPU::BI__builtin_amdgcn_sicmpl: 7696 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 7697 case AMDGPU::BI__builtin_amdgcn_fcmp: 7698 case AMDGPU::BI__builtin_amdgcn_fcmpf: 7699 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 7700 case AMDGPU::BI__builtin_amdgcn_class: 7701 case AMDGPU::BI__builtin_amdgcn_classf: 7702 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 7703 7704 case AMDGPU::BI__builtin_amdgcn_read_exec: { 7705 CallInst *CI = cast<CallInst>( 7706 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 7707 CI->setConvergent(); 7708 return CI; 7709 } 7710 7711 // amdgcn workitem 7712 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 7713 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 7714 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 7715 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 7716 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 7717 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 7718 7719 // r600 intrinsics 7720 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 7721 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 7722 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 7723 case AMDGPU::BI__builtin_r600_read_tidig_x: 7724 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 7725 case AMDGPU::BI__builtin_r600_read_tidig_y: 7726 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 7727 case AMDGPU::BI__builtin_r600_read_tidig_z: 7728 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 7729 default: 7730 return nullptr; 7731 } 7732 } 7733 7734 /// Handle a SystemZ function in which the final argument is a pointer 7735 /// to an int that receives the post-instruction CC value. At the LLVM level 7736 /// this is represented as a function that returns a {result, cc} pair. 
7737 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 7738 unsigned IntrinsicID, 7739 const CallExpr *E) { 7740 unsigned NumArgs = E->getNumArgs() - 1; 7741 SmallVector<Value *, 8> Args(NumArgs); 7742 for (unsigned I = 0; I < NumArgs; ++I) 7743 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 7744 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 7745 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 7746 Value *Call = CGF.Builder.CreateCall(F, Args); 7747 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 7748 CGF.Builder.CreateStore(CC, CCPtr); 7749 return CGF.Builder.CreateExtractValue(Call, 0); 7750 } 7751 7752 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 7753 const CallExpr *E) { 7754 switch (BuiltinID) { 7755 case SystemZ::BI__builtin_tbegin: { 7756 Value *TDB = EmitScalarExpr(E->getArg(0)); 7757 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 7758 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 7759 return Builder.CreateCall(F, {TDB, Control}); 7760 } 7761 case SystemZ::BI__builtin_tbegin_nofloat: { 7762 Value *TDB = EmitScalarExpr(E->getArg(0)); 7763 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 7764 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 7765 return Builder.CreateCall(F, {TDB, Control}); 7766 } 7767 case SystemZ::BI__builtin_tbeginc: { 7768 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 7769 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 7770 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 7771 return Builder.CreateCall(F, {TDB, Control}); 7772 } 7773 case SystemZ::BI__builtin_tabort: { 7774 Value *Data = EmitScalarExpr(E->getArg(0)); 7775 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 7776 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 7777 } 7778 case SystemZ::BI__builtin_non_tx_store: { 7779 Value *Address = EmitScalarExpr(E->getArg(0)); 7780 Value *Data = EmitScalarExpr(E->getArg(1)); 7781 Value *F = 
CGM.getIntrinsic(Intrinsic::s390_ntstg); 7782 return Builder.CreateCall(F, {Data, Address}); 7783 } 7784 7785 // Vector builtins. Note that most vector builtins are mapped automatically 7786 // to target-specific LLVM intrinsics. The ones handled specially here can 7787 // be represented via standard LLVM IR, which is preferable to enable common 7788 // LLVM optimizations. 7789 7790 case SystemZ::BI__builtin_s390_vpopctb: 7791 case SystemZ::BI__builtin_s390_vpopcth: 7792 case SystemZ::BI__builtin_s390_vpopctf: 7793 case SystemZ::BI__builtin_s390_vpopctg: { 7794 llvm::Type *ResultType = ConvertType(E->getType()); 7795 Value *X = EmitScalarExpr(E->getArg(0)); 7796 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 7797 return Builder.CreateCall(F, X); 7798 } 7799 7800 case SystemZ::BI__builtin_s390_vclzb: 7801 case SystemZ::BI__builtin_s390_vclzh: 7802 case SystemZ::BI__builtin_s390_vclzf: 7803 case SystemZ::BI__builtin_s390_vclzg: { 7804 llvm::Type *ResultType = ConvertType(E->getType()); 7805 Value *X = EmitScalarExpr(E->getArg(0)); 7806 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7807 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 7808 return Builder.CreateCall(F, {X, Undef}); 7809 } 7810 7811 case SystemZ::BI__builtin_s390_vctzb: 7812 case SystemZ::BI__builtin_s390_vctzh: 7813 case SystemZ::BI__builtin_s390_vctzf: 7814 case SystemZ::BI__builtin_s390_vctzg: { 7815 llvm::Type *ResultType = ConvertType(E->getType()); 7816 Value *X = EmitScalarExpr(E->getArg(0)); 7817 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7818 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 7819 return Builder.CreateCall(F, {X, Undef}); 7820 } 7821 7822 case SystemZ::BI__builtin_s390_vfsqdb: { 7823 llvm::Type *ResultType = ConvertType(E->getType()); 7824 Value *X = EmitScalarExpr(E->getArg(0)); 7825 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 7826 return Builder.CreateCall(F, X); 7827 } 7828 case 
SystemZ::BI__builtin_s390_vfmadb: { 7829 llvm::Type *ResultType = ConvertType(E->getType()); 7830 Value *X = EmitScalarExpr(E->getArg(0)); 7831 Value *Y = EmitScalarExpr(E->getArg(1)); 7832 Value *Z = EmitScalarExpr(E->getArg(2)); 7833 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 7834 return Builder.CreateCall(F, {X, Y, Z}); 7835 } 7836 case SystemZ::BI__builtin_s390_vfmsdb: { 7837 llvm::Type *ResultType = ConvertType(E->getType()); 7838 Value *X = EmitScalarExpr(E->getArg(0)); 7839 Value *Y = EmitScalarExpr(E->getArg(1)); 7840 Value *Z = EmitScalarExpr(E->getArg(2)); 7841 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 7842 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 7843 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 7844 } 7845 case SystemZ::BI__builtin_s390_vflpdb: { 7846 llvm::Type *ResultType = ConvertType(E->getType()); 7847 Value *X = EmitScalarExpr(E->getArg(0)); 7848 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 7849 return Builder.CreateCall(F, X); 7850 } 7851 case SystemZ::BI__builtin_s390_vflndb: { 7852 llvm::Type *ResultType = ConvertType(E->getType()); 7853 Value *X = EmitScalarExpr(E->getArg(0)); 7854 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 7855 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 7856 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 7857 } 7858 case SystemZ::BI__builtin_s390_vfidb: { 7859 llvm::Type *ResultType = ConvertType(E->getType()); 7860 Value *X = EmitScalarExpr(E->getArg(0)); 7861 // Constant-fold the M4 and M5 mask arguments. 
7862 llvm::APSInt M4, M5; 7863 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 7864 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 7865 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 7866 (void)IsConstM4; (void)IsConstM5; 7867 // Check whether this instance of vfidb can be represented via a LLVM 7868 // standard intrinsic. We only support some combinations of M4 and M5. 7869 Intrinsic::ID ID = Intrinsic::not_intrinsic; 7870 switch (M4.getZExtValue()) { 7871 default: break; 7872 case 0: // IEEE-inexact exception allowed 7873 switch (M5.getZExtValue()) { 7874 default: break; 7875 case 0: ID = Intrinsic::rint; break; 7876 } 7877 break; 7878 case 4: // IEEE-inexact exception suppressed 7879 switch (M5.getZExtValue()) { 7880 default: break; 7881 case 0: ID = Intrinsic::nearbyint; break; 7882 case 1: ID = Intrinsic::round; break; 7883 case 5: ID = Intrinsic::trunc; break; 7884 case 6: ID = Intrinsic::ceil; break; 7885 case 7: ID = Intrinsic::floor; break; 7886 } 7887 break; 7888 } 7889 if (ID != Intrinsic::not_intrinsic) { 7890 Function *F = CGM.getIntrinsic(ID, ResultType); 7891 return Builder.CreateCall(F, X); 7892 } 7893 Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); 7894 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 7895 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 7896 return Builder.CreateCall(F, {X, M4Value, M5Value}); 7897 } 7898 7899 // Vector intrisincs that output the post-instruction CC value. 
7900 7901 #define INTRINSIC_WITH_CC(NAME) \ 7902 case SystemZ::BI__builtin_##NAME: \ 7903 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 7904 7905 INTRINSIC_WITH_CC(s390_vpkshs); 7906 INTRINSIC_WITH_CC(s390_vpksfs); 7907 INTRINSIC_WITH_CC(s390_vpksgs); 7908 7909 INTRINSIC_WITH_CC(s390_vpklshs); 7910 INTRINSIC_WITH_CC(s390_vpklsfs); 7911 INTRINSIC_WITH_CC(s390_vpklsgs); 7912 7913 INTRINSIC_WITH_CC(s390_vceqbs); 7914 INTRINSIC_WITH_CC(s390_vceqhs); 7915 INTRINSIC_WITH_CC(s390_vceqfs); 7916 INTRINSIC_WITH_CC(s390_vceqgs); 7917 7918 INTRINSIC_WITH_CC(s390_vchbs); 7919 INTRINSIC_WITH_CC(s390_vchhs); 7920 INTRINSIC_WITH_CC(s390_vchfs); 7921 INTRINSIC_WITH_CC(s390_vchgs); 7922 7923 INTRINSIC_WITH_CC(s390_vchlbs); 7924 INTRINSIC_WITH_CC(s390_vchlhs); 7925 INTRINSIC_WITH_CC(s390_vchlfs); 7926 INTRINSIC_WITH_CC(s390_vchlgs); 7927 7928 INTRINSIC_WITH_CC(s390_vfaebs); 7929 INTRINSIC_WITH_CC(s390_vfaehs); 7930 INTRINSIC_WITH_CC(s390_vfaefs); 7931 7932 INTRINSIC_WITH_CC(s390_vfaezbs); 7933 INTRINSIC_WITH_CC(s390_vfaezhs); 7934 INTRINSIC_WITH_CC(s390_vfaezfs); 7935 7936 INTRINSIC_WITH_CC(s390_vfeebs); 7937 INTRINSIC_WITH_CC(s390_vfeehs); 7938 INTRINSIC_WITH_CC(s390_vfeefs); 7939 7940 INTRINSIC_WITH_CC(s390_vfeezbs); 7941 INTRINSIC_WITH_CC(s390_vfeezhs); 7942 INTRINSIC_WITH_CC(s390_vfeezfs); 7943 7944 INTRINSIC_WITH_CC(s390_vfenebs); 7945 INTRINSIC_WITH_CC(s390_vfenehs); 7946 INTRINSIC_WITH_CC(s390_vfenefs); 7947 7948 INTRINSIC_WITH_CC(s390_vfenezbs); 7949 INTRINSIC_WITH_CC(s390_vfenezhs); 7950 INTRINSIC_WITH_CC(s390_vfenezfs); 7951 7952 INTRINSIC_WITH_CC(s390_vistrbs); 7953 INTRINSIC_WITH_CC(s390_vistrhs); 7954 INTRINSIC_WITH_CC(s390_vistrfs); 7955 7956 INTRINSIC_WITH_CC(s390_vstrcbs); 7957 INTRINSIC_WITH_CC(s390_vstrchs); 7958 INTRINSIC_WITH_CC(s390_vstrcfs); 7959 7960 INTRINSIC_WITH_CC(s390_vstrczbs); 7961 INTRINSIC_WITH_CC(s390_vstrczhs); 7962 INTRINSIC_WITH_CC(s390_vstrczfs); 7963 7964 INTRINSIC_WITH_CC(s390_vfcedbs); 7965 INTRINSIC_WITH_CC(s390_vfchdbs); 7966 
INTRINSIC_WITH_CC(s390_vfchedbs); 7967 7968 INTRINSIC_WITH_CC(s390_vftcidb); 7969 7970 #undef INTRINSIC_WITH_CC 7971 7972 default: 7973 return nullptr; 7974 } 7975 } 7976 7977 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 7978 const CallExpr *E) { 7979 auto MakeLdg = [&](unsigned IntrinsicID) { 7980 Value *Ptr = EmitScalarExpr(E->getArg(0)); 7981 AlignmentSource AlignSource; 7982 clang::CharUnits Align = 7983 getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); 7984 return Builder.CreateCall( 7985 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 7986 Ptr->getType()}), 7987 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 7988 }; 7989 7990 switch (BuiltinID) { 7991 case NVPTX::BI__nvvm_atom_add_gen_i: 7992 case NVPTX::BI__nvvm_atom_add_gen_l: 7993 case NVPTX::BI__nvvm_atom_add_gen_ll: 7994 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 7995 7996 case NVPTX::BI__nvvm_atom_sub_gen_i: 7997 case NVPTX::BI__nvvm_atom_sub_gen_l: 7998 case NVPTX::BI__nvvm_atom_sub_gen_ll: 7999 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 8000 8001 case NVPTX::BI__nvvm_atom_and_gen_i: 8002 case NVPTX::BI__nvvm_atom_and_gen_l: 8003 case NVPTX::BI__nvvm_atom_and_gen_ll: 8004 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 8005 8006 case NVPTX::BI__nvvm_atom_or_gen_i: 8007 case NVPTX::BI__nvvm_atom_or_gen_l: 8008 case NVPTX::BI__nvvm_atom_or_gen_ll: 8009 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 8010 8011 case NVPTX::BI__nvvm_atom_xor_gen_i: 8012 case NVPTX::BI__nvvm_atom_xor_gen_l: 8013 case NVPTX::BI__nvvm_atom_xor_gen_ll: 8014 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 8015 8016 case NVPTX::BI__nvvm_atom_xchg_gen_i: 8017 case NVPTX::BI__nvvm_atom_xchg_gen_l: 8018 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 8019 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 8020 8021 case 
NVPTX::BI__nvvm_atom_max_gen_i: 8022 case NVPTX::BI__nvvm_atom_max_gen_l: 8023 case NVPTX::BI__nvvm_atom_max_gen_ll: 8024 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 8025 8026 case NVPTX::BI__nvvm_atom_max_gen_ui: 8027 case NVPTX::BI__nvvm_atom_max_gen_ul: 8028 case NVPTX::BI__nvvm_atom_max_gen_ull: 8029 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 8030 8031 case NVPTX::BI__nvvm_atom_min_gen_i: 8032 case NVPTX::BI__nvvm_atom_min_gen_l: 8033 case NVPTX::BI__nvvm_atom_min_gen_ll: 8034 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 8035 8036 case NVPTX::BI__nvvm_atom_min_gen_ui: 8037 case NVPTX::BI__nvvm_atom_min_gen_ul: 8038 case NVPTX::BI__nvvm_atom_min_gen_ull: 8039 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 8040 8041 case NVPTX::BI__nvvm_atom_cas_gen_i: 8042 case NVPTX::BI__nvvm_atom_cas_gen_l: 8043 case NVPTX::BI__nvvm_atom_cas_gen_ll: 8044 // __nvvm_atom_cas_gen_* should return the old value rather than the 8045 // success flag. 8046 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 8047 8048 case NVPTX::BI__nvvm_atom_add_gen_f: { 8049 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8050 Value *Val = EmitScalarExpr(E->getArg(1)); 8051 // atomicrmw only deals with integer arguments so we need to use 8052 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 
8053 Value *FnALAF32 = 8054 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 8055 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 8056 } 8057 8058 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 8059 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8060 Value *Val = EmitScalarExpr(E->getArg(1)); 8061 Value *FnALI32 = 8062 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 8063 return Builder.CreateCall(FnALI32, {Ptr, Val}); 8064 } 8065 8066 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 8067 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8068 Value *Val = EmitScalarExpr(E->getArg(1)); 8069 Value *FnALD32 = 8070 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 8071 return Builder.CreateCall(FnALD32, {Ptr, Val}); 8072 } 8073 8074 case NVPTX::BI__nvvm_ldg_c: 8075 case NVPTX::BI__nvvm_ldg_c2: 8076 case NVPTX::BI__nvvm_ldg_c4: 8077 case NVPTX::BI__nvvm_ldg_s: 8078 case NVPTX::BI__nvvm_ldg_s2: 8079 case NVPTX::BI__nvvm_ldg_s4: 8080 case NVPTX::BI__nvvm_ldg_i: 8081 case NVPTX::BI__nvvm_ldg_i2: 8082 case NVPTX::BI__nvvm_ldg_i4: 8083 case NVPTX::BI__nvvm_ldg_l: 8084 case NVPTX::BI__nvvm_ldg_ll: 8085 case NVPTX::BI__nvvm_ldg_ll2: 8086 case NVPTX::BI__nvvm_ldg_uc: 8087 case NVPTX::BI__nvvm_ldg_uc2: 8088 case NVPTX::BI__nvvm_ldg_uc4: 8089 case NVPTX::BI__nvvm_ldg_us: 8090 case NVPTX::BI__nvvm_ldg_us2: 8091 case NVPTX::BI__nvvm_ldg_us4: 8092 case NVPTX::BI__nvvm_ldg_ui: 8093 case NVPTX::BI__nvvm_ldg_ui2: 8094 case NVPTX::BI__nvvm_ldg_ui4: 8095 case NVPTX::BI__nvvm_ldg_ul: 8096 case NVPTX::BI__nvvm_ldg_ull: 8097 case NVPTX::BI__nvvm_ldg_ull2: 8098 // PTX Interoperability section 2.2: "For a vector with an even number of 8099 // elements, its alignment is set to number of elements times the alignment 8100 // of its member: n*alignof(t)." 
8101 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 8102 case NVPTX::BI__nvvm_ldg_f: 8103 case NVPTX::BI__nvvm_ldg_f2: 8104 case NVPTX::BI__nvvm_ldg_f4: 8105 case NVPTX::BI__nvvm_ldg_d: 8106 case NVPTX::BI__nvvm_ldg_d2: 8107 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 8108 default: 8109 return nullptr; 8110 } 8111 } 8112 8113 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 8114 const CallExpr *E) { 8115 switch (BuiltinID) { 8116 case WebAssembly::BI__builtin_wasm_current_memory: { 8117 llvm::Type *ResultType = ConvertType(E->getType()); 8118 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 8119 return Builder.CreateCall(Callee); 8120 } 8121 case WebAssembly::BI__builtin_wasm_grow_memory: { 8122 Value *X = EmitScalarExpr(E->getArg(0)); 8123 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 8124 return Builder.CreateCall(Callee, X); 8125 } 8126 8127 default: 8128 return nullptr; 8129 } 8130 } 8131