1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This contains code to emit Builtin calls as LLVM code. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCXXABI.h" 14 #include "CGObjCRuntime.h" 15 #include "CGOpenCLRuntime.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "CodeGenModule.h" 19 #include "ConstantEmitter.h" 20 #include "PatternInit.h" 21 #include "TargetInfo.h" 22 #include "clang/AST/ASTContext.h" 23 #include "clang/AST/Attr.h" 24 #include "clang/AST/Decl.h" 25 #include "clang/AST/OSLog.h" 26 #include "clang/Basic/TargetBuiltins.h" 27 #include "clang/Basic/TargetInfo.h" 28 #include "clang/CodeGen/CGFunctionInfo.h" 29 #include "llvm/ADT/SmallPtrSet.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/IR/DataLayout.h" 32 #include "llvm/IR/InlineAsm.h" 33 #include "llvm/IR/Intrinsics.h" 34 #include "llvm/IR/IntrinsicsAArch64.h" 35 #include "llvm/IR/IntrinsicsAMDGPU.h" 36 #include "llvm/IR/IntrinsicsARM.h" 37 #include "llvm/IR/IntrinsicsBPF.h" 38 #include "llvm/IR/IntrinsicsHexagon.h" 39 #include "llvm/IR/IntrinsicsNVPTX.h" 40 #include "llvm/IR/IntrinsicsPowerPC.h" 41 #include "llvm/IR/IntrinsicsR600.h" 42 #include "llvm/IR/IntrinsicsS390.h" 43 #include "llvm/IR/IntrinsicsWebAssembly.h" 44 #include "llvm/IR/IntrinsicsX86.h" 45 #include "llvm/IR/MDBuilder.h" 46 #include "llvm/Support/ConvertUTF.h" 47 #include "llvm/Support/ScopedPrinter.h" 48 #include "llvm/Support/TargetParser.h" 49 #include <sstream> 50 51 using namespace clang; 52 using namespace CodeGen; 53 using namespace llvm; 54 55 static 56 int64_t clamp(int64_t Value, 
int64_t Low, int64_t High) { 57 return std::min(High, std::max(Low, Value)); 58 } 59 60 static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, 61 Align AlignmentInBytes) { 62 ConstantInt *Byte; 63 switch (CGF.getLangOpts().getTrivialAutoVarInit()) { 64 case LangOptions::TrivialAutoVarInitKind::Uninitialized: 65 // Nothing to initialize. 66 return; 67 case LangOptions::TrivialAutoVarInitKind::Zero: 68 Byte = CGF.Builder.getInt8(0x00); 69 break; 70 case LangOptions::TrivialAutoVarInitKind::Pattern: { 71 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); 72 Byte = llvm::dyn_cast<llvm::ConstantInt>( 73 initializationPatternFor(CGF.CGM, Int8)); 74 break; 75 } 76 } 77 CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); 78 } 79 80 /// getBuiltinLibFunction - Given a builtin id for a function like 81 /// "__builtin_fabsf", return a Function* for "fabsf". 82 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 83 unsigned BuiltinID) { 84 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 85 86 // Get the name, skip over the __builtin_ prefix (if necessary). 87 StringRef Name; 88 GlobalDecl D(FD); 89 90 // If the builtin has been declared explicitly with an assembler label, 91 // use the mangled name. This differs from the plain label on platforms 92 // that prefix labels. 93 if (FD->hasAttr<AsmLabelAttr>()) 94 Name = getMangledName(D); 95 else 96 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 97 98 llvm::FunctionType *Ty = 99 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 100 101 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 102 } 103 104 /// Emit the conversions required to turn the given value into an 105 /// integer of the given size. 
106 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 107 QualType T, llvm::IntegerType *IntType) { 108 V = CGF.EmitToMemory(V, T); 109 110 if (V->getType()->isPointerTy()) 111 return CGF.Builder.CreatePtrToInt(V, IntType); 112 113 assert(V->getType() == IntType); 114 return V; 115 } 116 117 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 118 QualType T, llvm::Type *ResultType) { 119 V = CGF.EmitFromMemory(V, T); 120 121 if (ResultType->isPointerTy()) 122 return CGF.Builder.CreateIntToPtr(V, ResultType); 123 124 assert(V->getType() == ResultType); 125 return V; 126 } 127 128 /// Utility to insert an atomic instruction based on Intrinsic::ID 129 /// and the expression node. 130 static Value *MakeBinaryAtomicValue( 131 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, 132 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { 133 QualType T = E->getType(); 134 assert(E->getArg(0)->getType()->isPointerType()); 135 assert(CGF.getContext().hasSameUnqualifiedType(T, 136 E->getArg(0)->getType()->getPointeeType())); 137 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 138 139 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 140 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 141 142 llvm::IntegerType *IntType = 143 llvm::IntegerType::get(CGF.getLLVMContext(), 144 CGF.getContext().getTypeSize(T)); 145 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 146 147 llvm::Value *Args[2]; 148 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 149 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 150 llvm::Type *ValueType = Args[1]->getType(); 151 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 152 153 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 154 Kind, Args[0], Args[1], Ordering); 155 return EmitFromInt(CGF, Result, T, ValueType); 156 } 157 158 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 159 Value *Val = 
CGF.EmitScalarExpr(E->getArg(0)); 160 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 161 162 // Convert the type of the pointer to a pointer to the stored type. 163 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 164 Value *BC = CGF.Builder.CreateBitCast( 165 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 166 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 167 LV.setNontemporal(true); 168 CGF.EmitStoreOfScalar(Val, LV, false); 169 return nullptr; 170 } 171 172 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 173 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 174 175 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 176 LV.setNontemporal(true); 177 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 178 } 179 180 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 181 llvm::AtomicRMWInst::BinOp Kind, 182 const CallExpr *E) { 183 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 184 } 185 186 /// Utility to insert an atomic instruction based Intrinsic::ID and 187 /// the expression node, where the return value is the result of the 188 /// operation. 
189 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 190 llvm::AtomicRMWInst::BinOp Kind, 191 const CallExpr *E, 192 Instruction::BinaryOps Op, 193 bool Invert = false) { 194 QualType T = E->getType(); 195 assert(E->getArg(0)->getType()->isPointerType()); 196 assert(CGF.getContext().hasSameUnqualifiedType(T, 197 E->getArg(0)->getType()->getPointeeType())); 198 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 199 200 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 201 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 202 203 llvm::IntegerType *IntType = 204 llvm::IntegerType::get(CGF.getLLVMContext(), 205 CGF.getContext().getTypeSize(T)); 206 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 207 208 llvm::Value *Args[2]; 209 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 210 llvm::Type *ValueType = Args[1]->getType(); 211 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 212 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 213 214 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 215 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 216 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 217 if (Invert) 218 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 219 llvm::ConstantInt::get(IntType, -1)); 220 Result = EmitFromInt(CGF, Result, T, ValueType); 221 return RValue::get(Result); 222 } 223 224 /// Utility to insert an atomic cmpxchg instruction. 225 /// 226 /// @param CGF The current codegen function. 227 /// @param E Builtin call expression to convert to cmpxchg. 228 /// arg0 - address to operate on 229 /// arg1 - value to compare with 230 /// arg2 - new value 231 /// @param ReturnBool Specifies whether to return success flag of 232 /// cmpxchg result or the old value. 
233 /// 234 /// @returns result of cmpxchg, according to ReturnBool 235 /// 236 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics 237 /// invoke the function EmitAtomicCmpXchgForMSIntrin. 238 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 239 bool ReturnBool) { 240 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 241 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 242 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 243 244 llvm::IntegerType *IntType = llvm::IntegerType::get( 245 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 246 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 247 248 Value *Args[3]; 249 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 250 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 251 llvm::Type *ValueType = Args[1]->getType(); 252 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 253 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 254 255 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 256 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 257 llvm::AtomicOrdering::SequentiallyConsistent); 258 if (ReturnBool) 259 // Extract boolean success flag and zext it to int. 260 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 261 CGF.ConvertType(E->getType())); 262 else 263 // Extract old value and emit it using the same type as compare value. 264 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 265 ValueType); 266 } 267 268 /// This function should be invoked to emit atomic cmpxchg for Microsoft's 269 /// _InterlockedCompareExchange* intrinsics which have the following signature: 270 /// T _InterlockedCompareExchange(T volatile *Destination, 271 /// T Exchange, 272 /// T Comparand); 273 /// 274 /// Whereas the llvm 'cmpxchg' instruction has the following syntax: 275 /// cmpxchg *Destination, Comparand, Exchange. 
276 /// So we need to swap Comparand and Exchange when invoking 277 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility 278 /// function MakeAtomicCmpXchgValue since it expects the arguments to be 279 /// already swapped. 280 281 static 282 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, 283 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) { 284 assert(E->getArg(0)->getType()->isPointerType()); 285 assert(CGF.getContext().hasSameUnqualifiedType( 286 E->getType(), E->getArg(0)->getType()->getPointeeType())); 287 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), 288 E->getArg(1)->getType())); 289 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), 290 E->getArg(2)->getType())); 291 292 auto *Destination = CGF.EmitScalarExpr(E->getArg(0)); 293 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); 294 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); 295 296 // For Release ordering, the failure ordering should be Monotonic. 297 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? 
298 AtomicOrdering::Monotonic : 299 SuccessOrdering; 300 301 auto *Result = CGF.Builder.CreateAtomicCmpXchg( 302 Destination, Comparand, Exchange, 303 SuccessOrdering, FailureOrdering); 304 Result->setVolatile(true); 305 return CGF.Builder.CreateExtractValue(Result, 0); 306 } 307 308 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, 309 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { 310 assert(E->getArg(0)->getType()->isPointerType()); 311 312 auto *IntTy = CGF.ConvertType(E->getType()); 313 auto *Result = CGF.Builder.CreateAtomicRMW( 314 AtomicRMWInst::Add, 315 CGF.EmitScalarExpr(E->getArg(0)), 316 ConstantInt::get(IntTy, 1), 317 Ordering); 318 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); 319 } 320 321 static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, 322 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { 323 assert(E->getArg(0)->getType()->isPointerType()); 324 325 auto *IntTy = CGF.ConvertType(E->getType()); 326 auto *Result = CGF.Builder.CreateAtomicRMW( 327 AtomicRMWInst::Sub, 328 CGF.EmitScalarExpr(E->getArg(0)), 329 ConstantInt::get(IntTy, 1), 330 Ordering); 331 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); 332 } 333 334 // Build a plain volatile load. 335 static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { 336 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); 337 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 338 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); 339 llvm::Type *ITy = 340 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); 341 Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 342 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(Ptr, LoadSize); 343 Load->setVolatile(true); 344 return Load; 345 } 346 347 // Build a plain volatile store. 
348 static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { 349 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); 350 Value *Value = CGF.EmitScalarExpr(E->getArg(1)); 351 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 352 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); 353 llvm::Type *ITy = 354 llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8); 355 Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 356 llvm::StoreInst *Store = 357 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); 358 Store->setVolatile(true); 359 return Store; 360 } 361 362 // Emit a simple mangled intrinsic that has 1 argument and a return type 363 // matching the argument type. Depending on mode, this may be a constrained 364 // floating-point intrinsic. 365 static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, 366 const CallExpr *E, unsigned IntrinsicID, 367 unsigned ConstrainedIntrinsicID) { 368 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 369 370 if (CGF.Builder.getIsFPConstrained()) { 371 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); 372 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); 373 } else { 374 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 375 return CGF.Builder.CreateCall(F, Src0); 376 } 377 } 378 379 // Emit an intrinsic that has 2 operands of the same type as its result. 380 // Depending on mode, this may be a constrained floating-point intrinsic. 
381 static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, 382 const CallExpr *E, unsigned IntrinsicID, 383 unsigned ConstrainedIntrinsicID) { 384 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 385 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 386 387 if (CGF.Builder.getIsFPConstrained()) { 388 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); 389 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); 390 } else { 391 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 392 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 393 } 394 } 395 396 // Emit an intrinsic that has 3 operands of the same type as its result. 397 // Depending on mode, this may be a constrained floating-point intrinsic. 398 static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, 399 const CallExpr *E, unsigned IntrinsicID, 400 unsigned ConstrainedIntrinsicID) { 401 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 402 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 403 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 404 405 if (CGF.Builder.getIsFPConstrained()) { 406 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); 407 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); 408 } else { 409 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 410 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 411 } 412 } 413 414 // Emit a simple mangled intrinsic that has 1 argument and a return type 415 // matching the argument type. 416 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 417 const CallExpr *E, 418 unsigned IntrinsicID) { 419 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 420 421 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 422 return CGF.Builder.CreateCall(F, Src0); 423 } 424 425 // Emit an intrinsic that has 2 operands of the same type as its result. 
426 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 427 const CallExpr *E, 428 unsigned IntrinsicID) { 429 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 430 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 431 432 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 433 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 434 } 435 436 // Emit an intrinsic that has 3 operands of the same type as its result. 437 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 438 const CallExpr *E, 439 unsigned IntrinsicID) { 440 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 441 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 442 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 443 444 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 445 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 446 } 447 448 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 449 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 450 const CallExpr *E, 451 unsigned IntrinsicID) { 452 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 453 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 454 455 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 456 return CGF.Builder.CreateCall(F, {Src0, Src1}); 457 } 458 459 // Emit an intrinsic that has overloaded integer result and fp operand. 
460 static Value * 461 emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, 462 unsigned IntrinsicID, 463 unsigned ConstrainedIntrinsicID) { 464 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 465 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 466 467 if (CGF.Builder.getIsFPConstrained()) { 468 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, 469 {ResultType, Src0->getType()}); 470 return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); 471 } else { 472 Function *F = 473 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()}); 474 return CGF.Builder.CreateCall(F, Src0); 475 } 476 } 477 478 /// EmitFAbs - Emit a call to @llvm.fabs(). 479 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 480 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 481 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 482 Call->setDoesNotAccessMemory(); 483 return Call; 484 } 485 486 /// Emit the computation of the sign bit for a floating point value. Returns 487 /// the i1 sign bit value. 488 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 489 LLVMContext &C = CGF.CGM.getLLVMContext(); 490 491 llvm::Type *Ty = V->getType(); 492 int Width = Ty->getPrimitiveSizeInBits(); 493 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 494 V = CGF.Builder.CreateBitCast(V, IntTy); 495 if (Ty->isPPC_FP128Ty()) { 496 // We want the sign bit of the higher-order double. The bitcast we just 497 // did works as if the double-double was stored to memory and then 498 // read as an i128. The "store" will put the higher-order double in the 499 // lower address in both little- and big-Endian modes, but the "load" 500 // will treat those bits as a different part of the i128: the low bits in 501 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 502 // we need to shift the high bits down to the low before truncating. 
503 Width >>= 1; 504 if (CGF.getTarget().isBigEndian()) { 505 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 506 V = CGF.Builder.CreateLShr(V, ShiftCst); 507 } 508 // We are truncating value in order to extract the higher-order 509 // double, which we will be using to extract the sign from. 510 IntTy = llvm::IntegerType::get(C, Width); 511 V = CGF.Builder.CreateTrunc(V, IntTy); 512 } 513 Value *Zero = llvm::Constant::getNullValue(IntTy); 514 return CGF.Builder.CreateICmpSLT(V, Zero); 515 } 516 517 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 518 const CallExpr *E, llvm::Constant *calleeValue) { 519 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); 520 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 521 } 522 523 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 524 /// depending on IntrinsicID. 525 /// 526 /// \arg CGF The current codegen function. 527 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 528 /// \arg X The first argument to the llvm.*.with.overflow.*. 529 /// \arg Y The second argument to the llvm.*.with.overflow.*. 530 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 531 /// \returns The result (i.e. sum/product) returned by the intrinsic. 532 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 533 const llvm::Intrinsic::ID IntrinsicID, 534 llvm::Value *X, llvm::Value *Y, 535 llvm::Value *&Carry) { 536 // Make sure we have integers of the same width. 537 assert(X->getType() == Y->getType() && 538 "Arguments must be the same type. 
(Did you forget to make sure both " 539 "arguments have the same integer width?)"); 540 541 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 542 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 543 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 544 return CGF.Builder.CreateExtractValue(Tmp, 0); 545 } 546 547 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 548 unsigned IntrinsicID, 549 int low, int high) { 550 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 551 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 552 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 553 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 554 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 555 return Call; 556 } 557 558 namespace { 559 struct WidthAndSignedness { 560 unsigned Width; 561 bool Signed; 562 }; 563 } 564 565 static WidthAndSignedness 566 getIntegerWidthAndSignedness(const clang::ASTContext &context, 567 const clang::QualType Type) { 568 assert(Type->isIntegerType() && "Given type is not an integer."); 569 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 570 bool Signed = Type->isSignedIntegerType(); 571 return {Width, Signed}; 572 } 573 574 // Given one or more integer types, this function produces an integer type that 575 // encompasses them: any value in one of the given types could be expressed in 576 // the encompassing type. 577 static struct WidthAndSignedness 578 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 579 assert(Types.size() > 0 && "Empty list of types."); 580 581 // If any of the given types is signed, we must return a signed type. 582 bool Signed = false; 583 for (const auto &Type : Types) { 584 Signed |= Type.Signed; 585 } 586 587 // The encompassing type must have a width greater than or equal to the width 588 // of the specified types. 
Additionally, if the encompassing type is signed, 589 // its width must be strictly greater than the width of any unsigned types 590 // given. 591 unsigned Width = 0; 592 for (const auto &Type : Types) { 593 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 594 if (Width < MinWidth) { 595 Width = MinWidth; 596 } 597 } 598 599 return {Width, Signed}; 600 } 601 602 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 603 llvm::Type *DestType = Int8PtrTy; 604 if (ArgValue->getType() != DestType) 605 ArgValue = 606 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 607 608 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 609 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 610 } 611 612 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 613 /// __builtin_object_size(p, @p To) is correct 614 static bool areBOSTypesCompatible(int From, int To) { 615 // Note: Our __builtin_object_size implementation currently treats Type=0 and 616 // Type=2 identically. Encoding this implementation detail here may make 617 // improving __builtin_object_size difficult in the future, so it's omitted. 618 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 619 } 620 621 static llvm::Value * 622 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 623 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true); 624 } 625 626 llvm::Value * 627 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 628 llvm::IntegerType *ResType, 629 llvm::Value *EmittedE, 630 bool IsDynamic) { 631 uint64_t ObjectSize; 632 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 633 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); 634 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 635 } 636 637 /// Returns a Value corresponding to the size of the given expression. 
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
///     it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE, bool IsDynamic) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      // Look up the implicit size parameter recorded for Param, then the
      // local address it was bound to, and load the size from there.
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      // Note: this D shadows the DeclRefExpr D declared by the enclosing if.
      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
                              getContext().getSizeType(), E->getBeginLoc());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  // evaluate E for side-effects. In either case, we shouldn't lower to
  // @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  // Reuse the already-emitted pointer when the caller supplied one.
  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  // The intrinsic is overloaded on the result type and the pointer type.
  Function *F =
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  Value *Dynamic = Builder.getInt1(IsDynamic);
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
}

namespace {
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  /// What, if anything, is done to the tested bit.
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  /// Atomicity and ordering of the access; mapped to an llvm::AtomicOrdering
  /// by getBitTestAtomicOrdering.
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  /// True for the *64 builtin variants.
  bool Is64Bit;

  /// Translate a bit-test builtin id into its BitTest description.
  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
} // namespace

/// Map each supported _bittest* / _interlockedbittest* builtin id to its
/// {Action, Interlocking, Is64Bit} triple. Any other id hits
/// llvm_unreachable.
BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
    // Main portable variants.
  case Builtin::BI_bittest:
    return {TestOnly, Unlocked, false};
  case Builtin::BI_bittestandcomplement:
    return {Complement, Unlocked, false};
  case Builtin::BI_bittestandreset:
    return {Reset, Unlocked, false};
  case Builtin::BI_bittestandset:
    return {Set, Unlocked, false};
  case Builtin::BI_interlockedbittestandreset:
    return {Reset, Sequential, false};
  case Builtin::BI_interlockedbittestandset:
    return {Set, Sequential, false};

    // X86-specific 64-bit variants.
  case Builtin::BI_bittest64:
    return {TestOnly, Unlocked, true};
  case Builtin::BI_bittestandcomplement64:
    return {Complement, Unlocked, true};
  case Builtin::BI_bittestandreset64:
    return {Reset, Unlocked, true};
  case Builtin::BI_bittestandset64:
    return {Set, Unlocked, true};
  case Builtin::BI_interlockedbittestandreset64:
    return {Reset, Sequential, true};
  case Builtin::BI_interlockedbittestandset64:
    return {Set, Sequential, true};

    // ARM/AArch64-specific ordering variants.
  case Builtin::BI_interlockedbittestandset_acq:
    return {Set, Acquire, false};
  case Builtin::BI_interlockedbittestandset_rel:
    return {Set, Release, false};
  case Builtin::BI_interlockedbittestandset_nf:
    return {Set, NoFence, false};
  case Builtin::BI_interlockedbittestandreset_acq:
    return {Reset, Acquire, false};
  case Builtin::BI_interlockedbittestandreset_rel:
    return {Reset, Release, false};
  case Builtin::BI_interlockedbittestandreset_nf:
    return {Reset, NoFence, false};
  }
  llvm_unreachable("expected only bittest intrinsics");
}

/// Return the letter appended to the "bt" mnemonic for action \p A, or '\0'
/// when the action is TestOnly and no letter is appended.
static char bitActionToX86BTCode(BitTest::ActionKind A) {
  switch (A) {
  case BitTest::TestOnly:   return '\0';
  case BitTest::Complement: return 'c';
  case BitTest::Reset:      return 'r';
  case BitTest::Set:        return 's';
  }
  llvm_unreachable("invalid action");
}

/// Emit a bit test as an inline-asm call of the form
///   [lock ]bt[c|r|s]{l|q} $2, ($1)
///   setc ${0:b}
/// where the "lock " prefix is added for every interlocking kind other than
/// Unlocked and the size suffix is 'q' for 64-bit variants, 'l' otherwise.
///
/// \param BT Decoded description of the builtin.
/// \param E The builtin call; the type of its argument 1 determines the
///          integer type used in the asm's function type.
/// \param BitBase Pointer operand passed to the asm.
/// \param BitPos Bit position operand passed to the asm.
/// \returns The call to the inline asm, whose function type returns Int8Ty.
static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
                                            BitTest BT,
                                            const CallExpr *E, Value *BitBase,
                                            Value *BitPos) {
  char Action = bitActionToX86BTCode(BT.Action);
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';

  // Build the assembly.
  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  if (BT.Interlocking != BitTest::Unlocked)
    AsmOS << "lock ";
  AsmOS << "bt";
  if (Action)
    AsmOS << Action;
  AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";

  // Build the constraints. FIXME: We should support immediates when possible.
  // NOTE(review): the list has no ~{memory} clobber even though the asm is
  // given a pointer and the bt[c|r|s] forms modify the addressed bit; the
  // call relies on hasSideEffects=true. Confirm whether a memory clobber is
  // needed.
  std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(),
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  llvm::Type *IntPtrType = IntType->getPointerTo();
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
}

/// Translate a BitTest interlocking kind into the LLVM atomic ordering used
/// for the generic lowering; Unlocked maps to NotAtomic.
static llvm::AtomicOrdering
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  switch (I) {
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("invalid interlocking");
}

/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
/// bits and a bit position and read and optionally modify the bit at that
/// position. The position index can be arbitrarily large, i.e. it can be larger
/// than 31 or 63, so we need an indexed load in the general case.
///
/// \returns An i8 value that is 0 or 1 on the generic path; on X86 the result
/// of EmitX86BitTestIntrinsic.
static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
                                         unsigned BuiltinID,
                                         const CallExpr *E) {
  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));

  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);

  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
  // indexing operation internally. Use them if possible.
  if (CGF.getTarget().getTriple().isX86())
    return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);

  // Otherwise, use generic code to load one byte and test the bit. Use all but
  // the bottom three bits as the array index, and the bottom three bits to form
  // a mask.
  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
  // The byte index uses an arithmetic shift, so the sign of BitPos is kept.
  Value *ByteIndex = CGF.Builder.CreateAShr(
      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
                                                 ByteIndex, "bittest.byteaddr"),
                   CharUnits::One());
  // Position of the bit inside the selected byte (0..7), as an i8.
  Value *PosLow =
      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));

  // The updating instructions will need a mask.
  Value *Mask = nullptr;
  if (BT.Action != BitTest::TestOnly) {
    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
                                 "bittest.mask");
  }

  // Check the action and ordering of the interlocked intrinsics.
  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);

  Value *OldByte = nullptr;
  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
    // Emit a combined atomicrmw load/store operation for the interlocked
    // intrinsics.
    // Only Set (or) and Reset (and with the inverted mask) are handled here;
    // decodeBitTestBuiltin produces no other action for interlocked kinds.
    llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
    if (BT.Action == BitTest::Reset) {
      Mask = CGF.Builder.CreateNot(Mask);
      RMWOp = llvm::AtomicRMWInst::And;
    }
    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
                                          Ordering);
  } else {
    // Emit a plain load for the non-interlocked intrinsics.
    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
    Value *NewByte = nullptr;
    switch (BT.Action) {
    case BitTest::TestOnly:
      // Don't store anything.
      break;
    case BitTest::Complement:
      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
      break;
    case BitTest::Reset:
      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
      break;
    case BitTest::Set:
      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
      break;
    }
    if (NewByte)
      CGF.Builder.CreateStore(NewByte, ByteAddr);
  }

  // However we loaded the old byte, either by plain load or atomicrmw, shift
  // the bit into the low position and mask it to 0 or 1.
  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
  return CGF.Builder.CreateAnd(
      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
}

namespace {
/// Which MSVC CRT setjmp entry point EmitMSVCRTSetJmp should call.
enum class MSVCSetJmpKind {
  _setjmpex,
  _setjmp3,
  _setjmp
};
}

/// MSVC handles setjmp a bit differently on different platforms. On every
/// architecture except 32-bit x86, the frame address is passed. On x86, extra
/// parameters can be passed as variadic arguments, but we always pass none.
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
                               const CallExpr *E) {
  llvm::Value *Arg1 = nullptr;
  llvm::Type *Arg1Ty = nullptr;
  StringRef Name;
  bool IsVarArg = false;
  if (SJKind == MSVCSetJmpKind::_setjmp3) {
    Name = "_setjmp3";
    Arg1Ty = CGF.Int32Ty;
    Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
    IsVarArg = true;
  } else {
    Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
    Arg1Ty = CGF.Int8PtrTy;
    if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
    } else
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
          llvm::ConstantInt::get(CGF.Int32Ty, 0));
  }

  // Mark the call site and declaration with ReturnsTwice.
926 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty}; 927 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 928 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, 929 llvm::Attribute::ReturnsTwice); 930 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( 931 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, 932 ReturnsTwiceAttr, /*Local=*/true); 933 934 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( 935 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); 936 llvm::Value *Args[] = {Buf, Arg1}; 937 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); 938 CB->setAttributes(ReturnsTwiceAttr); 939 return RValue::get(CB); 940 } 941 942 // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, 943 // we handle them here. 944 enum class CodeGenFunction::MSVCIntrin { 945 _BitScanForward, 946 _BitScanReverse, 947 _InterlockedAnd, 948 _InterlockedDecrement, 949 _InterlockedExchange, 950 _InterlockedExchangeAdd, 951 _InterlockedExchangeSub, 952 _InterlockedIncrement, 953 _InterlockedOr, 954 _InterlockedXor, 955 _InterlockedExchangeAdd_acq, 956 _InterlockedExchangeAdd_rel, 957 _InterlockedExchangeAdd_nf, 958 _InterlockedExchange_acq, 959 _InterlockedExchange_rel, 960 _InterlockedExchange_nf, 961 _InterlockedCompareExchange_acq, 962 _InterlockedCompareExchange_rel, 963 _InterlockedCompareExchange_nf, 964 _InterlockedOr_acq, 965 _InterlockedOr_rel, 966 _InterlockedOr_nf, 967 _InterlockedXor_acq, 968 _InterlockedXor_rel, 969 _InterlockedXor_nf, 970 _InterlockedAnd_acq, 971 _InterlockedAnd_rel, 972 _InterlockedAnd_nf, 973 _InterlockedIncrement_acq, 974 _InterlockedIncrement_rel, 975 _InterlockedIncrement_nf, 976 _InterlockedDecrement_acq, 977 _InterlockedDecrement_rel, 978 _InterlockedDecrement_nf, 979 __fastfail, 980 }; 981 982 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 983 const CallExpr *E) { 984 switch (BuiltinID) { 985 case MSVCIntrin::_BitScanForward: 986 case 
MSVCIntrin::_BitScanReverse: { 987 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 988 989 llvm::Type *ArgType = ArgValue->getType(); 990 llvm::Type *IndexType = 991 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 992 llvm::Type *ResultType = ConvertType(E->getType()); 993 994 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 995 Value *ResZero = llvm::Constant::getNullValue(ResultType); 996 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 997 998 BasicBlock *Begin = Builder.GetInsertBlock(); 999 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 1000 Builder.SetInsertPoint(End); 1001 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 1002 1003 Builder.SetInsertPoint(Begin); 1004 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 1005 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 1006 Builder.CreateCondBr(IsZero, End, NotZero); 1007 Result->addIncoming(ResZero, Begin); 1008 1009 Builder.SetInsertPoint(NotZero); 1010 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 1011 1012 if (BuiltinID == MSVCIntrin::_BitScanForward) { 1013 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 1014 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 1015 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 1016 Builder.CreateStore(ZeroCount, IndexAddress, false); 1017 } else { 1018 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1019 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 1020 1021 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 1022 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 1023 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 1024 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 1025 Builder.CreateStore(Index, IndexAddress, false); 1026 } 1027 Builder.CreateBr(End); 1028 Result->addIncoming(ResOne, NotZero); 
1029 1030 Builder.SetInsertPoint(End); 1031 return Result; 1032 } 1033 case MSVCIntrin::_InterlockedAnd: 1034 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 1035 case MSVCIntrin::_InterlockedExchange: 1036 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 1037 case MSVCIntrin::_InterlockedExchangeAdd: 1038 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 1039 case MSVCIntrin::_InterlockedExchangeSub: 1040 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 1041 case MSVCIntrin::_InterlockedOr: 1042 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 1043 case MSVCIntrin::_InterlockedXor: 1044 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 1045 case MSVCIntrin::_InterlockedExchangeAdd_acq: 1046 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, 1047 AtomicOrdering::Acquire); 1048 case MSVCIntrin::_InterlockedExchangeAdd_rel: 1049 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, 1050 AtomicOrdering::Release); 1051 case MSVCIntrin::_InterlockedExchangeAdd_nf: 1052 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, 1053 AtomicOrdering::Monotonic); 1054 case MSVCIntrin::_InterlockedExchange_acq: 1055 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, 1056 AtomicOrdering::Acquire); 1057 case MSVCIntrin::_InterlockedExchange_rel: 1058 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, 1059 AtomicOrdering::Release); 1060 case MSVCIntrin::_InterlockedExchange_nf: 1061 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, 1062 AtomicOrdering::Monotonic); 1063 case MSVCIntrin::_InterlockedCompareExchange_acq: 1064 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire); 1065 case MSVCIntrin::_InterlockedCompareExchange_rel: 1066 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); 1067 case MSVCIntrin::_InterlockedCompareExchange_nf: 1068 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); 1069 case 
MSVCIntrin::_InterlockedOr_acq: 1070 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, 1071 AtomicOrdering::Acquire); 1072 case MSVCIntrin::_InterlockedOr_rel: 1073 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, 1074 AtomicOrdering::Release); 1075 case MSVCIntrin::_InterlockedOr_nf: 1076 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, 1077 AtomicOrdering::Monotonic); 1078 case MSVCIntrin::_InterlockedXor_acq: 1079 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, 1080 AtomicOrdering::Acquire); 1081 case MSVCIntrin::_InterlockedXor_rel: 1082 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, 1083 AtomicOrdering::Release); 1084 case MSVCIntrin::_InterlockedXor_nf: 1085 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, 1086 AtomicOrdering::Monotonic); 1087 case MSVCIntrin::_InterlockedAnd_acq: 1088 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, 1089 AtomicOrdering::Acquire); 1090 case MSVCIntrin::_InterlockedAnd_rel: 1091 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, 1092 AtomicOrdering::Release); 1093 case MSVCIntrin::_InterlockedAnd_nf: 1094 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, 1095 AtomicOrdering::Monotonic); 1096 case MSVCIntrin::_InterlockedIncrement_acq: 1097 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire); 1098 case MSVCIntrin::_InterlockedIncrement_rel: 1099 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release); 1100 case MSVCIntrin::_InterlockedIncrement_nf: 1101 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic); 1102 case MSVCIntrin::_InterlockedDecrement_acq: 1103 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire); 1104 case MSVCIntrin::_InterlockedDecrement_rel: 1105 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release); 1106 case MSVCIntrin::_InterlockedDecrement_nf: 1107 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic); 1108 1109 case 
MSVCIntrin::_InterlockedDecrement: 1110 return EmitAtomicDecrementValue(*this, E); 1111 case MSVCIntrin::_InterlockedIncrement: 1112 return EmitAtomicIncrementValue(*this, E); 1113 1114 case MSVCIntrin::__fastfail: { 1115 // Request immediate process termination from the kernel. The instruction 1116 // sequences to do this are documented on MSDN: 1117 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 1118 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 1119 StringRef Asm, Constraints; 1120 switch (ISA) { 1121 default: 1122 ErrorUnsupported(E, "__fastfail call for this architecture"); 1123 break; 1124 case llvm::Triple::x86: 1125 case llvm::Triple::x86_64: 1126 Asm = "int $$0x29"; 1127 Constraints = "{cx}"; 1128 break; 1129 case llvm::Triple::thumb: 1130 Asm = "udf #251"; 1131 Constraints = "{r0}"; 1132 break; 1133 case llvm::Triple::aarch64: 1134 Asm = "brk #0xF003"; 1135 Constraints = "{w0}"; 1136 } 1137 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 1138 llvm::InlineAsm *IA = 1139 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); 1140 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 1141 getLLVMContext(), llvm::AttributeList::FunctionIndex, 1142 llvm::Attribute::NoReturn); 1143 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 1144 CI->setAttributes(NoReturnAttr); 1145 return CI; 1146 } 1147 } 1148 llvm_unreachable("Incorrect MSVC intrinsic!"); 1149 } 1150 1151 namespace { 1152 // ARC cleanup for __builtin_os_log_format 1153 struct CallObjCArcUse final : EHScopeStack::Cleanup { 1154 CallObjCArcUse(llvm::Value *object) : object(object) {} 1155 llvm::Value *object; 1156 1157 void Emit(CodeGenFunction &CGF, Flags flags) override { 1158 CGF.EmitARCIntrinsicUse(object); 1159 } 1160 }; 1161 } 1162 1163 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, 1164 BuiltinCheckKind Kind) { 1165 assert((Kind == BCK_CLZPassedZero || Kind == 
BCK_CTZPassedZero) 1166 && "Unsupported builtin check kind"); 1167 1168 Value *ArgValue = EmitScalarExpr(E); 1169 if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) 1170 return ArgValue; 1171 1172 SanitizerScope SanScope(this); 1173 Value *Cond = Builder.CreateICmpNE( 1174 ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); 1175 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), 1176 SanitizerHandler::InvalidBuiltin, 1177 {EmitCheckSourceLocation(E->getExprLoc()), 1178 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, 1179 None); 1180 return ArgValue; 1181 } 1182 1183 /// Get the argument type for arguments to os_log_helper. 1184 static CanQualType getOSLogArgType(ASTContext &C, int Size) { 1185 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); 1186 return C.getCanonicalType(UnsignedTy); 1187 } 1188 1189 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( 1190 const analyze_os_log::OSLogBufferLayout &Layout, 1191 CharUnits BufferAlignment) { 1192 ASTContext &Ctx = getContext(); 1193 1194 llvm::SmallString<64> Name; 1195 { 1196 raw_svector_ostream OS(Name); 1197 OS << "__os_log_helper"; 1198 OS << "_" << BufferAlignment.getQuantity(); 1199 OS << "_" << int(Layout.getSummaryByte()); 1200 OS << "_" << int(Layout.getNumArgsByte()); 1201 for (const auto &Item : Layout.Items) 1202 OS << "_" << int(Item.getSizeByte()) << "_" 1203 << int(Item.getDescriptorByte()); 1204 } 1205 1206 if (llvm::Function *F = CGM.getModule().getFunction(Name)) 1207 return F; 1208 1209 llvm::SmallVector<QualType, 4> ArgTys; 1210 FunctionArgList Args; 1211 Args.push_back(ImplicitParamDecl::Create( 1212 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, 1213 ImplicitParamDecl::Other)); 1214 ArgTys.emplace_back(Ctx.VoidPtrTy); 1215 1216 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { 1217 char Size = Layout.Items[I].getSizeByte(); 1218 if (!Size) 1219 continue; 1220 1221 
QualType ArgTy = getOSLogArgType(Ctx, Size); 1222 Args.push_back(ImplicitParamDecl::Create( 1223 Ctx, nullptr, SourceLocation(), 1224 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, 1225 ImplicitParamDecl::Other)); 1226 ArgTys.emplace_back(ArgTy); 1227 } 1228 1229 QualType ReturnTy = Ctx.VoidTy; 1230 QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {}); 1231 1232 // The helper function has linkonce_odr linkage to enable the linker to merge 1233 // identical functions. To ensure the merging always happens, 'noinline' is 1234 // attached to the function when compiling with -Oz. 1235 const CGFunctionInfo &FI = 1236 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args); 1237 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); 1238 llvm::Function *Fn = llvm::Function::Create( 1239 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); 1240 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); 1241 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); 1242 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); 1243 Fn->setDoesNotThrow(); 1244 1245 // Attach 'noinline' at -Oz. 1246 if (CGM.getCodeGenOpts().OptimizeSize == 2) 1247 Fn->addFnAttr(llvm::Attribute::NoInline); 1248 1249 auto NL = ApplyDebugLocation::CreateEmpty(*this); 1250 IdentifierInfo *II = &Ctx.Idents.get(Name); 1251 FunctionDecl *FD = FunctionDecl::Create( 1252 Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, 1253 FuncionTy, nullptr, SC_PrivateExtern, false, false); 1254 1255 StartFunction(FD, ReturnTy, Fn, FI, Args); 1256 1257 // Create a scope with an artificial location for the body of this function. 
1258 auto AL = ApplyDebugLocation::CreateArtificial(*this); 1259 1260 CharUnits Offset; 1261 Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), 1262 BufferAlignment); 1263 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), 1264 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 1265 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), 1266 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 1267 1268 unsigned I = 1; 1269 for (const auto &Item : Layout.Items) { 1270 Builder.CreateStore( 1271 Builder.getInt8(Item.getDescriptorByte()), 1272 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 1273 Builder.CreateStore( 1274 Builder.getInt8(Item.getSizeByte()), 1275 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 1276 1277 CharUnits Size = Item.size(); 1278 if (!Size.getQuantity()) 1279 continue; 1280 1281 Address Arg = GetAddrOfLocalVar(Args[I]); 1282 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); 1283 Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), 1284 "argDataCast"); 1285 Builder.CreateStore(Builder.CreateLoad(Arg), Addr); 1286 Offset += Size; 1287 ++I; 1288 } 1289 1290 FinishFunction(); 1291 1292 return Fn; 1293 } 1294 1295 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { 1296 assert(E.getNumArgs() >= 2 && 1297 "__builtin_os_log_format takes at least 2 arguments"); 1298 ASTContext &Ctx = getContext(); 1299 analyze_os_log::OSLogBufferLayout Layout; 1300 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); 1301 Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); 1302 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 1303 1304 // Ignore argument 1, the format string. It is not currently used. 
1305 CallArgList Args; 1306 Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy); 1307 1308 for (const auto &Item : Layout.Items) { 1309 int Size = Item.getSizeByte(); 1310 if (!Size) 1311 continue; 1312 1313 llvm::Value *ArgVal; 1314 1315 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) { 1316 uint64_t Val = 0; 1317 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I) 1318 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8; 1319 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val)); 1320 } else if (const Expr *TheExpr = Item.getExpr()) { 1321 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); 1322 1323 // If this is a retainable type, push a lifetime-extended cleanup to 1324 // ensure the lifetime of the argument is extended to the end of the 1325 // enclosing block scope. 1326 // FIXME: We only have to do this if the argument is a temporary, which 1327 // gets released after the full expression. 1328 if (TheExpr->getType()->isObjCRetainableType() && 1329 getLangOpts().ObjCAutoRefCount) { 1330 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 1331 "Only scalar can be a ObjC retainable type"); 1332 if (!isa<Constant>(ArgVal)) { 1333 CleanupKind Cleanup = getARCCleanupKind(); 1334 QualType Ty = TheExpr->getType(); 1335 Address Alloca = Address::invalid(); 1336 Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca); 1337 ArgVal = EmitARCRetain(Ty, ArgVal); 1338 Builder.CreateStore(ArgVal, Addr); 1339 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty, 1340 CodeGenFunction::destroyARCStrongPrecise, 1341 Cleanup & EHCleanup); 1342 1343 // Push a clang.arc.use call to ensure ARC optimizer knows that the 1344 // argument has to be alive. 
1345 if (CGM.getCodeGenOpts().OptimizationLevel != 0) 1346 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal); 1347 } 1348 } 1349 } else { 1350 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); 1351 } 1352 1353 unsigned ArgValSize = 1354 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); 1355 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), 1356 ArgValSize); 1357 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); 1358 CanQualType ArgTy = getOSLogArgType(Ctx, Size); 1359 // If ArgVal has type x86_fp80, zero-extend ArgVal. 1360 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); 1361 Args.add(RValue::get(ArgVal), ArgTy); 1362 } 1363 1364 const CGFunctionInfo &FI = 1365 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); 1366 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( 1367 Layout, BufAddr.getAlignment()); 1368 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); 1369 return RValue::get(BufAddr.getPointer()); 1370 } 1371 1372 /// Determine if a binop is a checked mixed-sign multiply we can specialize. 1373 static bool isSpecialMixedSignMultiply(unsigned BuiltinID, 1374 WidthAndSignedness Op1Info, 1375 WidthAndSignedness Op2Info, 1376 WidthAndSignedness ResultInfo) { 1377 return BuiltinID == Builtin::BI__builtin_mul_overflow && 1378 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width && 1379 Op1Info.Signed != Op2Info.Signed; 1380 } 1381 1382 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of 1383 /// the generic checked-binop irgen. 
1384 static RValue 1385 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, 1386 WidthAndSignedness Op1Info, const clang::Expr *Op2, 1387 WidthAndSignedness Op2Info, 1388 const clang::Expr *ResultArg, QualType ResultQTy, 1389 WidthAndSignedness ResultInfo) { 1390 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info, 1391 Op2Info, ResultInfo) && 1392 "Not a mixed-sign multipliction we can specialize"); 1393 1394 // Emit the signed and unsigned operands. 1395 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2; 1396 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1; 1397 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); 1398 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); 1399 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width; 1400 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width; 1401 1402 // One of the operands may be smaller than the other. If so, [s|z]ext it. 1403 if (SignedOpWidth < UnsignedOpWidth) 1404 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext"); 1405 if (UnsignedOpWidth < SignedOpWidth) 1406 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext"); 1407 1408 llvm::Type *OpTy = Signed->getType(); 1409 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); 1410 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); 1411 llvm::Type *ResTy = ResultPtr.getElementType(); 1412 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width); 1413 1414 // Take the absolute value of the signed operand. 1415 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); 1416 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed); 1417 llvm::Value *AbsSigned = 1418 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed); 1419 1420 // Perform a checked unsigned multiplication. 
1421 llvm::Value *UnsignedOverflow; 1422 llvm::Value *UnsignedResult = 1423 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned, 1424 Unsigned, UnsignedOverflow); 1425 1426 llvm::Value *Overflow, *Result; 1427 if (ResultInfo.Signed) { 1428 // Signed overflow occurs if the result is greater than INT_MAX or lesser 1429 // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). 1430 auto IntMax = 1431 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth); 1432 llvm::Value *MaxResult = 1433 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), 1434 CGF.Builder.CreateZExt(IsNegative, OpTy)); 1435 llvm::Value *SignedOverflow = 1436 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult); 1437 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow); 1438 1439 // Prepare the signed result (possibly by negating it). 1440 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult); 1441 llvm::Value *SignedResult = 1442 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult); 1443 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy); 1444 } else { 1445 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX. 1446 llvm::Value *Underflow = CGF.Builder.CreateAnd( 1447 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); 1448 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); 1449 if (ResultInfo.Width < OpWidth) { 1450 auto IntMax = 1451 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth); 1452 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( 1453 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); 1454 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); 1455 } 1456 1457 // Negate the product if it would be negative in infinite precision. 
1458 Result = CGF.Builder.CreateSelect( 1459 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult); 1460 1461 Result = CGF.Builder.CreateTrunc(Result, ResTy); 1462 } 1463 assert(Overflow && Result && "Missing overflow or result"); 1464 1465 bool isVolatile = 1466 ResultArg->getType()->getPointeeType().isVolatileQualified(); 1467 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, 1468 isVolatile); 1469 return RValue::get(Overflow); 1470 } 1471 1472 static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, 1473 Value *&RecordPtr, CharUnits Align, 1474 llvm::FunctionCallee Func, int Lvl) { 1475 ASTContext &Context = CGF.getContext(); 1476 RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition(); 1477 std::string Pad = std::string(Lvl * 4, ' '); 1478 1479 Value *GString = 1480 CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n"); 1481 Value *Res = CGF.Builder.CreateCall(Func, {GString}); 1482 1483 static llvm::DenseMap<QualType, const char *> Types; 1484 if (Types.empty()) { 1485 Types[Context.CharTy] = "%c"; 1486 Types[Context.BoolTy] = "%d"; 1487 Types[Context.SignedCharTy] = "%hhd"; 1488 Types[Context.UnsignedCharTy] = "%hhu"; 1489 Types[Context.IntTy] = "%d"; 1490 Types[Context.UnsignedIntTy] = "%u"; 1491 Types[Context.LongTy] = "%ld"; 1492 Types[Context.UnsignedLongTy] = "%lu"; 1493 Types[Context.LongLongTy] = "%lld"; 1494 Types[Context.UnsignedLongLongTy] = "%llu"; 1495 Types[Context.ShortTy] = "%hd"; 1496 Types[Context.UnsignedShortTy] = "%hu"; 1497 Types[Context.VoidPtrTy] = "%p"; 1498 Types[Context.FloatTy] = "%f"; 1499 Types[Context.DoubleTy] = "%f"; 1500 Types[Context.LongDoubleTy] = "%Lf"; 1501 Types[Context.getPointerType(Context.CharTy)] = "%s"; 1502 Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s"; 1503 } 1504 1505 for (const auto *FD : RD->fields()) { 1506 Value *FieldPtr = RecordPtr; 1507 if (RD->isUnion()) 1508 FieldPtr = CGF.Builder.CreatePointerCast( 
1509 FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType()))); 1510 else 1511 FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr, 1512 FD->getFieldIndex()); 1513 1514 GString = CGF.Builder.CreateGlobalStringPtr( 1515 llvm::Twine(Pad) 1516 .concat(FD->getType().getAsString()) 1517 .concat(llvm::Twine(' ')) 1518 .concat(FD->getNameAsString()) 1519 .concat(" : ") 1520 .str()); 1521 Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); 1522 Res = CGF.Builder.CreateAdd(Res, TmpRes); 1523 1524 QualType CanonicalType = 1525 FD->getType().getUnqualifiedType().getCanonicalType(); 1526 1527 // We check whether we are in a recursive type 1528 if (CanonicalType->isRecordType()) { 1529 TmpRes = dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1); 1530 Res = CGF.Builder.CreateAdd(TmpRes, Res); 1531 continue; 1532 } 1533 1534 // We try to determine the best format to print the current field 1535 llvm::Twine Format = Types.find(CanonicalType) == Types.end() 1536 ? 
Types[Context.VoidPtrTy] 1537 : Types[CanonicalType]; 1538 1539 Address FieldAddress = Address(FieldPtr, Align); 1540 FieldPtr = CGF.Builder.CreateLoad(FieldAddress); 1541 1542 // FIXME Need to handle bitfield here 1543 GString = CGF.Builder.CreateGlobalStringPtr( 1544 Format.concat(llvm::Twine('\n')).str()); 1545 TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr}); 1546 Res = CGF.Builder.CreateAdd(Res, TmpRes); 1547 } 1548 1549 GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n"); 1550 Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); 1551 Res = CGF.Builder.CreateAdd(Res, TmpRes); 1552 return Res; 1553 } 1554 1555 static bool 1556 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, 1557 llvm::SmallPtrSetImpl<const Decl *> &Seen) { 1558 if (const auto *Arr = Ctx.getAsArrayType(Ty)) 1559 Ty = Ctx.getBaseElementType(Arr); 1560 1561 const auto *Record = Ty->getAsCXXRecordDecl(); 1562 if (!Record) 1563 return false; 1564 1565 // We've already checked this type, or are in the process of checking it. 1566 if (!Seen.insert(Record).second) 1567 return false; 1568 1569 assert(Record->hasDefinition() && 1570 "Incomplete types should already be diagnosed"); 1571 1572 if (Record->isDynamicClass()) 1573 return true; 1574 1575 for (FieldDecl *F : Record->fields()) { 1576 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen)) 1577 return true; 1578 } 1579 return false; 1580 } 1581 1582 /// Determine if the specified type requires laundering by checking if it is a 1583 /// dynamic class type or contains a subobject which is a dynamic class type. 
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
  // Nothing needs laundering unless the StrictVTablePointers codegen option
  // is enabled.
  if (!CGM.getCodeGenOpts().StrictVTablePointers)
    return false;
  // Records already visited by the recursive check.
  llvm::SmallPtrSet<const Decl *, 16> Seen;
  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
}

/// Emit a rotate builtin as a call to the llvm.fshl / llvm.fshr funnel-shift
/// intrinsic, passing the source value as both data operands.
///
/// \param E the builtin call; argument 0 is the value, argument 1 the amount.
/// \param IsRotateRight selects fshr when true, fshl otherwise.
/// \returns the result of the intrinsic call.
RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));

  // The builtin's shift arg may have a different type than the source arg and
  // result, but the LLVM intrinsic uses the same type for all values.
  // The amount is converted as an unsigned value (isSigned = false).
  llvm::Type *Ty = Src->getType();
  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);

  // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *F = CGM.getIntrinsic(IID, Ty);
  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
}

RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                        const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
  // See if we can constant fold this builtin. If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  // There are LLVM math intrinsics/instructions corresponding to math library
  // functions except the LLVM op will never set errno while the math library
  // might. Also, math builtins have the same semantics as their math library
  // twins. Thus, we can transform math library and builtin calls to their
  // LLVM counterparts if the call is marked 'const' (known to never set errno).
  if (FD->hasAttr<ConstAttr>()) {
    switch (BuiltinID) {
    case Builtin::BIceil:
    case Builtin::BIceilf:
    case Builtin::BIceill:
    case Builtin::BI__builtin_ceil:
    case Builtin::BI__builtin_ceilf:
    case Builtin::BI__builtin_ceilf16:
    case Builtin::BI__builtin_ceill:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::ceil,
                                   Intrinsic::experimental_constrained_ceil));

    case Builtin::BIcopysign:
    case Builtin::BIcopysignf:
    case Builtin::BIcopysignl:
    case Builtin::BI__builtin_copysign:
    case Builtin::BI__builtin_copysignf:
    case Builtin::BI__builtin_copysignf16:
    case Builtin::BI__builtin_copysignl:
    case Builtin::BI__builtin_copysignf128:
      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));

    case Builtin::BIcos:
    case Builtin::BIcosf:
    case Builtin::BIcosl:
    case Builtin::BI__builtin_cos:
    case Builtin::BI__builtin_cosf:
    case Builtin::BI__builtin_cosf16:
    case Builtin::BI__builtin_cosl:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::cos,
                                   Intrinsic::experimental_constrained_cos));

    case Builtin::BIexp:
    case Builtin::BIexpf:
    case Builtin::BIexpl:
    case Builtin::BI__builtin_exp:
    case Builtin::BI__builtin_expf:
    case Builtin::BI__builtin_expf16:
    case Builtin::BI__builtin_expl:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::exp,
                                   Intrinsic::experimental_constrained_exp));

    case Builtin::BIexp2:
    case Builtin::BIexp2f:
    case Builtin::BIexp2l:
    case Builtin::BI__builtin_exp2:
    case Builtin::BI__builtin_exp2f:
    case Builtin::BI__builtin_exp2f16:
    case Builtin::BI__builtin_exp2l:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::exp2,
                                   Intrinsic::experimental_constrained_exp2));

    case Builtin::BIfabs:
    case Builtin::BIfabsf:
    case Builtin::BIfabsl:
    case Builtin::BI__builtin_fabs:
    case Builtin::BI__builtin_fabsf:
    case Builtin::BI__builtin_fabsf16:
    case Builtin::BI__builtin_fabsl:
    case Builtin::BI__builtin_fabsf128:
      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));

    case Builtin::BIfloor:
    case Builtin::BIfloorf:
    case Builtin::BIfloorl:
    case Builtin::BI__builtin_floor:
    case Builtin::BI__builtin_floorf:
    case Builtin::BI__builtin_floorf16:
    case Builtin::BI__builtin_floorl:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::floor,
                                   Intrinsic::experimental_constrained_floor));

    case Builtin::BIfma:
    case Builtin::BIfmaf:
    case Builtin::BIfmal:
    case Builtin::BI__builtin_fma:
    case Builtin::BI__builtin_fmaf:
    case Builtin::BI__builtin_fmaf16:
    case Builtin::BI__builtin_fmal:
      return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::fma,
                                   Intrinsic::experimental_constrained_fma));

    case Builtin::BIfmax:
    case Builtin::BIfmaxf:
    case Builtin::BIfmaxl:
    case Builtin::BI__builtin_fmax:
    case Builtin::BI__builtin_fmaxf:
    case Builtin::BI__builtin_fmaxf16:
    case Builtin::BI__builtin_fmaxl:
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::maxnum,
                                   Intrinsic::experimental_constrained_maxnum));

    case Builtin::BIfmin:
    case Builtin::BIfminf:
    case Builtin::BIfminl:
    case Builtin::BI__builtin_fmin:
    case Builtin::BI__builtin_fminf:
    case Builtin::BI__builtin_fminf16:
    case Builtin::BI__builtin_fminl:
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
                                   Intrinsic::minnum,
Intrinsic::experimental_constrained_minnum)); 1736 1737 // fmod() is a special-case. It maps to the frem instruction rather than an 1738 // LLVM intrinsic. 1739 case Builtin::BIfmod: 1740 case Builtin::BIfmodf: 1741 case Builtin::BIfmodl: 1742 case Builtin::BI__builtin_fmod: 1743 case Builtin::BI__builtin_fmodf: 1744 case Builtin::BI__builtin_fmodf16: 1745 case Builtin::BI__builtin_fmodl: { 1746 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 1747 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 1748 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); 1749 } 1750 1751 case Builtin::BIlog: 1752 case Builtin::BIlogf: 1753 case Builtin::BIlogl: 1754 case Builtin::BI__builtin_log: 1755 case Builtin::BI__builtin_logf: 1756 case Builtin::BI__builtin_logf16: 1757 case Builtin::BI__builtin_logl: 1758 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1759 Intrinsic::log, 1760 Intrinsic::experimental_constrained_log)); 1761 1762 case Builtin::BIlog10: 1763 case Builtin::BIlog10f: 1764 case Builtin::BIlog10l: 1765 case Builtin::BI__builtin_log10: 1766 case Builtin::BI__builtin_log10f: 1767 case Builtin::BI__builtin_log10f16: 1768 case Builtin::BI__builtin_log10l: 1769 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1770 Intrinsic::log10, 1771 Intrinsic::experimental_constrained_log10)); 1772 1773 case Builtin::BIlog2: 1774 case Builtin::BIlog2f: 1775 case Builtin::BIlog2l: 1776 case Builtin::BI__builtin_log2: 1777 case Builtin::BI__builtin_log2f: 1778 case Builtin::BI__builtin_log2f16: 1779 case Builtin::BI__builtin_log2l: 1780 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1781 Intrinsic::log2, 1782 Intrinsic::experimental_constrained_log2)); 1783 1784 case Builtin::BInearbyint: 1785 case Builtin::BInearbyintf: 1786 case Builtin::BInearbyintl: 1787 case Builtin::BI__builtin_nearbyint: 1788 case Builtin::BI__builtin_nearbyintf: 1789 case Builtin::BI__builtin_nearbyintl: 1790 return 
RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1791 Intrinsic::nearbyint, 1792 Intrinsic::experimental_constrained_nearbyint)); 1793 1794 case Builtin::BIpow: 1795 case Builtin::BIpowf: 1796 case Builtin::BIpowl: 1797 case Builtin::BI__builtin_pow: 1798 case Builtin::BI__builtin_powf: 1799 case Builtin::BI__builtin_powf16: 1800 case Builtin::BI__builtin_powl: 1801 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, 1802 Intrinsic::pow, 1803 Intrinsic::experimental_constrained_pow)); 1804 1805 case Builtin::BIrint: 1806 case Builtin::BIrintf: 1807 case Builtin::BIrintl: 1808 case Builtin::BI__builtin_rint: 1809 case Builtin::BI__builtin_rintf: 1810 case Builtin::BI__builtin_rintf16: 1811 case Builtin::BI__builtin_rintl: 1812 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1813 Intrinsic::rint, 1814 Intrinsic::experimental_constrained_rint)); 1815 1816 case Builtin::BIround: 1817 case Builtin::BIroundf: 1818 case Builtin::BIroundl: 1819 case Builtin::BI__builtin_round: 1820 case Builtin::BI__builtin_roundf: 1821 case Builtin::BI__builtin_roundf16: 1822 case Builtin::BI__builtin_roundl: 1823 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1824 Intrinsic::round, 1825 Intrinsic::experimental_constrained_round)); 1826 1827 case Builtin::BIsin: 1828 case Builtin::BIsinf: 1829 case Builtin::BIsinl: 1830 case Builtin::BI__builtin_sin: 1831 case Builtin::BI__builtin_sinf: 1832 case Builtin::BI__builtin_sinf16: 1833 case Builtin::BI__builtin_sinl: 1834 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1835 Intrinsic::sin, 1836 Intrinsic::experimental_constrained_sin)); 1837 1838 case Builtin::BIsqrt: 1839 case Builtin::BIsqrtf: 1840 case Builtin::BIsqrtl: 1841 case Builtin::BI__builtin_sqrt: 1842 case Builtin::BI__builtin_sqrtf: 1843 case Builtin::BI__builtin_sqrtf16: 1844 case Builtin::BI__builtin_sqrtl: 1845 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1846 Intrinsic::sqrt, 1847 
Intrinsic::experimental_constrained_sqrt)); 1848 1849 case Builtin::BItrunc: 1850 case Builtin::BItruncf: 1851 case Builtin::BItruncl: 1852 case Builtin::BI__builtin_trunc: 1853 case Builtin::BI__builtin_truncf: 1854 case Builtin::BI__builtin_truncf16: 1855 case Builtin::BI__builtin_truncl: 1856 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 1857 Intrinsic::trunc, 1858 Intrinsic::experimental_constrained_trunc)); 1859 1860 case Builtin::BIlround: 1861 case Builtin::BIlroundf: 1862 case Builtin::BIlroundl: 1863 case Builtin::BI__builtin_lround: 1864 case Builtin::BI__builtin_lroundf: 1865 case Builtin::BI__builtin_lroundl: 1866 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( 1867 *this, E, Intrinsic::lround, 1868 Intrinsic::experimental_constrained_lround)); 1869 1870 case Builtin::BIllround: 1871 case Builtin::BIllroundf: 1872 case Builtin::BIllroundl: 1873 case Builtin::BI__builtin_llround: 1874 case Builtin::BI__builtin_llroundf: 1875 case Builtin::BI__builtin_llroundl: 1876 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( 1877 *this, E, Intrinsic::llround, 1878 Intrinsic::experimental_constrained_llround)); 1879 1880 case Builtin::BIlrint: 1881 case Builtin::BIlrintf: 1882 case Builtin::BIlrintl: 1883 case Builtin::BI__builtin_lrint: 1884 case Builtin::BI__builtin_lrintf: 1885 case Builtin::BI__builtin_lrintl: 1886 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( 1887 *this, E, Intrinsic::lrint, 1888 Intrinsic::experimental_constrained_lrint)); 1889 1890 case Builtin::BIllrint: 1891 case Builtin::BIllrintf: 1892 case Builtin::BIllrintl: 1893 case Builtin::BI__builtin_llrint: 1894 case Builtin::BI__builtin_llrintf: 1895 case Builtin::BI__builtin_llrintl: 1896 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( 1897 *this, E, Intrinsic::llrint, 1898 Intrinsic::experimental_constrained_llrint)); 1899 1900 default: 1901 break; 1902 } 1903 } 1904 1905 switch (BuiltinID) { 1906 default: break; 1907 case 
Builtin::BI__builtin___CFStringMakeConstantString: 1908 case Builtin::BI__builtin___NSStringMakeConstantString: 1909 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); 1910 case Builtin::BI__builtin_stdarg_start: 1911 case Builtin::BI__builtin_va_start: 1912 case Builtin::BI__va_start: 1913 case Builtin::BI__builtin_va_end: 1914 return RValue::get( 1915 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 1916 ? EmitScalarExpr(E->getArg(0)) 1917 : EmitVAListRef(E->getArg(0)).getPointer(), 1918 BuiltinID != Builtin::BI__builtin_va_end)); 1919 case Builtin::BI__builtin_va_copy: { 1920 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 1921 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 1922 1923 llvm::Type *Type = Int8PtrTy; 1924 1925 DstPtr = Builder.CreateBitCast(DstPtr, Type); 1926 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 1927 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 1928 {DstPtr, SrcPtr})); 1929 } 1930 case Builtin::BI__builtin_abs: 1931 case Builtin::BI__builtin_labs: 1932 case Builtin::BI__builtin_llabs: { 1933 // X < 0 ? -X : X 1934 // The negation has 'nsw' because abs of INT_MIN is undefined. 
1935 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 1936 Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg"); 1937 Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType()); 1938 Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); 1939 Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs"); 1940 return RValue::get(Result); 1941 } 1942 case Builtin::BI__builtin_conj: 1943 case Builtin::BI__builtin_conjf: 1944 case Builtin::BI__builtin_conjl: 1945 case Builtin::BIconj: 1946 case Builtin::BIconjf: 1947 case Builtin::BIconjl: { 1948 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1949 Value *Real = ComplexVal.first; 1950 Value *Imag = ComplexVal.second; 1951 Imag = Builder.CreateFNeg(Imag, "neg"); 1952 return RValue::getComplex(std::make_pair(Real, Imag)); 1953 } 1954 case Builtin::BI__builtin_creal: 1955 case Builtin::BI__builtin_crealf: 1956 case Builtin::BI__builtin_creall: 1957 case Builtin::BIcreal: 1958 case Builtin::BIcrealf: 1959 case Builtin::BIcreall: { 1960 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 1961 return RValue::get(ComplexVal.first); 1962 } 1963 1964 case Builtin::BI__builtin_dump_struct: { 1965 llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy); 1966 llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get( 1967 LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true); 1968 1969 Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts()); 1970 CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment(); 1971 1972 const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts(); 1973 QualType Arg0Type = Arg0->getType()->getPointeeType(); 1974 1975 Value *RecordPtr = EmitScalarExpr(Arg0); 1976 Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, 1977 {LLVMFuncType, Func}, 0); 1978 return RValue::get(Res); 1979 } 1980 1981 case Builtin::BI__builtin_preserve_access_index: { 1982 // Only enabled preserved access index region when debuginfo 1983 // is 
available as debuginfo is needed to preserve user-level 1984 // access pattern. 1985 if (!getDebugInfo()) { 1986 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g"); 1987 return RValue::get(EmitScalarExpr(E->getArg(0))); 1988 } 1989 1990 // Nested builtin_preserve_access_index() not supported 1991 if (IsInPreservedAIRegion) { 1992 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported"); 1993 return RValue::get(EmitScalarExpr(E->getArg(0))); 1994 } 1995 1996 IsInPreservedAIRegion = true; 1997 Value *Res = EmitScalarExpr(E->getArg(0)); 1998 IsInPreservedAIRegion = false; 1999 return RValue::get(Res); 2000 } 2001 2002 case Builtin::BI__builtin_cimag: 2003 case Builtin::BI__builtin_cimagf: 2004 case Builtin::BI__builtin_cimagl: 2005 case Builtin::BIcimag: 2006 case Builtin::BIcimagf: 2007 case Builtin::BIcimagl: { 2008 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 2009 return RValue::get(ComplexVal.second); 2010 } 2011 2012 case Builtin::BI__builtin_clrsb: 2013 case Builtin::BI__builtin_clrsbl: 2014 case Builtin::BI__builtin_clrsbll: { 2015 // clrsb(x) -> clz(x < 0 ? 
~x : x) - 1 or 2016 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 2017 2018 llvm::Type *ArgType = ArgValue->getType(); 2019 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 2020 2021 llvm::Type *ResultType = ConvertType(E->getType()); 2022 Value *Zero = llvm::Constant::getNullValue(ArgType); 2023 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg"); 2024 Value *Inverse = Builder.CreateNot(ArgValue, "not"); 2025 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue); 2026 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()}); 2027 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1)); 2028 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2029 "cast"); 2030 return RValue::get(Result); 2031 } 2032 case Builtin::BI__builtin_ctzs: 2033 case Builtin::BI__builtin_ctz: 2034 case Builtin::BI__builtin_ctzl: 2035 case Builtin::BI__builtin_ctzll: { 2036 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); 2037 2038 llvm::Type *ArgType = ArgValue->getType(); 2039 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 2040 2041 llvm::Type *ResultType = ConvertType(E->getType()); 2042 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 2043 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 2044 if (Result->getType() != ResultType) 2045 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2046 "cast"); 2047 return RValue::get(Result); 2048 } 2049 case Builtin::BI__builtin_clzs: 2050 case Builtin::BI__builtin_clz: 2051 case Builtin::BI__builtin_clzl: 2052 case Builtin::BI__builtin_clzll: { 2053 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); 2054 2055 llvm::Type *ArgType = ArgValue->getType(); 2056 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 2057 2058 llvm::Type *ResultType = ConvertType(E->getType()); 2059 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 2060 Value *Result 
= Builder.CreateCall(F, {ArgValue, ZeroUndef}); 2061 if (Result->getType() != ResultType) 2062 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2063 "cast"); 2064 return RValue::get(Result); 2065 } 2066 case Builtin::BI__builtin_ffs: 2067 case Builtin::BI__builtin_ffsl: 2068 case Builtin::BI__builtin_ffsll: { 2069 // ffs(x) -> x ? cttz(x) + 1 : 0 2070 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 2071 2072 llvm::Type *ArgType = ArgValue->getType(); 2073 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 2074 2075 llvm::Type *ResultType = ConvertType(E->getType()); 2076 Value *Tmp = 2077 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 2078 llvm::ConstantInt::get(ArgType, 1)); 2079 Value *Zero = llvm::Constant::getNullValue(ArgType); 2080 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 2081 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 2082 if (Result->getType() != ResultType) 2083 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2084 "cast"); 2085 return RValue::get(Result); 2086 } 2087 case Builtin::BI__builtin_parity: 2088 case Builtin::BI__builtin_parityl: 2089 case Builtin::BI__builtin_parityll: { 2090 // parity(x) -> ctpop(x) & 1 2091 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 2092 2093 llvm::Type *ArgType = ArgValue->getType(); 2094 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 2095 2096 llvm::Type *ResultType = ConvertType(E->getType()); 2097 Value *Tmp = Builder.CreateCall(F, ArgValue); 2098 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 2099 if (Result->getType() != ResultType) 2100 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2101 "cast"); 2102 return RValue::get(Result); 2103 } 2104 case Builtin::BI__lzcnt16: 2105 case Builtin::BI__lzcnt: 2106 case Builtin::BI__lzcnt64: { 2107 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 2108 2109 llvm::Type *ArgType = ArgValue->getType(); 2110 
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 2111 2112 llvm::Type *ResultType = ConvertType(E->getType()); 2113 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); 2114 if (Result->getType() != ResultType) 2115 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2116 "cast"); 2117 return RValue::get(Result); 2118 } 2119 case Builtin::BI__popcnt16: 2120 case Builtin::BI__popcnt: 2121 case Builtin::BI__popcnt64: 2122 case Builtin::BI__builtin_popcount: 2123 case Builtin::BI__builtin_popcountl: 2124 case Builtin::BI__builtin_popcountll: { 2125 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 2126 2127 llvm::Type *ArgType = ArgValue->getType(); 2128 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 2129 2130 llvm::Type *ResultType = ConvertType(E->getType()); 2131 Value *Result = Builder.CreateCall(F, ArgValue); 2132 if (Result->getType() != ResultType) 2133 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2134 "cast"); 2135 return RValue::get(Result); 2136 } 2137 case Builtin::BI__builtin_unpredictable: { 2138 // Always return the argument of __builtin_unpredictable. LLVM does not 2139 // handle this builtin. Metadata for this builtin should be added directly 2140 // to instructions such as branches or switches that use it. 2141 return RValue::get(EmitScalarExpr(E->getArg(0))); 2142 } 2143 case Builtin::BI__builtin_expect: { 2144 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 2145 llvm::Type *ArgType = ArgValue->getType(); 2146 2147 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 2148 // Don't generate llvm.expect on -O0 as the backend won't use it for 2149 // anything. 2150 // Note, we still IRGen ExpectedValue because it could have side-effects. 
2151 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 2152 return RValue::get(ArgValue); 2153 2154 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 2155 Value *Result = 2156 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 2157 return RValue::get(Result); 2158 } 2159 case Builtin::BI__builtin_assume_aligned: { 2160 const Expr *Ptr = E->getArg(0); 2161 Value *PtrValue = EmitScalarExpr(Ptr); 2162 Value *OffsetValue = 2163 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; 2164 2165 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 2166 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 2167 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) 2168 AlignmentCI = ConstantInt::get(AlignmentCI->getType(), 2169 llvm::Value::MaximumAlignment); 2170 2171 emitAlignmentAssumption(PtrValue, Ptr, 2172 /*The expr loc is sufficient.*/ SourceLocation(), 2173 AlignmentCI, OffsetValue); 2174 return RValue::get(PtrValue); 2175 } 2176 case Builtin::BI__assume: 2177 case Builtin::BI__builtin_assume: { 2178 if (E->getArg(0)->HasSideEffects(getContext())) 2179 return RValue::get(nullptr); 2180 2181 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 2182 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 2183 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 2184 } 2185 case Builtin::BI__builtin_bswap16: 2186 case Builtin::BI__builtin_bswap32: 2187 case Builtin::BI__builtin_bswap64: { 2188 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 2189 } 2190 case Builtin::BI__builtin_bitreverse8: 2191 case Builtin::BI__builtin_bitreverse16: 2192 case Builtin::BI__builtin_bitreverse32: 2193 case Builtin::BI__builtin_bitreverse64: { 2194 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 2195 } 2196 case Builtin::BI__builtin_rotateleft8: 2197 case Builtin::BI__builtin_rotateleft16: 2198 case Builtin::BI__builtin_rotateleft32: 2199 case Builtin::BI__builtin_rotateleft64: 2200 
case Builtin::BI_rotl8: // Microsoft variants of rotate left 2201 case Builtin::BI_rotl16: 2202 case Builtin::BI_rotl: 2203 case Builtin::BI_lrotl: 2204 case Builtin::BI_rotl64: 2205 return emitRotate(E, false); 2206 2207 case Builtin::BI__builtin_rotateright8: 2208 case Builtin::BI__builtin_rotateright16: 2209 case Builtin::BI__builtin_rotateright32: 2210 case Builtin::BI__builtin_rotateright64: 2211 case Builtin::BI_rotr8: // Microsoft variants of rotate right 2212 case Builtin::BI_rotr16: 2213 case Builtin::BI_rotr: 2214 case Builtin::BI_lrotr: 2215 case Builtin::BI_rotr64: 2216 return emitRotate(E, true); 2217 2218 case Builtin::BI__builtin_constant_p: { 2219 llvm::Type *ResultType = ConvertType(E->getType()); 2220 2221 const Expr *Arg = E->getArg(0); 2222 QualType ArgType = Arg->getType(); 2223 // FIXME: The allowance for Obj-C pointers and block pointers is historical 2224 // and likely a mistake. 2225 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() && 2226 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType()) 2227 // Per the GCC documentation, only numeric constants are recognized after 2228 // inlining. 2229 return RValue::get(ConstantInt::get(ResultType, 0)); 2230 2231 if (Arg->HasSideEffects(getContext())) 2232 // The argument is unevaluated, so be conservative if it might have 2233 // side-effects. 2234 return RValue::get(ConstantInt::get(ResultType, 0)); 2235 2236 Value *ArgValue = EmitScalarExpr(Arg); 2237 if (ArgType->isObjCObjectPointerType()) { 2238 // Convert Objective-C objects to id because we cannot distinguish between 2239 // LLVM types for Obj-C classes as they are opaque. 
2240 ArgType = CGM.getContext().getObjCIdType(); 2241 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType)); 2242 } 2243 Function *F = 2244 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); 2245 Value *Result = Builder.CreateCall(F, ArgValue); 2246 if (Result->getType() != ResultType) 2247 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); 2248 return RValue::get(Result); 2249 } 2250 case Builtin::BI__builtin_dynamic_object_size: 2251 case Builtin::BI__builtin_object_size: { 2252 unsigned Type = 2253 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 2254 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 2255 2256 // We pass this builtin onto the optimizer so that it can figure out the 2257 // object size in more complex cases. 2258 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; 2259 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 2260 /*EmittedE=*/nullptr, IsDynamic)); 2261 } 2262 case Builtin::BI__builtin_prefetch: { 2263 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 2264 // FIXME: Technically these constants should of type 'int', yes? 2265 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 2266 llvm::ConstantInt::get(Int32Ty, 0); 2267 Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : 2268 llvm::ConstantInt::get(Int32Ty, 3); 2269 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 2270 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); 2271 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 2272 } 2273 case Builtin::BI__builtin_readcyclecounter: { 2274 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 2275 return RValue::get(Builder.CreateCall(F)); 2276 } 2277 case Builtin::BI__builtin___clear_cache: { 2278 Value *Begin = EmitScalarExpr(E->getArg(0)); 2279 Value *End = EmitScalarExpr(E->getArg(1)); 2280 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); 2281 return RValue::get(Builder.CreateCall(F, {Begin, End})); 2282 } 2283 case Builtin::BI__builtin_trap: 2284 return RValue::get(EmitTrapCall(Intrinsic::trap)); 2285 case Builtin::BI__debugbreak: 2286 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 2287 case Builtin::BI__builtin_unreachable: { 2288 EmitUnreachable(E->getExprLoc()); 2289 2290 // We do need to preserve an insertion point. 2291 EmitBlock(createBasicBlock("unreachable.cont")); 2292 2293 return RValue::get(nullptr); 2294 } 2295 2296 case Builtin::BI__builtin_powi: 2297 case Builtin::BI__builtin_powif: 2298 case Builtin::BI__builtin_powil: 2299 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( 2300 *this, E, Intrinsic::powi, Intrinsic::experimental_constrained_powi)); 2301 2302 case Builtin::BI__builtin_isgreater: 2303 case Builtin::BI__builtin_isgreaterequal: 2304 case Builtin::BI__builtin_isless: 2305 case Builtin::BI__builtin_islessequal: 2306 case Builtin::BI__builtin_islessgreater: 2307 case Builtin::BI__builtin_isunordered: { 2308 // Ordered comparisons: we know the arguments to these are matching scalar 2309 // floating point values. 
2310 Value *LHS = EmitScalarExpr(E->getArg(0)); 2311 Value *RHS = EmitScalarExpr(E->getArg(1)); 2312 2313 switch (BuiltinID) { 2314 default: llvm_unreachable("Unknown ordered comparison"); 2315 case Builtin::BI__builtin_isgreater: 2316 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 2317 break; 2318 case Builtin::BI__builtin_isgreaterequal: 2319 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 2320 break; 2321 case Builtin::BI__builtin_isless: 2322 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 2323 break; 2324 case Builtin::BI__builtin_islessequal: 2325 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 2326 break; 2327 case Builtin::BI__builtin_islessgreater: 2328 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 2329 break; 2330 case Builtin::BI__builtin_isunordered: 2331 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 2332 break; 2333 } 2334 // ZExt bool to int type. 2335 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 2336 } 2337 case Builtin::BI__builtin_isnan: { 2338 Value *V = EmitScalarExpr(E->getArg(0)); 2339 V = Builder.CreateFCmpUNO(V, V, "cmp"); 2340 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 2341 } 2342 2343 case Builtin::BIfinite: 2344 case Builtin::BI__finite: 2345 case Builtin::BIfinitef: 2346 case Builtin::BI__finitef: 2347 case Builtin::BIfinitel: 2348 case Builtin::BI__finitel: 2349 case Builtin::BI__builtin_isinf: 2350 case Builtin::BI__builtin_isfinite: { 2351 // isinf(x) --> fabs(x) == infinity 2352 // isfinite(x) --> fabs(x) != infinity 2353 // x != NaN via the ordered compare in either case. 2354 Value *V = EmitScalarExpr(E->getArg(0)); 2355 Value *Fabs = EmitFAbs(*this, V); 2356 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 2357 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 2358 ? 
CmpInst::FCMP_OEQ 2359 : CmpInst::FCMP_ONE; 2360 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 2361 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 2362 } 2363 2364 case Builtin::BI__builtin_isinf_sign: { 2365 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0 2366 Value *Arg = EmitScalarExpr(E->getArg(0)); 2367 Value *AbsArg = EmitFAbs(*this, Arg); 2368 Value *IsInf = Builder.CreateFCmpOEQ( 2369 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 2370 Value *IsNeg = EmitSignBit(*this, Arg); 2371 2372 llvm::Type *IntTy = ConvertType(E->getType()); 2373 Value *Zero = Constant::getNullValue(IntTy); 2374 Value *One = ConstantInt::get(IntTy, 1); 2375 Value *NegativeOne = ConstantInt::get(IntTy, -1); 2376 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 2377 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 2378 return RValue::get(Result); 2379 } 2380 2381 case Builtin::BI__builtin_isnormal: { 2382 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 2383 Value *V = EmitScalarExpr(E->getArg(0)); 2384 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 2385 2386 Value *Abs = EmitFAbs(*this, V); 2387 Value *IsLessThanInf = 2388 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 2389 APFloat Smallest = APFloat::getSmallestNormalized( 2390 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 2391 Value *IsNormal = 2392 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 2393 "isnormal"); 2394 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 2395 V = Builder.CreateAnd(V, IsNormal, "and"); 2396 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 2397 } 2398 2399 case Builtin::BI__builtin_flt_rounds: { 2400 Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds); 2401 2402 llvm::Type *ResultType = ConvertType(E->getType()); 2403 Value *Result = Builder.CreateCall(F); 2404 if (Result->getType() 
!= ResultType) 2405 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 2406 "cast"); 2407 return RValue::get(Result); 2408 } 2409 2410 case Builtin::BI__builtin_fpclassify: { 2411 Value *V = EmitScalarExpr(E->getArg(5)); 2412 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 2413 2414 // Create Result 2415 BasicBlock *Begin = Builder.GetInsertBlock(); 2416 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 2417 Builder.SetInsertPoint(End); 2418 PHINode *Result = 2419 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 2420 "fpclassify_result"); 2421 2422 // if (V==0) return FP_ZERO 2423 Builder.SetInsertPoint(Begin); 2424 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 2425 "iszero"); 2426 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 2427 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 2428 Builder.CreateCondBr(IsZero, End, NotZero); 2429 Result->addIncoming(ZeroLiteral, Begin); 2430 2431 // if (V != V) return FP_NAN 2432 Builder.SetInsertPoint(NotZero); 2433 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 2434 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 2435 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 2436 Builder.CreateCondBr(IsNan, End, NotNan); 2437 Result->addIncoming(NanLiteral, NotZero); 2438 2439 // if (fabs(V) == infinity) return FP_INFINITY 2440 Builder.SetInsertPoint(NotNan); 2441 Value *VAbs = EmitFAbs(*this, V); 2442 Value *IsInf = 2443 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 2444 "isinf"); 2445 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 2446 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 2447 Builder.CreateCondBr(IsInf, End, NotInf); 2448 Result->addIncoming(InfLiteral, NotNan); 2449 2450 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 2451 Builder.SetInsertPoint(NotInf); 2452 APFloat Smallest = APFloat::getSmallestNormalized( 2453 
getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 2454 Value *IsNormal = 2455 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 2456 "isnormal"); 2457 Value *NormalResult = 2458 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 2459 EmitScalarExpr(E->getArg(3))); 2460 Builder.CreateBr(End); 2461 Result->addIncoming(NormalResult, NotInf); 2462 2463 // return Result 2464 Builder.SetInsertPoint(End); 2465 return RValue::get(Result); 2466 } 2467 2468 case Builtin::BIalloca: 2469 case Builtin::BI_alloca: 2470 case Builtin::BI__builtin_alloca: { 2471 Value *Size = EmitScalarExpr(E->getArg(0)); 2472 const TargetInfo &TI = getContext().getTargetInfo(); 2473 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 2474 const Align SuitableAlignmentInBytes = 2475 CGM.getContext() 2476 .toCharUnitsFromBits(TI.getSuitableAlign()) 2477 .getAsAlign(); 2478 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 2479 AI->setAlignment(SuitableAlignmentInBytes); 2480 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); 2481 return RValue::get(AI); 2482 } 2483 2484 case Builtin::BI__builtin_alloca_with_align: { 2485 Value *Size = EmitScalarExpr(E->getArg(0)); 2486 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 2487 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 2488 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 2489 const Align AlignmentInBytes = 2490 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign(); 2491 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 2492 AI->setAlignment(AlignmentInBytes); 2493 initializeAlloca(*this, AI, Size, AlignmentInBytes); 2494 return RValue::get(AI); 2495 } 2496 2497 case Builtin::BIbzero: 2498 case Builtin::BI__builtin_bzero: { 2499 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2500 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 2501 
EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 2502 E->getArg(0)->getExprLoc(), FD, 0); 2503 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 2504 return RValue::get(nullptr); 2505 } 2506 case Builtin::BImemcpy: 2507 case Builtin::BI__builtin_memcpy: 2508 case Builtin::BImempcpy: 2509 case Builtin::BI__builtin_mempcpy: { 2510 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2511 Address Src = EmitPointerWithAlignment(E->getArg(1)); 2512 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 2513 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 2514 E->getArg(0)->getExprLoc(), FD, 0); 2515 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 2516 E->getArg(1)->getExprLoc(), FD, 1); 2517 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 2518 if (BuiltinID == Builtin::BImempcpy || 2519 BuiltinID == Builtin::BI__builtin_mempcpy) 2520 return RValue::get(Builder.CreateInBoundsGEP(Dest.getPointer(), SizeVal)); 2521 else 2522 return RValue::get(Dest.getPointer()); 2523 } 2524 2525 case Builtin::BI__builtin_memcpy_inline: { 2526 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2527 Address Src = EmitPointerWithAlignment(E->getArg(1)); 2528 uint64_t Size = 2529 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); 2530 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 2531 E->getArg(0)->getExprLoc(), FD, 0); 2532 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 2533 E->getArg(1)->getExprLoc(), FD, 1); 2534 Builder.CreateMemCpyInline(Dest, Src, Size); 2535 return RValue::get(nullptr); 2536 } 2537 2538 case Builtin::BI__builtin_char_memchr: 2539 BuiltinID = Builtin::BI__builtin_memchr; 2540 break; 2541 2542 case Builtin::BI__builtin___memcpy_chk: { 2543 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
2544 Expr::EvalResult SizeResult, DstSizeResult; 2545 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || 2546 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) 2547 break; 2548 llvm::APSInt Size = SizeResult.Val.getInt(); 2549 llvm::APSInt DstSize = DstSizeResult.Val.getInt(); 2550 if (Size.ugt(DstSize)) 2551 break; 2552 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2553 Address Src = EmitPointerWithAlignment(E->getArg(1)); 2554 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 2555 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 2556 return RValue::get(Dest.getPointer()); 2557 } 2558 2559 case Builtin::BI__builtin_objc_memmove_collectable: { 2560 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 2561 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 2562 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 2563 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 2564 DestAddr, SrcAddr, SizeVal); 2565 return RValue::get(DestAddr.getPointer()); 2566 } 2567 2568 case Builtin::BI__builtin___memmove_chk: { 2569 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
2570 Expr::EvalResult SizeResult, DstSizeResult; 2571 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || 2572 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) 2573 break; 2574 llvm::APSInt Size = SizeResult.Val.getInt(); 2575 llvm::APSInt DstSize = DstSizeResult.Val.getInt(); 2576 if (Size.ugt(DstSize)) 2577 break; 2578 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2579 Address Src = EmitPointerWithAlignment(E->getArg(1)); 2580 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 2581 Builder.CreateMemMove(Dest, Src, SizeVal, false); 2582 return RValue::get(Dest.getPointer()); 2583 } 2584 2585 case Builtin::BImemmove: 2586 case Builtin::BI__builtin_memmove: { 2587 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2588 Address Src = EmitPointerWithAlignment(E->getArg(1)); 2589 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 2590 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 2591 E->getArg(0)->getExprLoc(), FD, 0); 2592 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 2593 E->getArg(1)->getExprLoc(), FD, 1); 2594 Builder.CreateMemMove(Dest, Src, SizeVal, false); 2595 return RValue::get(Dest.getPointer()); 2596 } 2597 case Builtin::BImemset: 2598 case Builtin::BI__builtin_memset: { 2599 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2600 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 2601 Builder.getInt8Ty()); 2602 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 2603 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 2604 E->getArg(0)->getExprLoc(), FD, 0); 2605 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 2606 return RValue::get(Dest.getPointer()); 2607 } 2608 case Builtin::BI__builtin___memset_chk: { 2609 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
2610 Expr::EvalResult SizeResult, DstSizeResult; 2611 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || 2612 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) 2613 break; 2614 llvm::APSInt Size = SizeResult.Val.getInt(); 2615 llvm::APSInt DstSize = DstSizeResult.Val.getInt(); 2616 if (Size.ugt(DstSize)) 2617 break; 2618 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 2619 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 2620 Builder.getInt8Ty()); 2621 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 2622 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 2623 return RValue::get(Dest.getPointer()); 2624 } 2625 case Builtin::BI__builtin_wmemcmp: { 2626 // The MSVC runtime library does not provide a definition of wmemcmp, so we 2627 // need an inline implementation. 2628 if (!getTarget().getTriple().isOSMSVCRT()) 2629 break; 2630 2631 llvm::Type *WCharTy = ConvertType(getContext().WCharTy); 2632 2633 Value *Dst = EmitScalarExpr(E->getArg(0)); 2634 Value *Src = EmitScalarExpr(E->getArg(1)); 2635 Value *Size = EmitScalarExpr(E->getArg(2)); 2636 2637 BasicBlock *Entry = Builder.GetInsertBlock(); 2638 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt"); 2639 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt"); 2640 BasicBlock *Next = createBasicBlock("wmemcmp.next"); 2641 BasicBlock *Exit = createBasicBlock("wmemcmp.exit"); 2642 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); 2643 Builder.CreateCondBr(SizeEq0, Exit, CmpGT); 2644 2645 EmitBlock(CmpGT); 2646 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2); 2647 DstPhi->addIncoming(Dst, Entry); 2648 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2); 2649 SrcPhi->addIncoming(Src, Entry); 2650 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); 2651 SizePhi->addIncoming(Size, Entry); 2652 CharUnits WCharAlign = 2653 getContext().getTypeAlignInChars(getContext().WCharTy); 2654 Value *DstCh = 
Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign); 2655 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign); 2656 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh); 2657 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT); 2658 2659 EmitBlock(CmpLT); 2660 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh); 2661 Builder.CreateCondBr(DstLtSrc, Exit, Next); 2662 2663 EmitBlock(Next); 2664 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1); 2665 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1); 2666 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); 2667 Value *NextSizeEq0 = 2668 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); 2669 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT); 2670 DstPhi->addIncoming(NextDst, Next); 2671 SrcPhi->addIncoming(NextSrc, Next); 2672 SizePhi->addIncoming(NextSize, Next); 2673 2674 EmitBlock(Exit); 2675 PHINode *Ret = Builder.CreatePHI(IntTy, 4); 2676 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry); 2677 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT); 2678 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT); 2679 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next); 2680 return RValue::get(Ret); 2681 } 2682 case Builtin::BI__builtin_dwarf_cfa: { 2683 // The offset in bytes from the first argument to the CFA. 2684 // 2685 // Why on earth is this in the frontend? Is there any reason at 2686 // all that the backend can't reasonably determine this while 2687 // lowering llvm.eh.dwarf.cfa()? 2688 // 2689 // TODO: If there's a satisfactory reason, add a target hook for 2690 // this instead of hard-coding 0, which is correct for most targets. 
2691 int32_t Offset = 0; 2692 2693 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 2694 return RValue::get(Builder.CreateCall(F, 2695 llvm::ConstantInt::get(Int32Ty, Offset))); 2696 } 2697 case Builtin::BI__builtin_return_address: { 2698 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 2699 getContext().UnsignedIntTy); 2700 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); 2701 return RValue::get(Builder.CreateCall(F, Depth)); 2702 } 2703 case Builtin::BI_ReturnAddress: { 2704 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); 2705 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 2706 } 2707 case Builtin::BI__builtin_frame_address: { 2708 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 2709 getContext().UnsignedIntTy); 2710 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy); 2711 return RValue::get(Builder.CreateCall(F, Depth)); 2712 } 2713 case Builtin::BI__builtin_extract_return_addr: { 2714 Value *Address = EmitScalarExpr(E->getArg(0)); 2715 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 2716 return RValue::get(Result); 2717 } 2718 case Builtin::BI__builtin_frob_return_addr: { 2719 Value *Address = EmitScalarExpr(E->getArg(0)); 2720 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 2721 return RValue::get(Result); 2722 } 2723 case Builtin::BI__builtin_dwarf_sp_column: { 2724 llvm::IntegerType *Ty 2725 = cast<llvm::IntegerType>(ConvertType(E->getType())); 2726 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 2727 if (Column == -1) { 2728 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 2729 return RValue::get(llvm::UndefValue::get(Ty)); 2730 } 2731 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 2732 } 2733 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 2734 Value *Address = EmitScalarExpr(E->getArg(0)); 2735 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 2736 
CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 2737 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 2738 } 2739 case Builtin::BI__builtin_eh_return: { 2740 Value *Int = EmitScalarExpr(E->getArg(0)); 2741 Value *Ptr = EmitScalarExpr(E->getArg(1)); 2742 2743 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 2744 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 2745 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 2746 Function *F = 2747 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 2748 : Intrinsic::eh_return_i64); 2749 Builder.CreateCall(F, {Int, Ptr}); 2750 Builder.CreateUnreachable(); 2751 2752 // We do need to preserve an insertion point. 2753 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 2754 2755 return RValue::get(nullptr); 2756 } 2757 case Builtin::BI__builtin_unwind_init: { 2758 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 2759 return RValue::get(Builder.CreateCall(F)); 2760 } 2761 case Builtin::BI__builtin_extend_pointer: { 2762 // Extends a pointer to the size of an _Unwind_Word, which is 2763 // uint64_t on all platforms. Generally this gets poked into a 2764 // register and eventually used as an address, so if the 2765 // addressing registers are wider than pointers and the platform 2766 // doesn't implicitly ignore high-order bits when doing 2767 // addressing, we need to make sure we zext / sext based on 2768 // the platform's expectations. 2769 // 2770 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 2771 2772 // Cast the pointer to intptr_t. 2773 Value *Ptr = EmitScalarExpr(E->getArg(0)); 2774 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 2775 2776 // If that's 64 bits, we're done. 2777 if (IntPtrTy->getBitWidth() == 64) 2778 return RValue::get(Result); 2779 2780 // Otherwise, ask the codegen data what to do. 
2781 if (getTargetHooks().extendPointerWithSExt()) 2782 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 2783 else 2784 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 2785 } 2786 case Builtin::BI__builtin_setjmp: { 2787 // Buffer is a void**. 2788 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 2789 2790 // Store the frame pointer to the setjmp buffer. 2791 Value *FrameAddr = Builder.CreateCall( 2792 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy), 2793 ConstantInt::get(Int32Ty, 0)); 2794 Builder.CreateStore(FrameAddr, Buf); 2795 2796 // Store the stack pointer to the setjmp buffer. 2797 Value *StackAddr = 2798 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 2799 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); 2800 Builder.CreateStore(StackAddr, StackSaveSlot); 2801 2802 // Call LLVM's EH setjmp, which is lightweight. 2803 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 2804 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 2805 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 2806 } 2807 case Builtin::BI__builtin_longjmp: { 2808 Value *Buf = EmitScalarExpr(E->getArg(0)); 2809 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 2810 2811 // Call LLVM's EH longjmp, which is lightweight. 2812 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 2813 2814 // longjmp doesn't return; mark this as unreachable. 2815 Builder.CreateUnreachable(); 2816 2817 // We do need to preserve an insertion point. 
2818 EmitBlock(createBasicBlock("longjmp.cont")); 2819 2820 return RValue::get(nullptr); 2821 } 2822 case Builtin::BI__builtin_launder: { 2823 const Expr *Arg = E->getArg(0); 2824 QualType ArgTy = Arg->getType()->getPointeeType(); 2825 Value *Ptr = EmitScalarExpr(Arg); 2826 if (TypeRequiresBuiltinLaunder(CGM, ArgTy)) 2827 Ptr = Builder.CreateLaunderInvariantGroup(Ptr); 2828 2829 return RValue::get(Ptr); 2830 } 2831 case Builtin::BI__sync_fetch_and_add: 2832 case Builtin::BI__sync_fetch_and_sub: 2833 case Builtin::BI__sync_fetch_and_or: 2834 case Builtin::BI__sync_fetch_and_and: 2835 case Builtin::BI__sync_fetch_and_xor: 2836 case Builtin::BI__sync_fetch_and_nand: 2837 case Builtin::BI__sync_add_and_fetch: 2838 case Builtin::BI__sync_sub_and_fetch: 2839 case Builtin::BI__sync_and_and_fetch: 2840 case Builtin::BI__sync_or_and_fetch: 2841 case Builtin::BI__sync_xor_and_fetch: 2842 case Builtin::BI__sync_nand_and_fetch: 2843 case Builtin::BI__sync_val_compare_and_swap: 2844 case Builtin::BI__sync_bool_compare_and_swap: 2845 case Builtin::BI__sync_lock_test_and_set: 2846 case Builtin::BI__sync_lock_release: 2847 case Builtin::BI__sync_swap: 2848 llvm_unreachable("Shouldn't make it through sema"); 2849 case Builtin::BI__sync_fetch_and_add_1: 2850 case Builtin::BI__sync_fetch_and_add_2: 2851 case Builtin::BI__sync_fetch_and_add_4: 2852 case Builtin::BI__sync_fetch_and_add_8: 2853 case Builtin::BI__sync_fetch_and_add_16: 2854 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 2855 case Builtin::BI__sync_fetch_and_sub_1: 2856 case Builtin::BI__sync_fetch_and_sub_2: 2857 case Builtin::BI__sync_fetch_and_sub_4: 2858 case Builtin::BI__sync_fetch_and_sub_8: 2859 case Builtin::BI__sync_fetch_and_sub_16: 2860 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 2861 case Builtin::BI__sync_fetch_and_or_1: 2862 case Builtin::BI__sync_fetch_and_or_2: 2863 case Builtin::BI__sync_fetch_and_or_4: 2864 case Builtin::BI__sync_fetch_and_or_8: 2865 case 
Builtin::BI__sync_fetch_and_or_16: 2866 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 2867 case Builtin::BI__sync_fetch_and_and_1: 2868 case Builtin::BI__sync_fetch_and_and_2: 2869 case Builtin::BI__sync_fetch_and_and_4: 2870 case Builtin::BI__sync_fetch_and_and_8: 2871 case Builtin::BI__sync_fetch_and_and_16: 2872 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 2873 case Builtin::BI__sync_fetch_and_xor_1: 2874 case Builtin::BI__sync_fetch_and_xor_2: 2875 case Builtin::BI__sync_fetch_and_xor_4: 2876 case Builtin::BI__sync_fetch_and_xor_8: 2877 case Builtin::BI__sync_fetch_and_xor_16: 2878 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 2879 case Builtin::BI__sync_fetch_and_nand_1: 2880 case Builtin::BI__sync_fetch_and_nand_2: 2881 case Builtin::BI__sync_fetch_and_nand_4: 2882 case Builtin::BI__sync_fetch_and_nand_8: 2883 case Builtin::BI__sync_fetch_and_nand_16: 2884 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 2885 2886 // Clang extensions: not overloaded yet. 
2887 case Builtin::BI__sync_fetch_and_min: 2888 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 2889 case Builtin::BI__sync_fetch_and_max: 2890 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 2891 case Builtin::BI__sync_fetch_and_umin: 2892 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 2893 case Builtin::BI__sync_fetch_and_umax: 2894 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 2895 2896 case Builtin::BI__sync_add_and_fetch_1: 2897 case Builtin::BI__sync_add_and_fetch_2: 2898 case Builtin::BI__sync_add_and_fetch_4: 2899 case Builtin::BI__sync_add_and_fetch_8: 2900 case Builtin::BI__sync_add_and_fetch_16: 2901 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 2902 llvm::Instruction::Add); 2903 case Builtin::BI__sync_sub_and_fetch_1: 2904 case Builtin::BI__sync_sub_and_fetch_2: 2905 case Builtin::BI__sync_sub_and_fetch_4: 2906 case Builtin::BI__sync_sub_and_fetch_8: 2907 case Builtin::BI__sync_sub_and_fetch_16: 2908 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 2909 llvm::Instruction::Sub); 2910 case Builtin::BI__sync_and_and_fetch_1: 2911 case Builtin::BI__sync_and_and_fetch_2: 2912 case Builtin::BI__sync_and_and_fetch_4: 2913 case Builtin::BI__sync_and_and_fetch_8: 2914 case Builtin::BI__sync_and_and_fetch_16: 2915 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 2916 llvm::Instruction::And); 2917 case Builtin::BI__sync_or_and_fetch_1: 2918 case Builtin::BI__sync_or_and_fetch_2: 2919 case Builtin::BI__sync_or_and_fetch_4: 2920 case Builtin::BI__sync_or_and_fetch_8: 2921 case Builtin::BI__sync_or_and_fetch_16: 2922 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 2923 llvm::Instruction::Or); 2924 case Builtin::BI__sync_xor_and_fetch_1: 2925 case Builtin::BI__sync_xor_and_fetch_2: 2926 case Builtin::BI__sync_xor_and_fetch_4: 2927 case Builtin::BI__sync_xor_and_fetch_8: 2928 case Builtin::BI__sync_xor_and_fetch_16: 2929 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 2930 llvm::Instruction::Xor); 2931 case Builtin::BI__sync_nand_and_fetch_1: 2932 case Builtin::BI__sync_nand_and_fetch_2: 2933 case Builtin::BI__sync_nand_and_fetch_4: 2934 case Builtin::BI__sync_nand_and_fetch_8: 2935 case Builtin::BI__sync_nand_and_fetch_16: 2936 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 2937 llvm::Instruction::And, true); 2938 2939 case Builtin::BI__sync_val_compare_and_swap_1: 2940 case Builtin::BI__sync_val_compare_and_swap_2: 2941 case Builtin::BI__sync_val_compare_and_swap_4: 2942 case Builtin::BI__sync_val_compare_and_swap_8: 2943 case Builtin::BI__sync_val_compare_and_swap_16: 2944 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 2945 2946 case Builtin::BI__sync_bool_compare_and_swap_1: 2947 case Builtin::BI__sync_bool_compare_and_swap_2: 2948 case Builtin::BI__sync_bool_compare_and_swap_4: 2949 case Builtin::BI__sync_bool_compare_and_swap_8: 2950 case Builtin::BI__sync_bool_compare_and_swap_16: 2951 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 2952 2953 case Builtin::BI__sync_swap_1: 2954 case Builtin::BI__sync_swap_2: 2955 case Builtin::BI__sync_swap_4: 2956 case Builtin::BI__sync_swap_8: 2957 case Builtin::BI__sync_swap_16: 2958 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 2959 2960 case Builtin::BI__sync_lock_test_and_set_1: 2961 case Builtin::BI__sync_lock_test_and_set_2: 2962 case Builtin::BI__sync_lock_test_and_set_4: 2963 case Builtin::BI__sync_lock_test_and_set_8: 2964 case Builtin::BI__sync_lock_test_and_set_16: 2965 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 2966 2967 case Builtin::BI__sync_lock_release_1: 2968 case Builtin::BI__sync_lock_release_2: 2969 case Builtin::BI__sync_lock_release_4: 2970 case Builtin::BI__sync_lock_release_8: 2971 case Builtin::BI__sync_lock_release_16: { 2972 Value *Ptr = EmitScalarExpr(E->getArg(0)); 2973 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 2974 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 2975 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 2976 StoreSize.getQuantity() * 8); 2977 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 2978 llvm::StoreInst *Store = 2979 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 2980 StoreSize); 2981 Store->setAtomic(llvm::AtomicOrdering::Release); 2982 return RValue::get(nullptr); 2983 } 2984 2985 case Builtin::BI__sync_synchronize: { 2986 // We assume this is supposed to correspond to a C++0x-style 2987 // sequentially-consistent fence (i.e. this is only usable for 2988 // synchronization, not device I/O or anything like that). This intrinsic 2989 // is really badly designed in the sense that in theory, there isn't 2990 // any way to safely use it... but in practice, it mostly works 2991 // to use it with non-atomic loads and stores to get acquire/release 2992 // semantics. 2993 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 2994 return RValue::get(nullptr); 2995 } 2996 2997 case Builtin::BI__builtin_nontemporal_load: 2998 return RValue::get(EmitNontemporalLoad(*this, E)); 2999 case Builtin::BI__builtin_nontemporal_store: 3000 return RValue::get(EmitNontemporalStore(*this, E)); 3001 case Builtin::BI__c11_atomic_is_lock_free: 3002 case Builtin::BI__atomic_is_lock_free: { 3003 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 3004 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 3005 // _Atomic(T) is always properly-aligned. 
3006 const char *LibCallName = "__atomic_is_lock_free"; 3007 CallArgList Args; 3008 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 3009 getContext().getSizeType()); 3010 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 3011 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 3012 getContext().VoidPtrTy); 3013 else 3014 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 3015 getContext().VoidPtrTy); 3016 const CGFunctionInfo &FuncInfo = 3017 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 3018 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 3019 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 3020 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 3021 ReturnValueSlot(), Args); 3022 } 3023 3024 case Builtin::BI__atomic_test_and_set: { 3025 // Look at the argument type to determine whether this is a volatile 3026 // operation. The parameter type is always volatile. 3027 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 3028 bool Volatile = 3029 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 3030 3031 Value *Ptr = EmitScalarExpr(E->getArg(0)); 3032 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 3033 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 3034 Value *NewVal = Builder.getInt8(1); 3035 Value *Order = EmitScalarExpr(E->getArg(1)); 3036 if (isa<llvm::ConstantInt>(Order)) { 3037 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 3038 AtomicRMWInst *Result = nullptr; 3039 switch (ord) { 3040 case 0: // memory_order_relaxed 3041 default: // invalid order 3042 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 3043 llvm::AtomicOrdering::Monotonic); 3044 break; 3045 case 1: // memory_order_consume 3046 case 2: // memory_order_acquire 3047 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 3048 llvm::AtomicOrdering::Acquire); 3049 break; 3050 case 3: // memory_order_release 
3051 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 3052 llvm::AtomicOrdering::Release); 3053 break; 3054 case 4: // memory_order_acq_rel 3055 3056 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 3057 llvm::AtomicOrdering::AcquireRelease); 3058 break; 3059 case 5: // memory_order_seq_cst 3060 Result = Builder.CreateAtomicRMW( 3061 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 3062 llvm::AtomicOrdering::SequentiallyConsistent); 3063 break; 3064 } 3065 Result->setVolatile(Volatile); 3066 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 3067 } 3068 3069 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 3070 3071 llvm::BasicBlock *BBs[5] = { 3072 createBasicBlock("monotonic", CurFn), 3073 createBasicBlock("acquire", CurFn), 3074 createBasicBlock("release", CurFn), 3075 createBasicBlock("acqrel", CurFn), 3076 createBasicBlock("seqcst", CurFn) 3077 }; 3078 llvm::AtomicOrdering Orders[5] = { 3079 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 3080 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 3081 llvm::AtomicOrdering::SequentiallyConsistent}; 3082 3083 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 3084 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 3085 3086 Builder.SetInsertPoint(ContBB); 3087 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 3088 3089 for (unsigned i = 0; i < 5; ++i) { 3090 Builder.SetInsertPoint(BBs[i]); 3091 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 3092 Ptr, NewVal, Orders[i]); 3093 RMW->setVolatile(Volatile); 3094 Result->addIncoming(RMW, BBs[i]); 3095 Builder.CreateBr(ContBB); 3096 } 3097 3098 SI->addCase(Builder.getInt32(0), BBs[0]); 3099 SI->addCase(Builder.getInt32(1), BBs[1]); 3100 SI->addCase(Builder.getInt32(2), BBs[1]); 3101 SI->addCase(Builder.getInt32(3), BBs[2]); 3102 SI->addCase(Builder.getInt32(4), BBs[3]); 3103 SI->addCase(Builder.getInt32(5), 
BBs[4]); 3104 3105 Builder.SetInsertPoint(ContBB); 3106 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 3107 } 3108 3109 case Builtin::BI__atomic_clear: { 3110 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 3111 bool Volatile = 3112 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 3113 3114 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 3115 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 3116 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 3117 Value *NewVal = Builder.getInt8(0); 3118 Value *Order = EmitScalarExpr(E->getArg(1)); 3119 if (isa<llvm::ConstantInt>(Order)) { 3120 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 3121 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 3122 switch (ord) { 3123 case 0: // memory_order_relaxed 3124 default: // invalid order 3125 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 3126 break; 3127 case 3: // memory_order_release 3128 Store->setOrdering(llvm::AtomicOrdering::Release); 3129 break; 3130 case 5: // memory_order_seq_cst 3131 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 3132 break; 3133 } 3134 return RValue::get(nullptr); 3135 } 3136 3137 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 3138 3139 llvm::BasicBlock *BBs[3] = { 3140 createBasicBlock("monotonic", CurFn), 3141 createBasicBlock("release", CurFn), 3142 createBasicBlock("seqcst", CurFn) 3143 }; 3144 llvm::AtomicOrdering Orders[3] = { 3145 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 3146 llvm::AtomicOrdering::SequentiallyConsistent}; 3147 3148 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 3149 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 3150 3151 for (unsigned i = 0; i < 3; ++i) { 3152 Builder.SetInsertPoint(BBs[i]); 3153 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 3154 Store->setOrdering(Orders[i]); 3155 
Builder.CreateBr(ContBB); 3156 } 3157 3158 SI->addCase(Builder.getInt32(0), BBs[0]); 3159 SI->addCase(Builder.getInt32(3), BBs[1]); 3160 SI->addCase(Builder.getInt32(5), BBs[2]); 3161 3162 Builder.SetInsertPoint(ContBB); 3163 return RValue::get(nullptr); 3164 } 3165 3166 case Builtin::BI__atomic_thread_fence: 3167 case Builtin::BI__atomic_signal_fence: 3168 case Builtin::BI__c11_atomic_thread_fence: 3169 case Builtin::BI__c11_atomic_signal_fence: { 3170 llvm::SyncScope::ID SSID; 3171 if (BuiltinID == Builtin::BI__atomic_signal_fence || 3172 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 3173 SSID = llvm::SyncScope::SingleThread; 3174 else 3175 SSID = llvm::SyncScope::System; 3176 Value *Order = EmitScalarExpr(E->getArg(0)); 3177 if (isa<llvm::ConstantInt>(Order)) { 3178 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 3179 switch (ord) { 3180 case 0: // memory_order_relaxed 3181 default: // invalid order 3182 break; 3183 case 1: // memory_order_consume 3184 case 2: // memory_order_acquire 3185 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 3186 break; 3187 case 3: // memory_order_release 3188 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 3189 break; 3190 case 4: // memory_order_acq_rel 3191 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 3192 break; 3193 case 5: // memory_order_seq_cst 3194 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 3195 break; 3196 } 3197 return RValue::get(nullptr); 3198 } 3199 3200 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 3201 AcquireBB = createBasicBlock("acquire", CurFn); 3202 ReleaseBB = createBasicBlock("release", CurFn); 3203 AcqRelBB = createBasicBlock("acqrel", CurFn); 3204 SeqCstBB = createBasicBlock("seqcst", CurFn); 3205 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 3206 3207 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 3208 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 
3209 3210 Builder.SetInsertPoint(AcquireBB); 3211 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 3212 Builder.CreateBr(ContBB); 3213 SI->addCase(Builder.getInt32(1), AcquireBB); 3214 SI->addCase(Builder.getInt32(2), AcquireBB); 3215 3216 Builder.SetInsertPoint(ReleaseBB); 3217 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 3218 Builder.CreateBr(ContBB); 3219 SI->addCase(Builder.getInt32(3), ReleaseBB); 3220 3221 Builder.SetInsertPoint(AcqRelBB); 3222 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 3223 Builder.CreateBr(ContBB); 3224 SI->addCase(Builder.getInt32(4), AcqRelBB); 3225 3226 Builder.SetInsertPoint(SeqCstBB); 3227 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 3228 Builder.CreateBr(ContBB); 3229 SI->addCase(Builder.getInt32(5), SeqCstBB); 3230 3231 Builder.SetInsertPoint(ContBB); 3232 return RValue::get(nullptr); 3233 } 3234 3235 case Builtin::BI__builtin_signbit: 3236 case Builtin::BI__builtin_signbitf: 3237 case Builtin::BI__builtin_signbitl: { 3238 return RValue::get( 3239 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 3240 ConvertType(E->getType()))); 3241 } 3242 case Builtin::BI__warn_memset_zero_len: 3243 return RValue::getIgnored(); 3244 case Builtin::BI__annotation: { 3245 // Re-encode each wide string to UTF8 and make an MDString. 3246 SmallVector<Metadata *, 1> Strings; 3247 for (const Expr *Arg : E->arguments()) { 3248 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts()); 3249 assert(Str->getCharByteWidth() == 2); 3250 StringRef WideBytes = Str->getBytes(); 3251 std::string StrUtf8; 3252 if (!convertUTF16ToUTF8String( 3253 makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { 3254 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); 3255 continue; 3256 } 3257 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); 3258 } 3259 3260 // Build an MDTuple of MDStrings and emit the intrinsic call.
3261 llvm::Function *F = 3262 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); 3263 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); 3264 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); 3265 return RValue::getIgnored(); 3266 } 3267 case Builtin::BI__builtin_annotation: { 3268 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 3269 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 3270 AnnVal->getType()); 3271 3272 // Get the annotation string, go through casts. Sema requires this to be a 3273 // non-wide string literal, potentially casted, so the cast<> is safe. 3274 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 3275 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 3276 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 3277 } 3278 case Builtin::BI__builtin_addcb: 3279 case Builtin::BI__builtin_addcs: 3280 case Builtin::BI__builtin_addc: 3281 case Builtin::BI__builtin_addcl: 3282 case Builtin::BI__builtin_addcll: 3283 case Builtin::BI__builtin_subcb: 3284 case Builtin::BI__builtin_subcs: 3285 case Builtin::BI__builtin_subc: 3286 case Builtin::BI__builtin_subcl: 3287 case Builtin::BI__builtin_subcll: { 3288 3289 // We translate all of these builtins from expressions of the form: 3290 // int x = ..., y = ..., carryin = ..., carryout, result; 3291 // result = __builtin_addc(x, y, carryin, &carryout); 3292 // 3293 // to LLVM IR of the form: 3294 // 3295 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 3296 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 3297 // %carry1 = extractvalue {i32, i1} %tmp1, 1 3298 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 3299 // i32 %carryin) 3300 // %result = extractvalue {i32, i1} %tmp2, 0 3301 // %carry2 = extractvalue {i32, i1} %tmp2, 1 3302 // %tmp3 = or i1 %carry1, %carry2 3303 // %tmp4 = zext i1 %tmp3 to i32 3304 // store i32 %tmp4, i32* %carryout 3305 3306 // 
Scalarize our inputs. 3307 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 3308 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 3309 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 3310 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 3311 3312 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 3313 llvm::Intrinsic::ID IntrinsicId; 3314 switch (BuiltinID) { 3315 default: llvm_unreachable("Unknown multiprecision builtin id."); 3316 case Builtin::BI__builtin_addcb: 3317 case Builtin::BI__builtin_addcs: 3318 case Builtin::BI__builtin_addc: 3319 case Builtin::BI__builtin_addcl: 3320 case Builtin::BI__builtin_addcll: 3321 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 3322 break; 3323 case Builtin::BI__builtin_subcb: 3324 case Builtin::BI__builtin_subcs: 3325 case Builtin::BI__builtin_subc: 3326 case Builtin::BI__builtin_subcl: 3327 case Builtin::BI__builtin_subcll: 3328 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 3329 break; 3330 } 3331 3332 // Construct our resulting LLVM IR expression. 
3333 llvm::Value *Carry1; 3334 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 3335 X, Y, Carry1); 3336 llvm::Value *Carry2; 3337 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 3338 Sum1, Carryin, Carry2); 3339 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 3340 X->getType()); 3341 Builder.CreateStore(CarryOut, CarryOutPtr); 3342 return RValue::get(Sum2); 3343 } 3344 3345 case Builtin::BI__builtin_add_overflow: 3346 case Builtin::BI__builtin_sub_overflow: 3347 case Builtin::BI__builtin_mul_overflow: { 3348 const clang::Expr *LeftArg = E->getArg(0); 3349 const clang::Expr *RightArg = E->getArg(1); 3350 const clang::Expr *ResultArg = E->getArg(2); 3351 3352 clang::QualType ResultQTy = 3353 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 3354 3355 WidthAndSignedness LeftInfo = 3356 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 3357 WidthAndSignedness RightInfo = 3358 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 3359 WidthAndSignedness ResultInfo = 3360 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 3361 3362 // Handle mixed-sign multiplication as a special case, because adding 3363 // runtime or backend support for our generic irgen would be too expensive. 
3364 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo)) 3365 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg, 3366 RightInfo, ResultArg, ResultQTy, 3367 ResultInfo); 3368 3369 WidthAndSignedness EncompassingInfo = 3370 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 3371 3372 llvm::Type *EncompassingLLVMTy = 3373 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 3374 3375 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 3376 3377 llvm::Intrinsic::ID IntrinsicId; 3378 switch (BuiltinID) { 3379 default: 3380 llvm_unreachable("Unknown overflow builtin id."); 3381 case Builtin::BI__builtin_add_overflow: 3382 IntrinsicId = EncompassingInfo.Signed 3383 ? llvm::Intrinsic::sadd_with_overflow 3384 : llvm::Intrinsic::uadd_with_overflow; 3385 break; 3386 case Builtin::BI__builtin_sub_overflow: 3387 IntrinsicId = EncompassingInfo.Signed 3388 ? llvm::Intrinsic::ssub_with_overflow 3389 : llvm::Intrinsic::usub_with_overflow; 3390 break; 3391 case Builtin::BI__builtin_mul_overflow: 3392 IntrinsicId = EncompassingInfo.Signed 3393 ? llvm::Intrinsic::smul_with_overflow 3394 : llvm::Intrinsic::umul_with_overflow; 3395 break; 3396 } 3397 3398 llvm::Value *Left = EmitScalarExpr(LeftArg); 3399 llvm::Value *Right = EmitScalarExpr(RightArg); 3400 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 3401 3402 // Extend each operand to the encompassing type. 3403 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 3404 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 3405 3406 // Perform the operation on the extended values. 3407 llvm::Value *Overflow, *Result; 3408 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 3409 3410 if (EncompassingInfo.Width > ResultInfo.Width) { 3411 // The encompassing type is wider than the result type, so we need to 3412 // truncate it. 
3413 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 3414 3415 // To see if the truncation caused an overflow, we will extend 3416 // the result and then compare it to the original result. 3417 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 3418 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 3419 llvm::Value *TruncationOverflow = 3420 Builder.CreateICmpNE(Result, ResultTruncExt); 3421 3422 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 3423 Result = ResultTrunc; 3424 } 3425 3426 // Finally, store the result using the pointer. 3427 bool isVolatile = 3428 ResultArg->getType()->getPointeeType().isVolatileQualified(); 3429 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 3430 3431 return RValue::get(Overflow); 3432 } 3433 3434 case Builtin::BI__builtin_uadd_overflow: 3435 case Builtin::BI__builtin_uaddl_overflow: 3436 case Builtin::BI__builtin_uaddll_overflow: 3437 case Builtin::BI__builtin_usub_overflow: 3438 case Builtin::BI__builtin_usubl_overflow: 3439 case Builtin::BI__builtin_usubll_overflow: 3440 case Builtin::BI__builtin_umul_overflow: 3441 case Builtin::BI__builtin_umull_overflow: 3442 case Builtin::BI__builtin_umulll_overflow: 3443 case Builtin::BI__builtin_sadd_overflow: 3444 case Builtin::BI__builtin_saddl_overflow: 3445 case Builtin::BI__builtin_saddll_overflow: 3446 case Builtin::BI__builtin_ssub_overflow: 3447 case Builtin::BI__builtin_ssubl_overflow: 3448 case Builtin::BI__builtin_ssubll_overflow: 3449 case Builtin::BI__builtin_smul_overflow: 3450 case Builtin::BI__builtin_smull_overflow: 3451 case Builtin::BI__builtin_smulll_overflow: { 3452 3453 // We translate all of these builtins directly to the relevant llvm IR node. 3454 3455 // Scalarize our inputs. 
3456 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 3457 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 3458 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 3459 3460 // Decide which of the overflow intrinsics we are lowering to: 3461 llvm::Intrinsic::ID IntrinsicId; 3462 switch (BuiltinID) { 3463 default: llvm_unreachable("Unknown overflow builtin id."); 3464 case Builtin::BI__builtin_uadd_overflow: 3465 case Builtin::BI__builtin_uaddl_overflow: 3466 case Builtin::BI__builtin_uaddll_overflow: 3467 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 3468 break; 3469 case Builtin::BI__builtin_usub_overflow: 3470 case Builtin::BI__builtin_usubl_overflow: 3471 case Builtin::BI__builtin_usubll_overflow: 3472 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 3473 break; 3474 case Builtin::BI__builtin_umul_overflow: 3475 case Builtin::BI__builtin_umull_overflow: 3476 case Builtin::BI__builtin_umulll_overflow: 3477 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 3478 break; 3479 case Builtin::BI__builtin_sadd_overflow: 3480 case Builtin::BI__builtin_saddl_overflow: 3481 case Builtin::BI__builtin_saddll_overflow: 3482 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 3483 break; 3484 case Builtin::BI__builtin_ssub_overflow: 3485 case Builtin::BI__builtin_ssubl_overflow: 3486 case Builtin::BI__builtin_ssubll_overflow: 3487 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 3488 break; 3489 case Builtin::BI__builtin_smul_overflow: 3490 case Builtin::BI__builtin_smull_overflow: 3491 case Builtin::BI__builtin_smulll_overflow: 3492 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 3493 break; 3494 } 3495 3496 3497 llvm::Value *Carry; 3498 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 3499 Builder.CreateStore(Sum, SumOutPtr); 3500 3501 return RValue::get(Carry); 3502 } 3503 case Builtin::BI__builtin_addressof: 3504 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); 3505 case Builtin::BI__builtin_operator_new: 3506 return 
EmitBuiltinNewDeleteCall( 3507 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false); 3508 case Builtin::BI__builtin_operator_delete: 3509 return EmitBuiltinNewDeleteCall( 3510 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true); 3511 3512 case Builtin::BI__builtin_is_aligned: 3513 return EmitBuiltinIsAligned(E); 3514 case Builtin::BI__builtin_align_up: 3515 return EmitBuiltinAlignTo(E, true); 3516 case Builtin::BI__builtin_align_down: 3517 return EmitBuiltinAlignTo(E, false); 3518 3519 case Builtin::BI__noop: 3520 // __noop always evaluates to an integer literal zero. 3521 return RValue::get(ConstantInt::get(IntTy, 0)); 3522 case Builtin::BI__builtin_call_with_static_chain: { 3523 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 3524 const Expr *Chain = E->getArg(1); 3525 return EmitCall(Call->getCallee()->getType(), 3526 EmitCallee(Call->getCallee()), Call, ReturnValue, 3527 EmitScalarExpr(Chain)); 3528 } 3529 case Builtin::BI_InterlockedExchange8: 3530 case Builtin::BI_InterlockedExchange16: 3531 case Builtin::BI_InterlockedExchange: 3532 case Builtin::BI_InterlockedExchangePointer: 3533 return RValue::get( 3534 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 3535 case Builtin::BI_InterlockedCompareExchangePointer: 3536 case Builtin::BI_InterlockedCompareExchangePointer_nf: { 3537 llvm::Type *RTy; 3538 llvm::IntegerType *IntType = 3539 IntegerType::get(getLLVMContext(), 3540 getContext().getTypeSize(E->getType())); 3541 llvm::Type *IntPtrType = IntType->getPointerTo(); 3542 3543 llvm::Value *Destination = 3544 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 3545 3546 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 3547 RTy = Exchange->getType(); 3548 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 3549 3550 llvm::Value *Comparand = 3551 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 3552 3553 auto Ordering = 3554 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? 
3555 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; 3556 3557 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 3558 Ordering, Ordering); 3559 Result->setVolatile(true); 3560 3561 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 3562 0), 3563 RTy)); 3564 } 3565 case Builtin::BI_InterlockedCompareExchange8: 3566 case Builtin::BI_InterlockedCompareExchange16: 3567 case Builtin::BI_InterlockedCompareExchange: 3568 case Builtin::BI_InterlockedCompareExchange64: 3569 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E)); 3570 case Builtin::BI_InterlockedIncrement16: 3571 case Builtin::BI_InterlockedIncrement: 3572 return RValue::get( 3573 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 3574 case Builtin::BI_InterlockedDecrement16: 3575 case Builtin::BI_InterlockedDecrement: 3576 return RValue::get( 3577 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 3578 case Builtin::BI_InterlockedAnd8: 3579 case Builtin::BI_InterlockedAnd16: 3580 case Builtin::BI_InterlockedAnd: 3581 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 3582 case Builtin::BI_InterlockedExchangeAdd8: 3583 case Builtin::BI_InterlockedExchangeAdd16: 3584 case Builtin::BI_InterlockedExchangeAdd: 3585 return RValue::get( 3586 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 3587 case Builtin::BI_InterlockedExchangeSub8: 3588 case Builtin::BI_InterlockedExchangeSub16: 3589 case Builtin::BI_InterlockedExchangeSub: 3590 return RValue::get( 3591 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 3592 case Builtin::BI_InterlockedOr8: 3593 case Builtin::BI_InterlockedOr16: 3594 case Builtin::BI_InterlockedOr: 3595 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 3596 case Builtin::BI_InterlockedXor8: 3597 case Builtin::BI_InterlockedXor16: 3598 case Builtin::BI_InterlockedXor: 3599 return 
RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 3600 3601 case Builtin::BI_bittest64: 3602 case Builtin::BI_bittest: 3603 case Builtin::BI_bittestandcomplement64: 3604 case Builtin::BI_bittestandcomplement: 3605 case Builtin::BI_bittestandreset64: 3606 case Builtin::BI_bittestandreset: 3607 case Builtin::BI_bittestandset64: 3608 case Builtin::BI_bittestandset: 3609 case Builtin::BI_interlockedbittestandreset: 3610 case Builtin::BI_interlockedbittestandreset64: 3611 case Builtin::BI_interlockedbittestandset64: 3612 case Builtin::BI_interlockedbittestandset: 3613 case Builtin::BI_interlockedbittestandset_acq: 3614 case Builtin::BI_interlockedbittestandset_rel: 3615 case Builtin::BI_interlockedbittestandset_nf: 3616 case Builtin::BI_interlockedbittestandreset_acq: 3617 case Builtin::BI_interlockedbittestandreset_rel: 3618 case Builtin::BI_interlockedbittestandreset_nf: 3619 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); 3620 3621 // These builtins exist to emit regular volatile loads and stores not 3622 // affected by the -fms-volatile setting. 
3623 case Builtin::BI__iso_volatile_load8: 3624 case Builtin::BI__iso_volatile_load16: 3625 case Builtin::BI__iso_volatile_load32: 3626 case Builtin::BI__iso_volatile_load64: 3627 return RValue::get(EmitISOVolatileLoad(*this, E)); 3628 case Builtin::BI__iso_volatile_store8: 3629 case Builtin::BI__iso_volatile_store16: 3630 case Builtin::BI__iso_volatile_store32: 3631 case Builtin::BI__iso_volatile_store64: 3632 return RValue::get(EmitISOVolatileStore(*this, E)); 3633 3634 case Builtin::BI__exception_code: 3635 case Builtin::BI_exception_code: 3636 return RValue::get(EmitSEHExceptionCode()); 3637 case Builtin::BI__exception_info: 3638 case Builtin::BI_exception_info: 3639 return RValue::get(EmitSEHExceptionInfo()); 3640 case Builtin::BI__abnormal_termination: 3641 case Builtin::BI_abnormal_termination: 3642 return RValue::get(EmitSEHAbnormalTermination()); 3643 case Builtin::BI_setjmpex: 3644 if (getTarget().getTriple().isOSMSVCRT()) 3645 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); 3646 break; 3647 case Builtin::BI_setjmp: 3648 if (getTarget().getTriple().isOSMSVCRT()) { 3649 if (getTarget().getTriple().getArch() == llvm::Triple::x86) 3650 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E); 3651 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64) 3652 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); 3653 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E); 3654 } 3655 break; 3656 3657 case Builtin::BI__GetExceptionInfo: { 3658 if (llvm::GlobalVariable *GV = 3659 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 3660 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 3661 break; 3662 } 3663 3664 case Builtin::BI__fastfail: 3665 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 3666 3667 case Builtin::BI__builtin_coro_size: { 3668 auto & Context = getContext(); 3669 auto SizeTy = Context.getSizeType(); 3670 auto T = 
Builder.getIntNTy(Context.getTypeSize(SizeTy)); 3671 Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 3672 return RValue::get(Builder.CreateCall(F)); 3673 } 3674 3675 case Builtin::BI__builtin_coro_id: 3676 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 3677 case Builtin::BI__builtin_coro_promise: 3678 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 3679 case Builtin::BI__builtin_coro_resume: 3680 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 3681 case Builtin::BI__builtin_coro_frame: 3682 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 3683 case Builtin::BI__builtin_coro_noop: 3684 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop); 3685 case Builtin::BI__builtin_coro_free: 3686 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); 3687 case Builtin::BI__builtin_coro_destroy: 3688 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 3689 case Builtin::BI__builtin_coro_done: 3690 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 3691 case Builtin::BI__builtin_coro_alloc: 3692 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 3693 case Builtin::BI__builtin_coro_begin: 3694 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 3695 case Builtin::BI__builtin_coro_end: 3696 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 3697 case Builtin::BI__builtin_coro_suspend: 3698 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 3699 case Builtin::BI__builtin_coro_param: 3700 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 3701 3702 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 3703 case Builtin::BIread_pipe: 3704 case Builtin::BIwrite_pipe: { 3705 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 3706 *Arg1 = EmitScalarExpr(E->getArg(1)); 3707 CGOpenCLRuntime OpenCLRT(CGM); 3708 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 3709 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 3710 3711 // Type of the generic packet parameter. 
3712 unsigned GenericAS = 3713 getContext().getTargetAddressSpace(LangAS::opencl_generic); 3714 llvm::Type *I8PTy = llvm::PointerType::get( 3715 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 3716 3717 // Testing which overloaded version we should generate the call for. 3718 if (2U == E->getNumArgs()) { 3719 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 3720 : "__write_pipe_2"; 3721 // Creating a generic function type to be able to call with any builtin or 3722 // user defined type. 3723 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 3724 llvm::FunctionType *FTy = llvm::FunctionType::get( 3725 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3726 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 3727 return RValue::get( 3728 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3729 {Arg0, BCast, PacketSize, PacketAlign})); 3730 } else { 3731 assert(4 == E->getNumArgs() && 3732 "Illegal number of parameters to pipe function"); 3733 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 3734 : "__write_pipe_4"; 3735 3736 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 3737 Int32Ty, Int32Ty}; 3738 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 3739 *Arg3 = EmitScalarExpr(E->getArg(3)); 3740 llvm::FunctionType *FTy = llvm::FunctionType::get( 3741 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3742 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 3743 // We know the third argument is an integer type, but we may need to cast 3744 // it to i32. 
3745 if (Arg2->getType() != Int32Ty) 3746 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 3747 return RValue::get(Builder.CreateCall( 3748 CGM.CreateRuntimeFunction(FTy, Name), 3749 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 3750 } 3751 } 3752 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write 3753 // functions 3754 case Builtin::BIreserve_read_pipe: 3755 case Builtin::BIreserve_write_pipe: 3756 case Builtin::BIwork_group_reserve_read_pipe: 3757 case Builtin::BIwork_group_reserve_write_pipe: 3758 case Builtin::BIsub_group_reserve_read_pipe: 3759 case Builtin::BIsub_group_reserve_write_pipe: { 3760 // Composing the mangled name for the function. 3761 const char *Name; 3762 if (BuiltinID == Builtin::BIreserve_read_pipe) 3763 Name = "__reserve_read_pipe"; 3764 else if (BuiltinID == Builtin::BIreserve_write_pipe) 3765 Name = "__reserve_write_pipe"; 3766 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 3767 Name = "__work_group_reserve_read_pipe"; 3768 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 3769 Name = "__work_group_reserve_write_pipe"; 3770 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 3771 Name = "__sub_group_reserve_read_pipe"; 3772 else 3773 Name = "__sub_group_reserve_write_pipe"; 3774 3775 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 3776 *Arg1 = EmitScalarExpr(E->getArg(1)); 3777 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 3778 CGOpenCLRuntime OpenCLRT(CGM); 3779 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 3780 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 3781 3782 // Building the generic function prototype. 3783 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 3784 llvm::FunctionType *FTy = llvm::FunctionType::get( 3785 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3786 // We know the second argument is an integer type, but we may need to cast 3787 // it to i32.
3788 if (Arg1->getType() != Int32Ty) 3789 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 3790 return RValue::get( 3791 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3792 {Arg0, Arg1, PacketSize, PacketAlign})); 3793 } 3794 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 3795 // functions 3796 case Builtin::BIcommit_read_pipe: 3797 case Builtin::BIcommit_write_pipe: 3798 case Builtin::BIwork_group_commit_read_pipe: 3799 case Builtin::BIwork_group_commit_write_pipe: 3800 case Builtin::BIsub_group_commit_read_pipe: 3801 case Builtin::BIsub_group_commit_write_pipe: { 3802 const char *Name; 3803 if (BuiltinID == Builtin::BIcommit_read_pipe) 3804 Name = "__commit_read_pipe"; 3805 else if (BuiltinID == Builtin::BIcommit_write_pipe) 3806 Name = "__commit_write_pipe"; 3807 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 3808 Name = "__work_group_commit_read_pipe"; 3809 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 3810 Name = "__work_group_commit_write_pipe"; 3811 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 3812 Name = "__sub_group_commit_read_pipe"; 3813 else 3814 Name = "__sub_group_commit_write_pipe"; 3815 3816 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 3817 *Arg1 = EmitScalarExpr(E->getArg(1)); 3818 CGOpenCLRuntime OpenCLRT(CGM); 3819 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 3820 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 3821 3822 // Building the generic function prototype. 
3823 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 3824 llvm::FunctionType *FTy = 3825 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 3826 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3827 3828 return RValue::get( 3829 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3830 {Arg0, Arg1, PacketSize, PacketAlign})); 3831 } 3832 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 3833 case Builtin::BIget_pipe_num_packets: 3834 case Builtin::BIget_pipe_max_packets: { 3835 const char *BaseName; 3836 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>(); 3837 if (BuiltinID == Builtin::BIget_pipe_num_packets) 3838 BaseName = "__get_pipe_num_packets"; 3839 else 3840 BaseName = "__get_pipe_max_packets"; 3841 std::string Name = std::string(BaseName) + 3842 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo"); 3843 3844 // Building the generic function prototype. 3845 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 3846 CGOpenCLRuntime OpenCLRT(CGM); 3847 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 3848 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 3849 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 3850 llvm::FunctionType *FTy = llvm::FunctionType::get( 3851 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3852 3853 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 3854 {Arg0, PacketSize, PacketAlign})); 3855 } 3856 3857 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
3858 case Builtin::BIto_global: 3859 case Builtin::BIto_local: 3860 case Builtin::BIto_private: { 3861 auto Arg0 = EmitScalarExpr(E->getArg(0)); 3862 auto NewArgT = llvm::PointerType::get(Int8Ty, 3863 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3864 auto NewRetT = llvm::PointerType::get(Int8Ty, 3865 CGM.getContext().getTargetAddressSpace( 3866 E->getType()->getPointeeType().getAddressSpace())); 3867 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 3868 llvm::Value *NewArg; 3869 if (Arg0->getType()->getPointerAddressSpace() != 3870 NewArgT->getPointerAddressSpace()) 3871 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 3872 else 3873 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 3874 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 3875 auto NewCall = 3876 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 3877 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 3878 ConvertType(E->getType()))); 3879 } 3880 3881 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 3882 // It contains four different overload formats specified in Table 6.13.17.1. 
3883 case Builtin::BIenqueue_kernel: { 3884 StringRef Name; // Generated function call name 3885 unsigned NumArgs = E->getNumArgs(); 3886 3887 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 3888 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 3889 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3890 3891 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 3892 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 3893 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 3894 llvm::Value *Range = NDRangeL.getAddress(*this).getPointer(); 3895 llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType(); 3896 3897 if (NumArgs == 4) { 3898 // The most basic form of the call with parameters: 3899 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 3900 Name = "__enqueue_kernel_basic"; 3901 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, 3902 GenericVoidPtrTy}; 3903 llvm::FunctionType *FTy = llvm::FunctionType::get( 3904 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 3905 3906 auto Info = 3907 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 3908 llvm::Value *Kernel = 3909 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3910 llvm::Value *Block = 3911 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3912 3913 AttrBuilder B; 3914 B.addByValAttr(NDRangeL.getAddress(*this).getElementType()); 3915 llvm::AttributeList ByValAttrSet = 3916 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 3917 3918 auto RTCall = 3919 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 3920 {Queue, Flags, Range, Kernel, Block}); 3921 RTCall->setAttributes(ByValAttrSet); 3922 return RValue::get(RTCall); 3923 } 3924 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 3925 3926 // Create a temporary array to hold the sizes of local pointer arguments 3927 // for the block. \p First is the position of the first size argument. 
3928 auto CreateArrayForSizeVar = [=](unsigned First) 3929 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { 3930 llvm::APInt ArraySize(32, NumArgs - First); 3931 QualType SizeArrayTy = getContext().getConstantArrayType( 3932 getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal, 3933 /*IndexTypeQuals=*/0); 3934 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); 3935 llvm::Value *TmpPtr = Tmp.getPointer(); 3936 llvm::Value *TmpSize = EmitLifetimeStart( 3937 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); 3938 llvm::Value *ElemPtr; 3939 // Each of the following arguments specifies the size of the corresponding 3940 // argument passed to the enqueued block. 3941 auto *Zero = llvm::ConstantInt::get(IntTy, 0); 3942 for (unsigned I = First; I < NumArgs; ++I) { 3943 auto *Index = llvm::ConstantInt::get(IntTy, I - First); 3944 auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index}); 3945 if (I == First) 3946 ElemPtr = GEP; 3947 auto *V = 3948 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); 3949 Builder.CreateAlignedStore( 3950 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); 3951 } 3952 return std::tie(ElemPtr, TmpSize, TmpPtr); 3953 }; 3954 3955 // Could have events and/or varargs. 3956 if (E->getArg(3)->getType()->isBlockPointerType()) { 3957 // No events passed, but has variadic arguments. 3958 Name = "__enqueue_kernel_varargs"; 3959 auto Info = 3960 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 3961 llvm::Value *Kernel = 3962 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 3963 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 3964 llvm::Value *ElemPtr, *TmpSize, *TmpPtr; 3965 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); 3966 3967 // Create a vector of the arguments, as well as a constant value to 3968 // express to the runtime the number of variadic arguments. 
3969 llvm::Value *const Args[] = {Queue, Flags, 3970 Range, Kernel, 3971 Block, ConstantInt::get(IntTy, NumArgs - 4), 3972 ElemPtr}; 3973 llvm::Type *const ArgTys[] = { 3974 QueueTy, IntTy, RangeTy, GenericVoidPtrTy, 3975 GenericVoidPtrTy, IntTy, ElemPtr->getType()}; 3976 3977 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); 3978 auto Call = RValue::get( 3979 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); 3980 if (TmpSize) 3981 EmitLifetimeEnd(TmpSize, TmpPtr); 3982 return Call; 3983 } 3984 // Any calls now have event arguments passed. 3985 if (NumArgs >= 7) { 3986 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 3987 llvm::PointerType *EventPtrTy = EventTy->getPointerTo( 3988 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 3989 3990 llvm::Value *NumEvents = 3991 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 3992 3993 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments 3994 // to be a null pointer constant (including `0` literal), we can take it 3995 // into account and emit null pointer directly. 3996 llvm::Value *EventWaitList = nullptr; 3997 if (E->getArg(4)->isNullPointerConstant( 3998 getContext(), Expr::NPC_ValueDependentIsNotNull)) { 3999 EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy); 4000 } else { 4001 EventWaitList = E->getArg(4)->getType()->isArrayType() 4002 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 4003 : EmitScalarExpr(E->getArg(4)); 4004 // Convert to generic address space. 
4005 EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy); 4006 } 4007 llvm::Value *EventRet = nullptr; 4008 if (E->getArg(5)->isNullPointerConstant( 4009 getContext(), Expr::NPC_ValueDependentIsNotNull)) { 4010 EventRet = llvm::ConstantPointerNull::get(EventPtrTy); 4011 } else { 4012 EventRet = 4013 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy); 4014 } 4015 4016 auto Info = 4017 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); 4018 llvm::Value *Kernel = 4019 Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 4020 llvm::Value *Block = 4021 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 4022 4023 std::vector<llvm::Type *> ArgTys = { 4024 QueueTy, Int32Ty, RangeTy, Int32Ty, 4025 EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; 4026 4027 std::vector<llvm::Value *> Args = {Queue, Flags, Range, 4028 NumEvents, EventWaitList, EventRet, 4029 Kernel, Block}; 4030 4031 if (NumArgs == 7) { 4032 // Has events but no variadics. 4033 Name = "__enqueue_kernel_basic_events"; 4034 llvm::FunctionType *FTy = llvm::FunctionType::get( 4035 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 4036 return RValue::get( 4037 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 4038 llvm::ArrayRef<llvm::Value *>(Args))); 4039 } 4040 // Has event info and variadics 4041 // Pass the number of variadics to the runtime function too. 
4042 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 4043 ArgTys.push_back(Int32Ty); 4044 Name = "__enqueue_kernel_events_varargs"; 4045 4046 llvm::Value *ElemPtr, *TmpSize, *TmpPtr; 4047 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7); 4048 Args.push_back(ElemPtr); 4049 ArgTys.push_back(ElemPtr->getType()); 4050 4051 llvm::FunctionType *FTy = llvm::FunctionType::get( 4052 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 4053 auto Call = 4054 RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 4055 llvm::ArrayRef<llvm::Value *>(Args))); 4056 if (TmpSize) 4057 EmitLifetimeEnd(TmpSize, TmpPtr); 4058 return Call; 4059 } 4060 LLVM_FALLTHROUGH; 4061 } 4062 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 4063 // parameter. 4064 case Builtin::BIget_kernel_work_group_size: { 4065 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 4066 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 4067 auto Info = 4068 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 4069 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 4070 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 4071 return RValue::get(Builder.CreateCall( 4072 CGM.CreateRuntimeFunction( 4073 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 4074 false), 4075 "__get_kernel_work_group_size_impl"), 4076 {Kernel, Arg})); 4077 } 4078 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 4079 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 4080 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 4081 auto Info = 4082 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 4083 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 4084 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 4085 return RValue::get(Builder.CreateCall( 4086 CGM.CreateRuntimeFunction( 4087 
llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 4088 false), 4089 "__get_kernel_preferred_work_group_size_multiple_impl"), 4090 {Kernel, Arg})); 4091 } 4092 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: 4093 case Builtin::BIget_kernel_sub_group_count_for_ndrange: { 4094 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 4095 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 4096 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); 4097 llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer(); 4098 auto Info = 4099 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); 4100 Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); 4101 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 4102 const char *Name = 4103 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange 4104 ? "__get_kernel_max_sub_group_size_for_ndrange_impl" 4105 : "__get_kernel_sub_group_count_for_ndrange_impl"; 4106 return RValue::get(Builder.CreateCall( 4107 CGM.CreateRuntimeFunction( 4108 llvm::FunctionType::get( 4109 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, 4110 false), 4111 Name), 4112 {NDRange, Kernel, Block})); 4113 } 4114 4115 case Builtin::BI__builtin_store_half: 4116 case Builtin::BI__builtin_store_halff: { 4117 Value *Val = EmitScalarExpr(E->getArg(0)); 4118 Address Address = EmitPointerWithAlignment(E->getArg(1)); 4119 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); 4120 return RValue::get(Builder.CreateStore(HalfVal, Address)); 4121 } 4122 case Builtin::BI__builtin_load_half: { 4123 Address Address = EmitPointerWithAlignment(E->getArg(0)); 4124 Value *HalfVal = Builder.CreateLoad(Address); 4125 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); 4126 } 4127 case Builtin::BI__builtin_load_halff: { 4128 Address Address = EmitPointerWithAlignment(E->getArg(0)); 4129 Value *HalfVal = Builder.CreateLoad(Address); 
4130 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); 4131 } 4132 case Builtin::BIprintf: 4133 if (getTarget().getTriple().isNVPTX()) 4134 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 4135 if (getTarget().getTriple().getArch() == Triple::amdgcn && 4136 getLangOpts().HIP) 4137 return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue); 4138 break; 4139 case Builtin::BI__builtin_canonicalize: 4140 case Builtin::BI__builtin_canonicalizef: 4141 case Builtin::BI__builtin_canonicalizef16: 4142 case Builtin::BI__builtin_canonicalizel: 4143 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 4144 4145 case Builtin::BI__builtin_thread_pointer: { 4146 if (!getContext().getTargetInfo().isTLSSupported()) 4147 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 4148 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 4149 break; 4150 } 4151 case Builtin::BI__builtin_os_log_format: 4152 return emitBuiltinOSLogFormat(*E); 4153 4154 case Builtin::BI__xray_customevent: { 4155 if (!ShouldXRayInstrumentFunction()) 4156 return RValue::getIgnored(); 4157 4158 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( 4159 XRayInstrKind::Custom)) 4160 return RValue::getIgnored(); 4161 4162 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) 4163 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents()) 4164 return RValue::getIgnored(); 4165 4166 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 4167 auto FTy = F->getFunctionType(); 4168 auto Arg0 = E->getArg(0); 4169 auto Arg0Val = EmitScalarExpr(Arg0); 4170 auto Arg0Ty = Arg0->getType(); 4171 auto PTy0 = FTy->getParamType(0); 4172 if (PTy0 != Arg0Val->getType()) { 4173 if (Arg0Ty->isArrayType()) 4174 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 4175 else 4176 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 4177 } 4178 auto Arg1 = EmitScalarExpr(E->getArg(1)); 4179 auto PTy1 = FTy->getParamType(1); 4180 if (PTy1 != 
Arg1->getType()) 4181 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 4182 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 4183 } 4184 4185 case Builtin::BI__xray_typedevent: { 4186 // TODO: There should be a way to always emit events even if the current 4187 // function is not instrumented. Losing events in a stream can cripple 4188 // a trace. 4189 if (!ShouldXRayInstrumentFunction()) 4190 return RValue::getIgnored(); 4191 4192 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( 4193 XRayInstrKind::Typed)) 4194 return RValue::getIgnored(); 4195 4196 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) 4197 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents()) 4198 return RValue::getIgnored(); 4199 4200 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent); 4201 auto FTy = F->getFunctionType(); 4202 auto Arg0 = EmitScalarExpr(E->getArg(0)); 4203 auto PTy0 = FTy->getParamType(0); 4204 if (PTy0 != Arg0->getType()) 4205 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0); 4206 auto Arg1 = E->getArg(1); 4207 auto Arg1Val = EmitScalarExpr(Arg1); 4208 auto Arg1Ty = Arg1->getType(); 4209 auto PTy1 = FTy->getParamType(1); 4210 if (PTy1 != Arg1Val->getType()) { 4211 if (Arg1Ty->isArrayType()) 4212 Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer(); 4213 else 4214 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1); 4215 } 4216 auto Arg2 = EmitScalarExpr(E->getArg(2)); 4217 auto PTy2 = FTy->getParamType(2); 4218 if (PTy2 != Arg2->getType()) 4219 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2); 4220 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2})); 4221 } 4222 4223 case Builtin::BI__builtin_ms_va_start: 4224 case Builtin::BI__builtin_ms_va_end: 4225 return RValue::get( 4226 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 4227 BuiltinID == Builtin::BI__builtin_ms_va_start)); 4228 4229 case Builtin::BI__builtin_ms_va_copy: { 4230 // Lower this manually. 
We can't reliably determine whether or not any 4231 // given va_copy() is for a Win64 va_list from the calling convention 4232 // alone, because it's legal to do this from a System V ABI function. 4233 // With opaque pointer types, we won't have enough information in LLVM 4234 // IR to determine this from the argument types, either. Best to do it 4235 // now, while we have enough information. 4236 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 4237 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 4238 4239 llvm::Type *BPP = Int8PtrPtrTy; 4240 4241 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 4242 DestAddr.getAlignment()); 4243 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 4244 SrcAddr.getAlignment()); 4245 4246 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 4247 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); 4248 } 4249 } 4250 4251 // If this is an alias for a lib function (e.g. __builtin_sin), emit 4252 // the call using the normal call path, but using the unmangled 4253 // version of the function name. 4254 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 4255 return emitLibraryCall(*this, FD, E, 4256 CGM.getBuiltinLibFunction(FD, BuiltinID)); 4257 4258 // If this is a predefined lib function (e.g. malloc), emit the call 4259 // using exactly the normal call path. 4260 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 4261 return emitLibraryCall(*this, FD, E, 4262 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 4263 4264 // Check that a call to a target specific builtin has the correct target 4265 // features. 4266 // This is down here to avoid non-target specific builtins, however, if 4267 // generic builtins start to require generic target features then we 4268 // can move this up to the beginning of the function. 
4269 checkTargetFeatures(E, FD); 4270 4271 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) 4272 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth); 4273 4274 // See if we have a target specific intrinsic. 4275 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 4276 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 4277 StringRef Prefix = 4278 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 4279 if (!Prefix.empty()) { 4280 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 4281 // NOTE we don't need to perform a compatibility flag check here since the 4282 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 4283 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 4284 if (IntrinsicID == Intrinsic::not_intrinsic) 4285 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 4286 } 4287 4288 if (IntrinsicID != Intrinsic::not_intrinsic) { 4289 SmallVector<Value*, 16> Args; 4290 4291 // Find out if any arguments are required to be integer constant 4292 // expressions. 4293 unsigned ICEArguments = 0; 4294 ASTContext::GetBuiltinTypeError Error; 4295 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4296 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4297 4298 Function *F = CGM.getIntrinsic(IntrinsicID); 4299 llvm::FunctionType *FTy = F->getFunctionType(); 4300 4301 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 4302 Value *ArgValue; 4303 // If this is a normal argument, just emit it as a scalar. 4304 if ((ICEArguments & (1 << i)) == 0) { 4305 ArgValue = EmitScalarExpr(E->getArg(i)); 4306 } else { 4307 // If this is required to be a constant, constant fold it so that we 4308 // know that the generated intrinsic gets a ConstantInt. 
4309 llvm::APSInt Result; 4310 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 4311 assert(IsConst && "Constant arg isn't actually constant?"); 4312 (void)IsConst; 4313 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 4314 } 4315 4316 // If the intrinsic arg type is different from the builtin arg type 4317 // we need to do a bit cast. 4318 llvm::Type *PTy = FTy->getParamType(i); 4319 if (PTy != ArgValue->getType()) { 4320 // XXX - vector of pointers? 4321 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) { 4322 if (PtrTy->getAddressSpace() != 4323 ArgValue->getType()->getPointerAddressSpace()) { 4324 ArgValue = Builder.CreateAddrSpaceCast( 4325 ArgValue, 4326 ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace())); 4327 } 4328 } 4329 4330 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 4331 "Must be able to losslessly bit cast to param"); 4332 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 4333 } 4334 4335 Args.push_back(ArgValue); 4336 } 4337 4338 Value *V = Builder.CreateCall(F, Args); 4339 QualType BuiltinRetType = E->getType(); 4340 4341 llvm::Type *RetTy = VoidTy; 4342 if (!BuiltinRetType->isVoidType()) 4343 RetTy = ConvertType(BuiltinRetType); 4344 4345 if (RetTy != V->getType()) { 4346 // XXX - vector of pointers? 4347 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) { 4348 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) { 4349 V = Builder.CreateAddrSpaceCast( 4350 V, V->getType()->getPointerTo(PtrTy->getAddressSpace())); 4351 } 4352 } 4353 4354 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 4355 "Must be able to losslessly bit cast result type"); 4356 V = Builder.CreateBitCast(V, RetTy); 4357 } 4358 4359 return RValue::get(V); 4360 } 4361 4362 // Some target-specific builtins can have aggregate return values, e.g. 4363 // __builtin_arm_mve_vld2q_u32. 
So if the result is an aggregate, force 4364 // ReturnValue to be non-null, so that the target-specific emission code can 4365 // always just emit into it. 4366 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType()); 4367 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) { 4368 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); 4369 ReturnValue = ReturnValueSlot(DestPtr, false); 4370 } 4371 4372 // Now see if we can emit a target-specific builtin. 4373 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { 4374 switch (EvalKind) { 4375 case TEK_Scalar: 4376 return RValue::get(V); 4377 case TEK_Aggregate: 4378 return RValue::getAggregate(ReturnValue.getValue(), 4379 ReturnValue.isVolatile()); 4380 case TEK_Complex: 4381 llvm_unreachable("No current target builtin returns complex"); 4382 } 4383 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); 4384 } 4385 4386 ErrorUnsupported(E, "builtin function"); 4387 4388 // Unknown builtin, for now just dump it out and return undef. 
4389 return GetUndefRValue(E->getType()); 4390 } 4391 4392 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 4393 unsigned BuiltinID, const CallExpr *E, 4394 ReturnValueSlot ReturnValue, 4395 llvm::Triple::ArchType Arch) { 4396 switch (Arch) { 4397 case llvm::Triple::arm: 4398 case llvm::Triple::armeb: 4399 case llvm::Triple::thumb: 4400 case llvm::Triple::thumbeb: 4401 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch); 4402 case llvm::Triple::aarch64: 4403 case llvm::Triple::aarch64_32: 4404 case llvm::Triple::aarch64_be: 4405 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); 4406 case llvm::Triple::bpfeb: 4407 case llvm::Triple::bpfel: 4408 return CGF->EmitBPFBuiltinExpr(BuiltinID, E); 4409 case llvm::Triple::x86: 4410 case llvm::Triple::x86_64: 4411 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 4412 case llvm::Triple::ppc: 4413 case llvm::Triple::ppc64: 4414 case llvm::Triple::ppc64le: 4415 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 4416 case llvm::Triple::r600: 4417 case llvm::Triple::amdgcn: 4418 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 4419 case llvm::Triple::systemz: 4420 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 4421 case llvm::Triple::nvptx: 4422 case llvm::Triple::nvptx64: 4423 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 4424 case llvm::Triple::wasm32: 4425 case llvm::Triple::wasm64: 4426 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 4427 case llvm::Triple::hexagon: 4428 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E); 4429 default: 4430 return nullptr; 4431 } 4432 } 4433 4434 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 4435 const CallExpr *E, 4436 ReturnValueSlot ReturnValue) { 4437 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 4438 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 4439 return EmitTargetArchBuiltinExpr( 4440 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 4441 ReturnValue, 
getContext().getAuxTargetInfo()->getTriple().getArch()); 4442 } 4443 4444 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue, 4445 getTarget().getTriple().getArch()); 4446 } 4447 4448 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 4449 NeonTypeFlags TypeFlags, 4450 bool HasLegalHalfType=true, 4451 bool V1Ty=false) { 4452 int IsQuad = TypeFlags.isQuad(); 4453 switch (TypeFlags.getEltType()) { 4454 case NeonTypeFlags::Int8: 4455 case NeonTypeFlags::Poly8: 4456 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 4457 case NeonTypeFlags::Int16: 4458 case NeonTypeFlags::Poly16: 4459 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 4460 case NeonTypeFlags::Float16: 4461 if (HasLegalHalfType) 4462 return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); 4463 else 4464 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 4465 case NeonTypeFlags::Int32: 4466 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 4467 case NeonTypeFlags::Int64: 4468 case NeonTypeFlags::Poly64: 4469 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 4470 case NeonTypeFlags::Poly128: 4471 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 4472 // There is a lot of i128 and f128 API missing. 4473 // so we use v16i8 to represent poly128 and get pattern matched. 4474 return llvm::VectorType::get(CGF->Int8Ty, 16); 4475 case NeonTypeFlags::Float32: 4476 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 4477 case NeonTypeFlags::Float64: 4478 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 4479 } 4480 llvm_unreachable("Unknown vector element type!"); 4481 } 4482 4483 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 4484 NeonTypeFlags IntTypeFlags) { 4485 int IsQuad = IntTypeFlags.isQuad(); 4486 switch (IntTypeFlags.getEltType()) { 4487 case NeonTypeFlags::Int16: 4488 return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad)); 4489 case NeonTypeFlags::Int32: 4490 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 4491 case NeonTypeFlags::Int64: 4492 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 4493 default: 4494 llvm_unreachable("Type can't be converted to floating-point!"); 4495 } 4496 } 4497 4498 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C, 4499 const ElementCount &Count) { 4500 Value *SV = llvm::ConstantVector::getSplat(Count, C); 4501 return Builder.CreateShuffleVector(V, V, SV, "lane"); 4502 } 4503 4504 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 4505 ElementCount EC = V->getType()->getVectorElementCount(); 4506 return EmitNeonSplat(V, C, EC); 4507 } 4508 4509 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 4510 const char *name, 4511 unsigned shift, bool rightshift) { 4512 unsigned j = 0; 4513 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 4514 ai != ae; ++ai, ++j) 4515 if (shift > 0 && shift == j) 4516 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 4517 else 4518 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 4519 4520 return Builder.CreateCall(F, Ops, name); 4521 } 4522 4523 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 4524 bool neg) { 4525 int SV = cast<ConstantInt>(V)->getSExtValue(); 4526 return ConstantInt::get(Ty, neg ? -SV : SV); 4527 } 4528 4529 // Right-shift a vector by a constant. 
4530 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 4531 llvm::Type *Ty, bool usgn, 4532 const char *name) { 4533 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 4534 4535 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 4536 int EltSize = VTy->getScalarSizeInBits(); 4537 4538 Vec = Builder.CreateBitCast(Vec, Ty); 4539 4540 // lshr/ashr are undefined when the shift amount is equal to the vector 4541 // element size. 4542 if (ShiftAmt == EltSize) { 4543 if (usgn) { 4544 // Right-shifting an unsigned value by its size yields 0. 4545 return llvm::ConstantAggregateZero::get(VTy); 4546 } else { 4547 // Right-shifting a signed value by its size is equivalent 4548 // to a shift of size-1. 4549 --ShiftAmt; 4550 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 4551 } 4552 } 4553 4554 Shift = EmitNeonShiftVector(Shift, Ty, false); 4555 if (usgn) 4556 return Builder.CreateLShr(Vec, Shift, name); 4557 else 4558 return Builder.CreateAShr(Vec, Shift, name); 4559 } 4560 4561 enum { 4562 AddRetType = (1 << 0), 4563 Add1ArgType = (1 << 1), 4564 Add2ArgTypes = (1 << 2), 4565 4566 VectorizeRetType = (1 << 3), 4567 VectorizeArgTypes = (1 << 4), 4568 4569 InventFloatType = (1 << 5), 4570 UnsignedAlts = (1 << 6), 4571 4572 Use64BitVectors = (1 << 7), 4573 Use128BitVectors = (1 << 8), 4574 4575 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 4576 VectorRet = AddRetType | VectorizeRetType, 4577 VectorRetGetArgs01 = 4578 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 4579 FpCmpzModifiers = 4580 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 4581 }; 4582 4583 namespace { 4584 struct ARMVectorIntrinsicInfo { 4585 const char *NameHint; 4586 unsigned BuiltinID; 4587 unsigned LLVMIntrinsic; 4588 unsigned AltLLVMIntrinsic; 4589 unsigned TypeModifier; 4590 4591 bool operator<(unsigned RHSBuiltinID) const { 4592 return BuiltinID < RHSBuiltinID; 4593 } 4594 bool operator<(const ARMVectorIntrinsicInfo &TE) const { 4595 
return BuiltinID < TE.BuiltinID; 4596 } 4597 }; 4598 } // end anonymous namespace 4599 4600 #define NEONMAP0(NameBase) \ 4601 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 4602 4603 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 4604 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 4605 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 4606 4607 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 4608 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 4609 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 4610 TypeModifier } 4611 4612 static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { 4613 NEONMAP0(splat_lane_v), 4614 NEONMAP0(splat_laneq_v), 4615 NEONMAP0(splatq_lane_v), 4616 NEONMAP0(splatq_laneq_v), 4617 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 4618 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 4619 NEONMAP1(vabs_v, arm_neon_vabs, 0), 4620 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 4621 NEONMAP0(vaddhn_v), 4622 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 4623 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 4624 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 4625 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 4626 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 4627 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 4628 NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), 4629 NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), 4630 NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), 4631 NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), 4632 NEONMAP1(vcage_v, arm_neon_vacge, 0), 4633 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 4634 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 4635 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 4636 NEONMAP1(vcale_v, arm_neon_vacge, 0), 4637 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 4638 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 4639 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 4640 NEONMAP0(vceqz_v), 4641 NEONMAP0(vceqzq_v), 
4642 NEONMAP0(vcgez_v), 4643 NEONMAP0(vcgezq_v), 4644 NEONMAP0(vcgtz_v), 4645 NEONMAP0(vcgtzq_v), 4646 NEONMAP0(vclez_v), 4647 NEONMAP0(vclezq_v), 4648 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 4649 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 4650 NEONMAP0(vcltz_v), 4651 NEONMAP0(vcltzq_v), 4652 NEONMAP1(vclz_v, ctlz, Add1ArgType), 4653 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 4654 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 4655 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 4656 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 4657 NEONMAP0(vcvt_f16_v), 4658 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 4659 NEONMAP0(vcvt_f32_v), 4660 NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 4661 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 4662 NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0), 4663 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 4664 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 4665 NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0), 4666 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 4667 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 4668 NEONMAP0(vcvt_s16_v), 4669 NEONMAP0(vcvt_s32_v), 4670 NEONMAP0(vcvt_s64_v), 4671 NEONMAP0(vcvt_u16_v), 4672 NEONMAP0(vcvt_u32_v), 4673 NEONMAP0(vcvt_u64_v), 4674 NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), 4675 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 4676 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 4677 NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0), 4678 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 4679 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 4680 NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0), 4681 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 4682 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 4683 NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0), 4684 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 4685 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 4686 NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0), 4687 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 4688 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 4689 
NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0), 4690 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 4691 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 4692 NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0), 4693 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 4694 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 4695 NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0), 4696 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 4697 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 4698 NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0), 4699 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 4700 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 4701 NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0), 4702 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 4703 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 4704 NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0), 4705 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 4706 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 4707 NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0), 4708 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 4709 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 4710 NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0), 4711 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 4712 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 4713 NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0), 4714 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 4715 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 4716 NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0), 4717 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 4718 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 4719 NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0), 4720 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 4721 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 4722 NEONMAP0(vcvtq_f16_v), 4723 NEONMAP0(vcvtq_f32_v), 4724 NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 4725 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 4726 NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0), 4727 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 4728 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 4729 NEONMAP1(vcvtq_n_u16_v, 
arm_neon_vcvtfp2fxu, 0), 4730 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 4731 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 4732 NEONMAP0(vcvtq_s16_v), 4733 NEONMAP0(vcvtq_s32_v), 4734 NEONMAP0(vcvtq_s64_v), 4735 NEONMAP0(vcvtq_u16_v), 4736 NEONMAP0(vcvtq_u32_v), 4737 NEONMAP0(vcvtq_u64_v), 4738 NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0), 4739 NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0), 4740 NEONMAP0(vext_v), 4741 NEONMAP0(vextq_v), 4742 NEONMAP0(vfma_v), 4743 NEONMAP0(vfmaq_v), 4744 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 4745 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 4746 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 4747 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 4748 NEONMAP0(vld1_dup_v), 4749 NEONMAP1(vld1_v, arm_neon_vld1, 0), 4750 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0), 4751 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0), 4752 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0), 4753 NEONMAP0(vld1q_dup_v), 4754 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 4755 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0), 4756 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0), 4757 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0), 4758 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0), 4759 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 4760 NEONMAP1(vld2_v, arm_neon_vld2, 0), 4761 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0), 4762 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 4763 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 4764 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0), 4765 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 4766 NEONMAP1(vld3_v, arm_neon_vld3, 0), 4767 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0), 4768 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 4769 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 4770 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0), 4771 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 4772 NEONMAP1(vld4_v, arm_neon_vld4, 0), 4773 NEONMAP1(vld4q_dup_v, 
arm_neon_vld4dup, 0), 4774 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 4775 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 4776 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 4777 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 4778 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 4779 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 4780 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 4781 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 4782 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 4783 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 4784 NEONMAP0(vmovl_v), 4785 NEONMAP0(vmovn_v), 4786 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 4787 NEONMAP0(vmull_v), 4788 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 4789 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 4790 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 4791 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 4792 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 4793 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 4794 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 4795 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 4796 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 4797 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 4798 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 4799 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), 4800 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), 4801 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0), 4802 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0), 4803 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 4804 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 4805 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 4806 NEONMAP2(vqmovn_v, 
arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 4807 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 4808 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 4809 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 4810 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 4811 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 4812 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 4813 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 4814 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 4815 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 4816 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 4817 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 4818 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 4819 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 4820 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), 4821 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), 4822 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 4823 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 4824 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 4825 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 4826 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 4827 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 4828 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 4829 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 4830 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 4831 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 4832 NEONMAP0(vrndi_v), 4833 NEONMAP0(vrndiq_v), 4834 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 4835 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 4836 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 4837 NEONMAP1(vrndnq_v, 
arm_neon_vrintn, Add1ArgType), 4838 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 4839 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 4840 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 4841 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 4842 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 4843 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 4844 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 4845 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 4846 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 4847 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 4848 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 4849 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 4850 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 4851 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 4852 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 4853 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 4854 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 4855 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 4856 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 4857 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 4858 NEONMAP0(vshl_n_v), 4859 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 4860 NEONMAP0(vshll_n_v), 4861 NEONMAP0(vshlq_n_v), 4862 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 4863 NEONMAP0(vshr_n_v), 4864 NEONMAP0(vshrn_n_v), 4865 NEONMAP0(vshrq_n_v), 4866 NEONMAP1(vst1_v, arm_neon_vst1, 0), 4867 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0), 4868 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0), 4869 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0), 4870 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 4871 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0), 4872 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0), 4873 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0), 4874 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 4875 
NEONMAP1(vst2_v, arm_neon_vst2, 0), 4876 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 4877 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 4878 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 4879 NEONMAP1(vst3_v, arm_neon_vst3, 0), 4880 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 4881 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 4882 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 4883 NEONMAP1(vst4_v, arm_neon_vst4, 0), 4884 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 4885 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 4886 NEONMAP0(vsubhn_v), 4887 NEONMAP0(vtrn_v), 4888 NEONMAP0(vtrnq_v), 4889 NEONMAP0(vtst_v), 4890 NEONMAP0(vtstq_v), 4891 NEONMAP0(vuzp_v), 4892 NEONMAP0(vuzpq_v), 4893 NEONMAP0(vzip_v), 4894 NEONMAP0(vzipq_v) 4895 }; 4896 4897 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 4898 NEONMAP0(splat_lane_v), 4899 NEONMAP0(splat_laneq_v), 4900 NEONMAP0(splatq_lane_v), 4901 NEONMAP0(splatq_laneq_v), 4902 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 4903 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 4904 NEONMAP0(vaddhn_v), 4905 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 4906 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 4907 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 4908 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 4909 NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), 4910 NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), 4911 NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), 4912 NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), 4913 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 4914 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 4915 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 4916 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 4917 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 4918 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 4919 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 4920 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 4921 NEONMAP0(vceqz_v), 4922 NEONMAP0(vceqzq_v), 4923 NEONMAP0(vcgez_v), 4924 NEONMAP0(vcgezq_v), 4925 
NEONMAP0(vcgtz_v), 4926 NEONMAP0(vcgtzq_v), 4927 NEONMAP0(vclez_v), 4928 NEONMAP0(vclezq_v), 4929 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 4930 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 4931 NEONMAP0(vcltz_v), 4932 NEONMAP0(vcltzq_v), 4933 NEONMAP1(vclz_v, ctlz, Add1ArgType), 4934 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 4935 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 4936 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 4937 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 4938 NEONMAP0(vcvt_f16_v), 4939 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 4940 NEONMAP0(vcvt_f32_v), 4941 NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4942 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4943 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4944 NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), 4945 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 4946 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 4947 NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), 4948 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 4949 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 4950 NEONMAP0(vcvtq_f16_v), 4951 NEONMAP0(vcvtq_f32_v), 4952 NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4953 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4954 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 4955 NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0), 4956 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 4957 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 4958 NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0), 4959 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 4960 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 4961 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 4962 NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0), 4963 NEONMAP2(vdotq_v, 
aarch64_neon_udot, aarch64_neon_sdot, 0), 4964 NEONMAP0(vext_v), 4965 NEONMAP0(vextq_v), 4966 NEONMAP0(vfma_v), 4967 NEONMAP0(vfmaq_v), 4968 NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0), 4969 NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0), 4970 NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0), 4971 NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0), 4972 NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0), 4973 NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0), 4974 NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0), 4975 NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0), 4976 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 4977 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 4978 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 4979 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 4980 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), 4981 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), 4982 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), 4983 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), 4984 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), 4985 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), 4986 NEONMAP0(vmovl_v), 4987 NEONMAP0(vmovn_v), 4988 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 4989 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 4990 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 4991 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 4992 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 4993 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 4994 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 4995 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 4996 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 4997 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 4998 NEONMAP2(vqdmlal_v, 
aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 4999 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 5000 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), 5001 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), 5002 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 5003 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), 5004 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0), 5005 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 5006 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 5007 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 5008 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 5009 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 5010 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 5011 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), 5012 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), 5013 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 5014 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), 5015 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), 5016 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 5017 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 5018 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 5019 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 5020 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 5021 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 5022 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 5023 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 5024 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 5025 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 5026 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, 
Add1ArgType | UnsignedAlts), 5027 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 5028 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 5029 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 5030 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 5031 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 5032 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 5033 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 5034 NEONMAP0(vrndi_v), 5035 NEONMAP0(vrndiq_v), 5036 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 5037 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 5038 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 5039 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 5040 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 5041 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 5042 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 5043 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 5044 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 5045 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 5046 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 5047 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 5048 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 5049 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 5050 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 5051 NEONMAP0(vshl_n_v), 5052 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 5053 NEONMAP0(vshll_n_v), 5054 NEONMAP0(vshlq_n_v), 5055 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 5056 NEONMAP0(vshr_n_v), 5057 NEONMAP0(vshrn_n_v), 5058 NEONMAP0(vshrq_n_v), 5059 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), 5060 
NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), 5061 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), 5062 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), 5063 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0), 5064 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), 5065 NEONMAP0(vsubhn_v), 5066 NEONMAP0(vtst_v), 5067 NEONMAP0(vtstq_v), 5068 }; 5069 5070 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { 5071 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 5072 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 5073 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 5074 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 5075 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 5076 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 5077 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 5078 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 5079 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 5080 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 5081 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 5082 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 5083 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 5084 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 5085 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 5086 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 5087 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 5088 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 5089 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 5090 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 5091 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 5092 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 5093 NEONMAP1(vcaltd_f64, 
aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns,
AddRetType | Add1ArgType), 5283 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 5284 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 5285 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 5286 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 5287 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 5288 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 5289 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), 5290 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), 5291 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), 5292 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), 5293 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), 5294 }; 5295 5296 #undef NEONMAP0 5297 #undef NEONMAP1 5298 #undef NEONMAP2 5299 5300 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 5301 { \ 5302 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ 5303 TypeModifier \ 5304 } 5305 5306 #define SVEMAP2(NameBase, TypeModifier) \ 5307 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier } 5308 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { 5309 #define GET_SVE_LLVM_INTRINSIC_MAP 5310 #include "clang/Basic/arm_sve_builtin_cg.inc" 5311 #undef GET_SVE_LLVM_INTRINSIC_MAP 5312 }; 5313 5314 #undef SVEMAP1 5315 #undef SVEMAP2 5316 5317 static bool NEONSIMDIntrinsicsProvenSorted = false; 5318 5319 static bool AArch64SIMDIntrinsicsProvenSorted = false; 5320 static bool AArch64SISDIntrinsicsProvenSorted = false; 5321 static bool AArch64SVEIntrinsicsProvenSorted = false; 5322 5323 static const ARMVectorIntrinsicInfo * 5324 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, 5325 unsigned BuiltinID, bool &MapProvenSorted) { 5326 5327 #ifndef NDEBUG 5328 if (!MapProvenSorted) { 5329 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 
5330 MapProvenSorted = true; 5331 } 5332 #endif 5333 5334 const ARMVectorIntrinsicInfo *Builtin = 5335 llvm::lower_bound(IntrinsicMap, BuiltinID); 5336 5337 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 5338 return Builtin; 5339 5340 return nullptr; 5341 } 5342 5343 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 5344 unsigned Modifier, 5345 llvm::Type *ArgType, 5346 const CallExpr *E) { 5347 int VectorSize = 0; 5348 if (Modifier & Use64BitVectors) 5349 VectorSize = 64; 5350 else if (Modifier & Use128BitVectors) 5351 VectorSize = 128; 5352 5353 // Return type. 5354 SmallVector<llvm::Type *, 3> Tys; 5355 if (Modifier & AddRetType) { 5356 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 5357 if (Modifier & VectorizeRetType) 5358 Ty = llvm::VectorType::get( 5359 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 5360 5361 Tys.push_back(Ty); 5362 } 5363 5364 // Arguments. 5365 if (Modifier & VectorizeArgTypes) { 5366 int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 5367 ArgType = llvm::VectorType::get(ArgType, Elts); 5368 } 5369 5370 if (Modifier & (Add1ArgType | Add2ArgTypes)) 5371 Tys.push_back(ArgType); 5372 5373 if (Modifier & Add2ArgTypes) 5374 Tys.push_back(ArgType); 5375 5376 if (Modifier & InventFloatType) 5377 Tys.push_back(FloatTy); 5378 5379 return CGM.getIntrinsic(IntrinsicID, Tys); 5380 } 5381 5382 static Value *EmitCommonNeonSISDBuiltinExpr( 5383 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, 5384 SmallVectorImpl<Value *> &Ops, const CallExpr *E) { 5385 unsigned BuiltinID = SISDInfo.BuiltinID; 5386 unsigned int Int = SISDInfo.LLVMIntrinsic; 5387 unsigned Modifier = SISDInfo.TypeModifier; 5388 const char *s = SISDInfo.NameHint; 5389 5390 switch (BuiltinID) { 5391 case NEON::BI__builtin_neon_vcled_s64: 5392 case NEON::BI__builtin_neon_vcled_u64: 5393 case NEON::BI__builtin_neon_vcles_f32: 5394 case NEON::BI__builtin_neon_vcled_f64: 5395 case NEON::BI__builtin_neon_vcltd_s64: 5396 case NEON::BI__builtin_neon_vcltd_u64: 5397 case NEON::BI__builtin_neon_vclts_f32: 5398 case NEON::BI__builtin_neon_vcltd_f64: 5399 case NEON::BI__builtin_neon_vcales_f32: 5400 case NEON::BI__builtin_neon_vcaled_f64: 5401 case NEON::BI__builtin_neon_vcalts_f32: 5402 case NEON::BI__builtin_neon_vcaltd_f64: 5403 // Only one direction of comparisons actually exist, cmle is actually a cmge 5404 // with swapped operands. The table gives us the right intrinsic but we 5405 // still need to do the swap. 5406 std::swap(Ops[0], Ops[1]); 5407 break; 5408 } 5409 5410 assert(Int && "Generic code assumes a valid intrinsic"); 5411 5412 // Determine the type(s) of this overloaded AArch64 intrinsic. 
5413 const Expr *Arg = E->getArg(0); 5414 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 5415 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 5416 5417 int j = 0; 5418 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 5419 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 5420 ai != ae; ++ai, ++j) { 5421 llvm::Type *ArgTy = ai->getType(); 5422 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 5423 ArgTy->getPrimitiveSizeInBits()) 5424 continue; 5425 5426 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 5427 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 5428 // it before inserting. 5429 Ops[j] = 5430 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 5431 Ops[j] = 5432 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 5433 } 5434 5435 Value *Result = CGF.EmitNeonCall(F, Ops, s); 5436 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 5437 if (ResultType->getPrimitiveSizeInBits() < 5438 Result->getType()->getPrimitiveSizeInBits()) 5439 return CGF.Builder.CreateExtractElement(Result, C0); 5440 5441 return CGF.Builder.CreateBitCast(Result, ResultType, s); 5442 } 5443 5444 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 5445 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 5446 const char *NameHint, unsigned Modifier, const CallExpr *E, 5447 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, 5448 llvm::Triple::ArchType Arch) { 5449 // Get the last argument, which specifies the vector type. 5450 llvm::APSInt NeonTypeConst; 5451 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5452 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 5453 return nullptr; 5454 5455 // Determine the type of this overloaded NEON intrinsic. 
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();
  const bool HasLegalHalfType = getTarget().hasLegalHalfType();

  // VTy is the LLVM vector type for the decoded flags; Ty aliases it as a
  // plain llvm::Type and is reassigned by some cases below.
  llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Returns the alignment of the given address as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Table entries flagged UnsignedAlts use the alternate intrinsic for
  // signed types.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  // Each case either returns the emitted value directly, or adjusts Int/Ops
  // and breaks out to the generic emission after the switch.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    // The splat's element count starts from VTy's and is doubled for
    // splatq_lane or halved for splat_laneq.
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements / 2;

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the generic llvm.fabs; other element types
    // use the intrinsic from the table.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    // Add in the double-width element type, then keep the high half of each
    // element.
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // The "less" forms swap their operands and share the vcage/vcagt
    // handling below.
    std::swap(Ops[0], Ops[1]);
    LLVM_FALLTHROUGH;
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // Pick the floating-point element type whose width matches VTy's
    // elements; the intrinsic is overloaded on {VTy, float vector}.
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  // Compare-against-zero builtins: each passes both a floating-point and an
  // integer predicate to EmitAArch64CompareBuiltinExpr.
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
                                         ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
                                         ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
                                         ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
                                         ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
                                         ICmpInst::ICMP_SLT, "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer-to-float conversion: bitcast the operand to the integer vector
    // type, then uitofp/sitofp to the f32 vector of the same quad-ness.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasLegalHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_v:
  case NEON::BI__builtin_neon_vcvtq_f16_v:
    // Same as above, converting to the f16 vector type.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasLegalHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_v:
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f16_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Overloaded on {float vector, Ty}. Here LLVMIntrinsic is the unsigned
    // variant and AltLLVMIntrinsic the signed one.
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_v:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    // Overloaded on {Ty, float vector}; always uses LLVMIntrinsic.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_v:
  case NEON::BI__builtin_neon_vcvt_u16_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_v:
  case NEON::BI__builtin_neon_vcvtq_u16_v: {
    // Float-to-integer conversion emitted as plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s16_v:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // The vcvta/vcvtn/vcvtp/vcvtm conversions all go through the table's
    // intrinsic, overloaded on {Ty, float vector}.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    // Overloaded on {VTy with truncated (half-width) elements, Ty}.
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);

  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shufflevector whose mask is CV, CV+1, ..., one index per
    // element of VTy, where CV is the constant in Ops[2].
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    // The intrinsic takes the alignment of PtrOp0 as an extra trailing
    // operand.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    // Call the intrinsic on the element pointer in Ops[1], then store its
    // result through Ops[0], bitcast to a pointer to the result type.
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v:
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    // Call the intrinsic with (Ops[1], alignment of PtrOp1) and store its
    // result through Ops[0].
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load one element from PtrOp0, insert it at index 0 of an undef vector,
    // then splat it.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Bitcast every operand from index 2 up to, but not including, the last
    // one to the vector type.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // The call takes Ops[1..]; its result is stored through Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen: view the operand with half-width elements, then zero- or
    // sign-extend to Ty.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow: view the operand with double-width elements, then truncate.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two steps: call LLVMIntrinsic on Ops[1..], then call AltLLVMIntrinsic
    // on the accumulator Ops[0] and that result.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
    // For the q variants the result type has twice Ty's element count. The
    // second overload type is the non-quad vector of the same element type.
    llvm::Type *RTy = Ty;
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
      RTy = llvm::VectorType::get(Ty->getVectorElementType(),
                                  Ty->getVectorNumElements() * 2);
    llvm::Type *Tys[2] = {
        RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
    // The second overload type is the quad vector of the same element type.
    llvm::Type *Tys[2] = {
        Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                            /*isQuad*/ true))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  // NOTE(review): the trailing (1, false) / (1, true) arguments below are
  // forwarded to EmitNeonCall; presumably the shift-operand index and a
  // right-shift flag. Confirm against EmitNeonCall's declaration.
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point vectors use LLVMIntrinsic; other types use the
    // alternate.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v:
    // Emitted as the generic llvm.nearbyint intrinsic.
    Int = Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Shift left by an immediate: build the shift vector, emit a plain shl.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Widening shift left: extend from half-width elements, then shl.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Narrowing shift right: lshr/ashr in the double-width element type,
    // then truncate to Ty.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    // The store intrinsics take the alignment of PtrOp0 as an extra trailing
    // operand.
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
    // in AArch64 it comes last. We may want to stick to one or another.
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
        Arch == llvm::Triple::aarch64_32) {
      llvm::Type *Tys[2] = { VTy, PTy };
      // Rotate left by one so the first operand moves to the end.
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
    }
    llvm::Type *Tys[2] = { PTy, VTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    // Subtract in the double-width element type, then keep the high half of
    // each element.
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is treated as a pointer to two consecutive result vectors.
    // Result vi (0 or 1) shuffles Ops[1]/Ops[2] with mask
    // (vi, e+vi, 2+vi, e+2+vi, ...) and is stored at index vi.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    // The value returned is the second store instruction.
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per element, sign-extended back to Ty.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same store layout as vtrn. Result vi shuffles Ops[1]/Ops[2] with mask
    // (vi, 2+vi, 4+vi, ...).
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same store layout as vtrn. Result vi pairs index (i + vi*e)/2 of the
    // first operand with the same index of the second.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vdot_v:
  case NEON::BI__builtin_neon_vdotq_v: {
    // The input overload type is an i8 vector with the same total bit width
    // as Ty. LLVMIntrinsic is the unsigned variant here.
    llvm::Type *InputTy =
        llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
  }
  // The four vfmlal/vfmlsl groups below differ only in the name hint. The
  // input overload type is a half vector with the same total bit width as Ty.
  case NEON::BI__builtin_neon_vfmlal_low_v:
  case NEON::BI__builtin_neon_vfmlalq_low_v: {
    llvm::Type *InputTy =
        llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
  }
  case NEON::BI__builtin_neon_vfmlsl_low_v:
  case NEON::BI__builtin_neon_vfmlslq_low_v: {
    llvm::Type *InputTy =
        llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
  }
  case NEON::BI__builtin_neon_vfmlal_high_v:
  case NEON::BI__builtin_neon_vfmlalq_high_v: {
    llvm::Type *InputTy =
           llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
  }
  case NEON::BI__builtin_neon_vfmlsl_high_v:
  case NEON::BI__builtin_neon_vfmlslq_high_v: {
    llvm::Type *InputTy =
           llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
  }
  }

  // Generic path for every case that broke out of the switch (and for
  // builtins with no dedicated case).
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
6055 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 6056 6057 Value *Result = EmitNeonCall(F, Ops, NameHint); 6058 llvm::Type *ResultType = ConvertType(E->getType()); 6059 // AArch64 intrinsic one-element vector type cast to 6060 // scalar type expected by the builtin 6061 return Builder.CreateBitCast(Result, ResultType, NameHint); 6062 } 6063 6064 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 6065 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 6066 const CmpInst::Predicate Ip, const Twine &Name) { 6067 llvm::Type *OTy = Op->getType(); 6068 6069 // FIXME: this is utterly horrific. We should not be looking at previous 6070 // codegen context to find out what needs doing. Unfortunately TableGen 6071 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 6072 // (etc). 6073 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 6074 OTy = BI->getOperand(0)->getType(); 6075 6076 Op = Builder.CreateBitCast(Op, OTy); 6077 if (OTy->getScalarType()->isFloatingPointTy()) { 6078 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 6079 } else { 6080 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 6081 } 6082 return Builder.CreateSExt(Op, Ty, Name); 6083 } 6084 6085 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 6086 Value *ExtOp, Value *IndexOp, 6087 llvm::Type *ResTy, unsigned IntID, 6088 const char *Name) { 6089 SmallVector<Value *, 2> TblOps; 6090 if (ExtOp) 6091 TblOps.push_back(ExtOp); 6092 6093 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 6094 SmallVector<uint32_t, 16> Indices; 6095 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 6096 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 6097 Indices.push_back(2*i); 6098 Indices.push_back(2*i+1); 6099 } 6100 6101 int PairPos = 0, End = Ops.size() - 1; 6102 while (PairPos < End) { 6103 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 6104 
Ops[PairPos+1], Indices, 6105 Name)); 6106 PairPos += 2; 6107 } 6108 6109 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 6110 // of the 128-bit lookup table with zero. 6111 if (PairPos == End) { 6112 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 6113 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 6114 ZeroTbl, Indices, Name)); 6115 } 6116 6117 Function *TblF; 6118 TblOps.push_back(IndexOp); 6119 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 6120 6121 return CGF.EmitNeonCall(TblF, TblOps, Name); 6122 } 6123 6124 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 6125 unsigned Value; 6126 switch (BuiltinID) { 6127 default: 6128 return nullptr; 6129 case ARM::BI__builtin_arm_nop: 6130 Value = 0; 6131 break; 6132 case ARM::BI__builtin_arm_yield: 6133 case ARM::BI__yield: 6134 Value = 1; 6135 break; 6136 case ARM::BI__builtin_arm_wfe: 6137 case ARM::BI__wfe: 6138 Value = 2; 6139 break; 6140 case ARM::BI__builtin_arm_wfi: 6141 case ARM::BI__wfi: 6142 Value = 3; 6143 break; 6144 case ARM::BI__builtin_arm_sev: 6145 case ARM::BI__sev: 6146 Value = 4; 6147 break; 6148 case ARM::BI__builtin_arm_sevl: 6149 case ARM::BI__sevl: 6150 Value = 5; 6151 break; 6152 } 6153 6154 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 6155 llvm::ConstantInt::get(Int32Ty, Value)); 6156 } 6157 6158 // Generates the IR for the read/write special register builtin, 6159 // ValueType is the type of the value that is to be written or read, 6160 // RegisterType is the type of the register being written to or read from. 6161 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 6162 const CallExpr *E, 6163 llvm::Type *RegisterType, 6164 llvm::Type *ValueType, 6165 bool IsRead, 6166 StringRef SysReg = "") { 6167 // write and register intrinsics only support 32 and 64 bit operations. 
6168 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 6169 && "Unsupported size for register."); 6170 6171 CodeGen::CGBuilderTy &Builder = CGF.Builder; 6172 CodeGen::CodeGenModule &CGM = CGF.CGM; 6173 LLVMContext &Context = CGM.getLLVMContext(); 6174 6175 if (SysReg.empty()) { 6176 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 6177 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 6178 } 6179 6180 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 6181 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 6182 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 6183 6184 llvm::Type *Types[] = { RegisterType }; 6185 6186 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 6187 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 6188 && "Can't fit 64-bit value in 32-bit register"); 6189 6190 if (IsRead) { 6191 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 6192 llvm::Value *Call = Builder.CreateCall(F, Metadata); 6193 6194 if (MixedTypes) 6195 // Read into 64 bit register and then truncate result to 32 bit. 6196 return Builder.CreateTrunc(Call, ValueType); 6197 6198 if (ValueType->isPointerTy()) 6199 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 6200 return Builder.CreateIntToPtr(Call, ValueType); 6201 6202 return Call; 6203 } 6204 6205 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 6206 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 6207 if (MixedTypes) { 6208 // Extend 32 bit write value to 64 bit to pass to write. 6209 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 6210 return Builder.CreateCall(F, { Metadata, ArgValue }); 6211 } 6212 6213 if (ValueType->isPointerTy()) { 6214 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
6215 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 6216 return Builder.CreateCall(F, { Metadata, ArgValue }); 6217 } 6218 6219 return Builder.CreateCall(F, { Metadata, ArgValue }); 6220 } 6221 6222 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 6223 /// argument that specifies the vector type. 6224 static bool HasExtraNeonArgument(unsigned BuiltinID) { 6225 switch (BuiltinID) { 6226 default: break; 6227 case NEON::BI__builtin_neon_vget_lane_i8: 6228 case NEON::BI__builtin_neon_vget_lane_i16: 6229 case NEON::BI__builtin_neon_vget_lane_i32: 6230 case NEON::BI__builtin_neon_vget_lane_i64: 6231 case NEON::BI__builtin_neon_vget_lane_f32: 6232 case NEON::BI__builtin_neon_vgetq_lane_i8: 6233 case NEON::BI__builtin_neon_vgetq_lane_i16: 6234 case NEON::BI__builtin_neon_vgetq_lane_i32: 6235 case NEON::BI__builtin_neon_vgetq_lane_i64: 6236 case NEON::BI__builtin_neon_vgetq_lane_f32: 6237 case NEON::BI__builtin_neon_vset_lane_i8: 6238 case NEON::BI__builtin_neon_vset_lane_i16: 6239 case NEON::BI__builtin_neon_vset_lane_i32: 6240 case NEON::BI__builtin_neon_vset_lane_i64: 6241 case NEON::BI__builtin_neon_vset_lane_f32: 6242 case NEON::BI__builtin_neon_vsetq_lane_i8: 6243 case NEON::BI__builtin_neon_vsetq_lane_i16: 6244 case NEON::BI__builtin_neon_vsetq_lane_i32: 6245 case NEON::BI__builtin_neon_vsetq_lane_i64: 6246 case NEON::BI__builtin_neon_vsetq_lane_f32: 6247 case NEON::BI__builtin_neon_vsha1h_u32: 6248 case NEON::BI__builtin_neon_vsha1cq_u32: 6249 case NEON::BI__builtin_neon_vsha1pq_u32: 6250 case NEON::BI__builtin_neon_vsha1mq_u32: 6251 case clang::ARM::BI_MoveToCoprocessor: 6252 case clang::ARM::BI_MoveToCoprocessor2: 6253 return false; 6254 } 6255 return true; 6256 } 6257 6258 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 6259 const CallExpr *E, 6260 ReturnValueSlot ReturnValue, 6261 llvm::Triple::ArchType Arch) { 6262 if (auto Hint = GetValueForARMHint(BuiltinID)) 6263 return Hint; 6264 6265 if (BuiltinID == 
ARM::BI__emit) { 6266 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; 6267 llvm::FunctionType *FTy = 6268 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 6269 6270 Expr::EvalResult Result; 6271 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) 6272 llvm_unreachable("Sema will ensure that the parameter is constant"); 6273 6274 llvm::APSInt Value = Result.Val.getInt(); 6275 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 6276 6277 llvm::InlineAsm *Emit = 6278 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 6279 /*hasSideEffects=*/true) 6280 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 6281 /*hasSideEffects=*/true); 6282 6283 return Builder.CreateCall(Emit); 6284 } 6285 6286 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 6287 Value *Option = EmitScalarExpr(E->getArg(0)); 6288 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 6289 } 6290 6291 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 6292 Value *Address = EmitScalarExpr(E->getArg(0)); 6293 Value *RW = EmitScalarExpr(E->getArg(1)); 6294 Value *IsData = EmitScalarExpr(E->getArg(2)); 6295 6296 // Locality is not supported on ARM target 6297 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 6298 6299 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); 6300 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 6301 } 6302 6303 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 6304 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 6305 return Builder.CreateCall( 6306 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 6307 } 6308 6309 if (BuiltinID == ARM::BI__builtin_arm_cls) { 6310 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 6311 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); 6312 } 6313 if (BuiltinID == ARM::BI__builtin_arm_cls64) { 6314 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 6315 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg, 6316 "cls"); 6317 } 6318 6319 if (BuiltinID == ARM::BI__clear_cache) { 6320 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 6321 const FunctionDecl *FD = E->getDirectCallee(); 6322 Value *Ops[2]; 6323 for (unsigned i = 0; i < 2; i++) 6324 Ops[i] = EmitScalarExpr(E->getArg(i)); 6325 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 6326 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 6327 StringRef Name = FD->getName(); 6328 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 6329 } 6330 6331 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 6332 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 6333 Function *F; 6334 6335 switch (BuiltinID) { 6336 default: llvm_unreachable("unexpected builtin"); 6337 case ARM::BI__builtin_arm_mcrr: 6338 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 6339 break; 6340 case ARM::BI__builtin_arm_mcrr2: 6341 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 6342 break; 6343 } 6344 6345 // MCRR{2} instruction has 5 operands but 6346 // the intrinsic has 4 because Rt and Rt2 6347 // are represented as a single unsigned 64 6348 // bit integer in the intrinsic definition 6349 // but internally it's represented as 2 32 6350 // bit integers. 
6351 6352 Value *Coproc = EmitScalarExpr(E->getArg(0)); 6353 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 6354 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 6355 Value *CRm = EmitScalarExpr(E->getArg(3)); 6356 6357 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 6358 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 6359 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 6360 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 6361 6362 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 6363 } 6364 6365 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 6366 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 6367 Function *F; 6368 6369 switch (BuiltinID) { 6370 default: llvm_unreachable("unexpected builtin"); 6371 case ARM::BI__builtin_arm_mrrc: 6372 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 6373 break; 6374 case ARM::BI__builtin_arm_mrrc2: 6375 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 6376 break; 6377 } 6378 6379 Value *Coproc = EmitScalarExpr(E->getArg(0)); 6380 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 6381 Value *CRm = EmitScalarExpr(E->getArg(2)); 6382 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 6383 6384 // Returns an unsigned 64 bit integer, represented 6385 // as two 32 bit integers. 
6386 6387 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 6388 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 6389 Rt = Builder.CreateZExt(Rt, Int64Ty); 6390 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 6391 6392 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 6393 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 6394 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 6395 6396 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 6397 } 6398 6399 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 6400 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 6401 BuiltinID == ARM::BI__builtin_arm_ldaex) && 6402 getContext().getTypeSize(E->getType()) == 64) || 6403 BuiltinID == ARM::BI__ldrexd) { 6404 Function *F; 6405 6406 switch (BuiltinID) { 6407 default: llvm_unreachable("unexpected builtin"); 6408 case ARM::BI__builtin_arm_ldaex: 6409 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 6410 break; 6411 case ARM::BI__builtin_arm_ldrexd: 6412 case ARM::BI__builtin_arm_ldrex: 6413 case ARM::BI__ldrexd: 6414 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 6415 break; 6416 } 6417 6418 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 6419 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 6420 "ldrexd"); 6421 6422 Value *Val0 = Builder.CreateExtractValue(Val, 1); 6423 Value *Val1 = Builder.CreateExtractValue(Val, 0); 6424 Val0 = Builder.CreateZExt(Val0, Int64Ty); 6425 Val1 = Builder.CreateZExt(Val1, Int64Ty); 6426 6427 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 6428 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 6429 Val = Builder.CreateOr(Val, Val1); 6430 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 6431 } 6432 6433 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 6434 BuiltinID == ARM::BI__builtin_arm_ldaex) { 6435 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 6436 6437 QualType Ty = E->getType(); 6438 llvm::Type *RealResTy = ConvertType(Ty); 6439 llvm::Type *PtrTy = llvm::IntegerType::get( 
6440 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 6441 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 6442 6443 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 6444 ? Intrinsic::arm_ldaex 6445 : Intrinsic::arm_ldrex, 6446 PtrTy); 6447 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 6448 6449 if (RealResTy->isPointerTy()) 6450 return Builder.CreateIntToPtr(Val, RealResTy); 6451 else { 6452 llvm::Type *IntResTy = llvm::IntegerType::get( 6453 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 6454 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 6455 return Builder.CreateBitCast(Val, RealResTy); 6456 } 6457 } 6458 6459 if (BuiltinID == ARM::BI__builtin_arm_strexd || 6460 ((BuiltinID == ARM::BI__builtin_arm_stlex || 6461 BuiltinID == ARM::BI__builtin_arm_strex) && 6462 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 6463 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 6464 ? Intrinsic::arm_stlexd 6465 : Intrinsic::arm_strexd); 6466 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 6467 6468 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 6469 Value *Val = EmitScalarExpr(E->getArg(0)); 6470 Builder.CreateStore(Val, Tmp); 6471 6472 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 6473 Val = Builder.CreateLoad(LdPtr); 6474 6475 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 6476 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 6477 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 6478 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 6479 } 6480 6481 if (BuiltinID == ARM::BI__builtin_arm_strex || 6482 BuiltinID == ARM::BI__builtin_arm_stlex) { 6483 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 6484 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 6485 6486 QualType Ty = E->getArg(0)->getType(); 6487 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 6488 
getContext().getTypeSize(Ty)); 6489 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 6490 6491 if (StoreVal->getType()->isPointerTy()) 6492 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 6493 else { 6494 llvm::Type *IntTy = llvm::IntegerType::get( 6495 getLLVMContext(), 6496 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 6497 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 6498 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 6499 } 6500 6501 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 6502 ? Intrinsic::arm_stlex 6503 : Intrinsic::arm_strex, 6504 StoreAddr->getType()); 6505 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 6506 } 6507 6508 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 6509 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 6510 return Builder.CreateCall(F); 6511 } 6512 6513 // CRC32 6514 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 6515 switch (BuiltinID) { 6516 case ARM::BI__builtin_arm_crc32b: 6517 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 6518 case ARM::BI__builtin_arm_crc32cb: 6519 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 6520 case ARM::BI__builtin_arm_crc32h: 6521 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 6522 case ARM::BI__builtin_arm_crc32ch: 6523 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 6524 case ARM::BI__builtin_arm_crc32w: 6525 case ARM::BI__builtin_arm_crc32d: 6526 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 6527 case ARM::BI__builtin_arm_crc32cw: 6528 case ARM::BI__builtin_arm_crc32cd: 6529 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 6530 } 6531 6532 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 6533 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 6534 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 6535 6536 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 6537 // intrinsics, hence we need different codegen for these cases. 
6538 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 6539 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 6540 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 6541 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 6542 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 6543 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 6544 6545 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 6546 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 6547 return Builder.CreateCall(F, {Res, Arg1b}); 6548 } else { 6549 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 6550 6551 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 6552 return Builder.CreateCall(F, {Arg0, Arg1}); 6553 } 6554 } 6555 6556 if (BuiltinID == ARM::BI__builtin_arm_rsr || 6557 BuiltinID == ARM::BI__builtin_arm_rsr64 || 6558 BuiltinID == ARM::BI__builtin_arm_rsrp || 6559 BuiltinID == ARM::BI__builtin_arm_wsr || 6560 BuiltinID == ARM::BI__builtin_arm_wsr64 || 6561 BuiltinID == ARM::BI__builtin_arm_wsrp) { 6562 6563 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 6564 BuiltinID == ARM::BI__builtin_arm_rsr64 || 6565 BuiltinID == ARM::BI__builtin_arm_rsrp; 6566 6567 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 6568 BuiltinID == ARM::BI__builtin_arm_wsrp; 6569 6570 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 6571 BuiltinID == ARM::BI__builtin_arm_wsr64; 6572 6573 llvm::Type *ValueType; 6574 llvm::Type *RegisterType; 6575 if (IsPointerBuiltin) { 6576 ValueType = VoidPtrTy; 6577 RegisterType = Int32Ty; 6578 } else if (Is64Bit) { 6579 ValueType = RegisterType = Int64Ty; 6580 } else { 6581 ValueType = RegisterType = Int32Ty; 6582 } 6583 6584 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 6585 } 6586 6587 // Deal with MVE builtins 6588 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) 6589 return Result; 6590 // Handle CDE builtins 6591 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) 6592 
return Result; 6593 6594 // Find out if any arguments are required to be integer constant 6595 // expressions. 6596 unsigned ICEArguments = 0; 6597 ASTContext::GetBuiltinTypeError Error; 6598 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 6599 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 6600 6601 auto getAlignmentValue32 = [&](Address addr) -> Value* { 6602 return Builder.getInt32(addr.getAlignment().getQuantity()); 6603 }; 6604 6605 Address PtrOp0 = Address::invalid(); 6606 Address PtrOp1 = Address::invalid(); 6607 SmallVector<Value*, 4> Ops; 6608 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 6609 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 6610 for (unsigned i = 0, e = NumArgs; i != e; i++) { 6611 if (i == 0) { 6612 switch (BuiltinID) { 6613 case NEON::BI__builtin_neon_vld1_v: 6614 case NEON::BI__builtin_neon_vld1q_v: 6615 case NEON::BI__builtin_neon_vld1q_lane_v: 6616 case NEON::BI__builtin_neon_vld1_lane_v: 6617 case NEON::BI__builtin_neon_vld1_dup_v: 6618 case NEON::BI__builtin_neon_vld1q_dup_v: 6619 case NEON::BI__builtin_neon_vst1_v: 6620 case NEON::BI__builtin_neon_vst1q_v: 6621 case NEON::BI__builtin_neon_vst1q_lane_v: 6622 case NEON::BI__builtin_neon_vst1_lane_v: 6623 case NEON::BI__builtin_neon_vst2_v: 6624 case NEON::BI__builtin_neon_vst2q_v: 6625 case NEON::BI__builtin_neon_vst2_lane_v: 6626 case NEON::BI__builtin_neon_vst2q_lane_v: 6627 case NEON::BI__builtin_neon_vst3_v: 6628 case NEON::BI__builtin_neon_vst3q_v: 6629 case NEON::BI__builtin_neon_vst3_lane_v: 6630 case NEON::BI__builtin_neon_vst3q_lane_v: 6631 case NEON::BI__builtin_neon_vst4_v: 6632 case NEON::BI__builtin_neon_vst4q_v: 6633 case NEON::BI__builtin_neon_vst4_lane_v: 6634 case NEON::BI__builtin_neon_vst4q_lane_v: 6635 // Get the alignment for the argument in addition to the value; 6636 // we'll use it later. 
6637 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 6638 Ops.push_back(PtrOp0.getPointer()); 6639 continue; 6640 } 6641 } 6642 if (i == 1) { 6643 switch (BuiltinID) { 6644 case NEON::BI__builtin_neon_vld2_v: 6645 case NEON::BI__builtin_neon_vld2q_v: 6646 case NEON::BI__builtin_neon_vld3_v: 6647 case NEON::BI__builtin_neon_vld3q_v: 6648 case NEON::BI__builtin_neon_vld4_v: 6649 case NEON::BI__builtin_neon_vld4q_v: 6650 case NEON::BI__builtin_neon_vld2_lane_v: 6651 case NEON::BI__builtin_neon_vld2q_lane_v: 6652 case NEON::BI__builtin_neon_vld3_lane_v: 6653 case NEON::BI__builtin_neon_vld3q_lane_v: 6654 case NEON::BI__builtin_neon_vld4_lane_v: 6655 case NEON::BI__builtin_neon_vld4q_lane_v: 6656 case NEON::BI__builtin_neon_vld2_dup_v: 6657 case NEON::BI__builtin_neon_vld2q_dup_v: 6658 case NEON::BI__builtin_neon_vld3_dup_v: 6659 case NEON::BI__builtin_neon_vld3q_dup_v: 6660 case NEON::BI__builtin_neon_vld4_dup_v: 6661 case NEON::BI__builtin_neon_vld4q_dup_v: 6662 // Get the alignment for the argument in addition to the value; 6663 // we'll use it later. 6664 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 6665 Ops.push_back(PtrOp1.getPointer()); 6666 continue; 6667 } 6668 } 6669 6670 if ((ICEArguments & (1 << i)) == 0) { 6671 Ops.push_back(EmitScalarExpr(E->getArg(i))); 6672 } else { 6673 // If this is required to be a constant, constant fold it so that we know 6674 // that the generated intrinsic gets a ConstantInt. 
6675 llvm::APSInt Result; 6676 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 6677 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 6678 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 6679 } 6680 } 6681 6682 switch (BuiltinID) { 6683 default: break; 6684 6685 case NEON::BI__builtin_neon_vget_lane_i8: 6686 case NEON::BI__builtin_neon_vget_lane_i16: 6687 case NEON::BI__builtin_neon_vget_lane_i32: 6688 case NEON::BI__builtin_neon_vget_lane_i64: 6689 case NEON::BI__builtin_neon_vget_lane_f32: 6690 case NEON::BI__builtin_neon_vgetq_lane_i8: 6691 case NEON::BI__builtin_neon_vgetq_lane_i16: 6692 case NEON::BI__builtin_neon_vgetq_lane_i32: 6693 case NEON::BI__builtin_neon_vgetq_lane_i64: 6694 case NEON::BI__builtin_neon_vgetq_lane_f32: 6695 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 6696 6697 case NEON::BI__builtin_neon_vrndns_f32: { 6698 Value *Arg = EmitScalarExpr(E->getArg(0)); 6699 llvm::Type *Tys[] = {Arg->getType()}; 6700 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys); 6701 return Builder.CreateCall(F, {Arg}, "vrndn"); } 6702 6703 case NEON::BI__builtin_neon_vset_lane_i8: 6704 case NEON::BI__builtin_neon_vset_lane_i16: 6705 case NEON::BI__builtin_neon_vset_lane_i32: 6706 case NEON::BI__builtin_neon_vset_lane_i64: 6707 case NEON::BI__builtin_neon_vset_lane_f32: 6708 case NEON::BI__builtin_neon_vsetq_lane_i8: 6709 case NEON::BI__builtin_neon_vsetq_lane_i16: 6710 case NEON::BI__builtin_neon_vsetq_lane_i32: 6711 case NEON::BI__builtin_neon_vsetq_lane_i64: 6712 case NEON::BI__builtin_neon_vsetq_lane_f32: 6713 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 6714 6715 case NEON::BI__builtin_neon_vsha1h_u32: 6716 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 6717 "vsha1h"); 6718 case NEON::BI__builtin_neon_vsha1cq_u32: 6719 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 6720 "vsha1h"); 6721 case 
NEON::BI__builtin_neon_vsha1pq_u32: 6722 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 6723 "vsha1h"); 6724 case NEON::BI__builtin_neon_vsha1mq_u32: 6725 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 6726 "vsha1h"); 6727 6728 // The ARM _MoveToCoprocessor builtins put the input register value as 6729 // the first argument, but the LLVM intrinsic expects it as the third one. 6730 case ARM::BI_MoveToCoprocessor: 6731 case ARM::BI_MoveToCoprocessor2: { 6732 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 6733 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 6734 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 6735 Ops[3], Ops[4], Ops[5]}); 6736 } 6737 case ARM::BI_BitScanForward: 6738 case ARM::BI_BitScanForward64: 6739 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 6740 case ARM::BI_BitScanReverse: 6741 case ARM::BI_BitScanReverse64: 6742 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 6743 6744 case ARM::BI_InterlockedAnd64: 6745 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 6746 case ARM::BI_InterlockedExchange64: 6747 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 6748 case ARM::BI_InterlockedExchangeAdd64: 6749 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 6750 case ARM::BI_InterlockedExchangeSub64: 6751 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 6752 case ARM::BI_InterlockedOr64: 6753 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 6754 case ARM::BI_InterlockedXor64: 6755 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 6756 case ARM::BI_InterlockedDecrement64: 6757 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 6758 case ARM::BI_InterlockedIncrement64: 6759 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 6760 case ARM::BI_InterlockedExchangeAdd8_acq: 6761 case ARM::BI_InterlockedExchangeAdd16_acq: 6762 case 
ARM::BI_InterlockedExchangeAdd_acq: 6763 case ARM::BI_InterlockedExchangeAdd64_acq: 6764 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); 6765 case ARM::BI_InterlockedExchangeAdd8_rel: 6766 case ARM::BI_InterlockedExchangeAdd16_rel: 6767 case ARM::BI_InterlockedExchangeAdd_rel: 6768 case ARM::BI_InterlockedExchangeAdd64_rel: 6769 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); 6770 case ARM::BI_InterlockedExchangeAdd8_nf: 6771 case ARM::BI_InterlockedExchangeAdd16_nf: 6772 case ARM::BI_InterlockedExchangeAdd_nf: 6773 case ARM::BI_InterlockedExchangeAdd64_nf: 6774 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); 6775 case ARM::BI_InterlockedExchange8_acq: 6776 case ARM::BI_InterlockedExchange16_acq: 6777 case ARM::BI_InterlockedExchange_acq: 6778 case ARM::BI_InterlockedExchange64_acq: 6779 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); 6780 case ARM::BI_InterlockedExchange8_rel: 6781 case ARM::BI_InterlockedExchange16_rel: 6782 case ARM::BI_InterlockedExchange_rel: 6783 case ARM::BI_InterlockedExchange64_rel: 6784 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); 6785 case ARM::BI_InterlockedExchange8_nf: 6786 case ARM::BI_InterlockedExchange16_nf: 6787 case ARM::BI_InterlockedExchange_nf: 6788 case ARM::BI_InterlockedExchange64_nf: 6789 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); 6790 case ARM::BI_InterlockedCompareExchange8_acq: 6791 case ARM::BI_InterlockedCompareExchange16_acq: 6792 case ARM::BI_InterlockedCompareExchange_acq: 6793 case ARM::BI_InterlockedCompareExchange64_acq: 6794 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); 6795 case ARM::BI_InterlockedCompareExchange8_rel: 6796 case ARM::BI_InterlockedCompareExchange16_rel: 6797 case ARM::BI_InterlockedCompareExchange_rel: 6798 case ARM::BI_InterlockedCompareExchange64_rel: 6799 return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); 6800 case ARM::BI_InterlockedCompareExchange8_nf: 6801 case ARM::BI_InterlockedCompareExchange16_nf: 6802 case ARM::BI_InterlockedCompareExchange_nf: 6803 case ARM::BI_InterlockedCompareExchange64_nf: 6804 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); 6805 case ARM::BI_InterlockedOr8_acq: 6806 case ARM::BI_InterlockedOr16_acq: 6807 case ARM::BI_InterlockedOr_acq: 6808 case ARM::BI_InterlockedOr64_acq: 6809 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); 6810 case ARM::BI_InterlockedOr8_rel: 6811 case ARM::BI_InterlockedOr16_rel: 6812 case ARM::BI_InterlockedOr_rel: 6813 case ARM::BI_InterlockedOr64_rel: 6814 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); 6815 case ARM::BI_InterlockedOr8_nf: 6816 case ARM::BI_InterlockedOr16_nf: 6817 case ARM::BI_InterlockedOr_nf: 6818 case ARM::BI_InterlockedOr64_nf: 6819 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); 6820 case ARM::BI_InterlockedXor8_acq: 6821 case ARM::BI_InterlockedXor16_acq: 6822 case ARM::BI_InterlockedXor_acq: 6823 case ARM::BI_InterlockedXor64_acq: 6824 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); 6825 case ARM::BI_InterlockedXor8_rel: 6826 case ARM::BI_InterlockedXor16_rel: 6827 case ARM::BI_InterlockedXor_rel: 6828 case ARM::BI_InterlockedXor64_rel: 6829 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); 6830 case ARM::BI_InterlockedXor8_nf: 6831 case ARM::BI_InterlockedXor16_nf: 6832 case ARM::BI_InterlockedXor_nf: 6833 case ARM::BI_InterlockedXor64_nf: 6834 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); 6835 case ARM::BI_InterlockedAnd8_acq: 6836 case ARM::BI_InterlockedAnd16_acq: 6837 case ARM::BI_InterlockedAnd_acq: 6838 case ARM::BI_InterlockedAnd64_acq: 6839 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); 6840 case ARM::BI_InterlockedAnd8_rel: 6841 case ARM::BI_InterlockedAnd16_rel: 6842 case 
ARM::BI_InterlockedAnd_rel: 6843 case ARM::BI_InterlockedAnd64_rel: 6844 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); 6845 case ARM::BI_InterlockedAnd8_nf: 6846 case ARM::BI_InterlockedAnd16_nf: 6847 case ARM::BI_InterlockedAnd_nf: 6848 case ARM::BI_InterlockedAnd64_nf: 6849 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); 6850 case ARM::BI_InterlockedIncrement16_acq: 6851 case ARM::BI_InterlockedIncrement_acq: 6852 case ARM::BI_InterlockedIncrement64_acq: 6853 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); 6854 case ARM::BI_InterlockedIncrement16_rel: 6855 case ARM::BI_InterlockedIncrement_rel: 6856 case ARM::BI_InterlockedIncrement64_rel: 6857 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); 6858 case ARM::BI_InterlockedIncrement16_nf: 6859 case ARM::BI_InterlockedIncrement_nf: 6860 case ARM::BI_InterlockedIncrement64_nf: 6861 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); 6862 case ARM::BI_InterlockedDecrement16_acq: 6863 case ARM::BI_InterlockedDecrement_acq: 6864 case ARM::BI_InterlockedDecrement64_acq: 6865 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); 6866 case ARM::BI_InterlockedDecrement16_rel: 6867 case ARM::BI_InterlockedDecrement_rel: 6868 case ARM::BI_InterlockedDecrement64_rel: 6869 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); 6870 case ARM::BI_InterlockedDecrement16_nf: 6871 case ARM::BI_InterlockedDecrement_nf: 6872 case ARM::BI_InterlockedDecrement64_nf: 6873 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); 6874 } 6875 6876 // Get the last argument, which specifies the vector type. 
6877 assert(HasExtraArg); 6878 llvm::APSInt Result; 6879 const Expr *Arg = E->getArg(E->getNumArgs()-1); 6880 if (!Arg->isIntegerConstantExpr(Result, getContext())) 6881 return nullptr; 6882 6883 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 6884 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 6885 // Determine the overloaded type of this builtin. 6886 llvm::Type *Ty; 6887 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 6888 Ty = FloatTy; 6889 else 6890 Ty = DoubleTy; 6891 6892 // Determine whether this is an unsigned conversion or not. 6893 bool usgn = Result.getZExtValue() == 1; 6894 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 6895 6896 // Call the appropriate intrinsic. 6897 Function *F = CGM.getIntrinsic(Int, Ty); 6898 return Builder.CreateCall(F, Ops, "vcvtr"); 6899 } 6900 6901 // Determine the type of this overloaded NEON intrinsic. 6902 NeonTypeFlags Type(Result.getZExtValue()); 6903 bool usgn = Type.isUnsigned(); 6904 bool rightShift = false; 6905 6906 llvm::VectorType *VTy = GetNeonType(this, Type, 6907 getTarget().hasLegalHalfType()); 6908 llvm::Type *Ty = VTy; 6909 if (!Ty) 6910 return nullptr; 6911 6912 // Many NEON builtins have identical semantics and uses in ARM and 6913 // AArch64. Emit these in a single function. 6914 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 6915 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( 6916 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 6917 if (Builtin) 6918 return EmitCommonNeonBuiltinExpr( 6919 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 6920 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch); 6921 6922 unsigned Int; 6923 switch (BuiltinID) { 6924 default: return nullptr; 6925 case NEON::BI__builtin_neon_vld1q_lane_v: 6926 // Handle 64-bit integer elements as a special case. Use shuffles of 6927 // one-element vectors to avoid poor code for i64 in the backend. 
6928 if (VTy->getElementType()->isIntegerTy(64)) { 6929 // Extract the other lane. 6930 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6931 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 6932 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 6933 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 6934 // Load the value as a one-element vector. 6935 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 6936 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 6937 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 6938 Value *Align = getAlignmentValue32(PtrOp0); 6939 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 6940 // Combine them. 6941 uint32_t Indices[] = {1 - Lane, Lane}; 6942 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 6943 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 6944 } 6945 LLVM_FALLTHROUGH; 6946 case NEON::BI__builtin_neon_vld1_lane_v: { 6947 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6948 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 6949 Value *Ld = Builder.CreateLoad(PtrOp0); 6950 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 6951 } 6952 case NEON::BI__builtin_neon_vqrshrn_n_v: 6953 Int = 6954 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 6955 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 6956 1, true); 6957 case NEON::BI__builtin_neon_vqrshrun_n_v: 6958 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 6959 Ops, "vqrshrun_n", 1, true); 6960 case NEON::BI__builtin_neon_vqshrn_n_v: 6961 Int = usgn ? 
Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 6962 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 6963 1, true); 6964 case NEON::BI__builtin_neon_vqshrun_n_v: 6965 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 6966 Ops, "vqshrun_n", 1, true); 6967 case NEON::BI__builtin_neon_vrecpe_v: 6968 case NEON::BI__builtin_neon_vrecpeq_v: 6969 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 6970 Ops, "vrecpe"); 6971 case NEON::BI__builtin_neon_vrshrn_n_v: 6972 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 6973 Ops, "vrshrn_n", 1, true); 6974 case NEON::BI__builtin_neon_vrsra_n_v: 6975 case NEON::BI__builtin_neon_vrsraq_n_v: 6976 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6977 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6978 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 6979 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 6980 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 6981 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 6982 case NEON::BI__builtin_neon_vsri_n_v: 6983 case NEON::BI__builtin_neon_vsriq_n_v: 6984 rightShift = true; 6985 LLVM_FALLTHROUGH; 6986 case NEON::BI__builtin_neon_vsli_n_v: 6987 case NEON::BI__builtin_neon_vsliq_n_v: 6988 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 6989 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 6990 Ops, "vsli_n"); 6991 case NEON::BI__builtin_neon_vsra_n_v: 6992 case NEON::BI__builtin_neon_vsraq_n_v: 6993 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6994 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6995 return Builder.CreateAdd(Ops[0], Ops[1]); 6996 case NEON::BI__builtin_neon_vst1q_lane_v: 6997 // Handle 64-bit integer elements as a special case. Use a shuffle to get 6998 // a one-element vector and avoid poor code for i64 in the backend. 
6999 if (VTy->getElementType()->isIntegerTy(64)) { 7000 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7001 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 7002 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 7003 Ops[2] = getAlignmentValue32(PtrOp0); 7004 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 7005 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 7006 Tys), Ops); 7007 } 7008 LLVM_FALLTHROUGH; 7009 case NEON::BI__builtin_neon_vst1_lane_v: { 7010 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7011 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 7012 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 7013 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 7014 return St; 7015 } 7016 case NEON::BI__builtin_neon_vtbl1_v: 7017 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 7018 Ops, "vtbl1"); 7019 case NEON::BI__builtin_neon_vtbl2_v: 7020 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 7021 Ops, "vtbl2"); 7022 case NEON::BI__builtin_neon_vtbl3_v: 7023 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 7024 Ops, "vtbl3"); 7025 case NEON::BI__builtin_neon_vtbl4_v: 7026 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 7027 Ops, "vtbl4"); 7028 case NEON::BI__builtin_neon_vtbx1_v: 7029 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 7030 Ops, "vtbx1"); 7031 case NEON::BI__builtin_neon_vtbx2_v: 7032 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 7033 Ops, "vtbx2"); 7034 case NEON::BI__builtin_neon_vtbx3_v: 7035 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 7036 Ops, "vtbx3"); 7037 case NEON::BI__builtin_neon_vtbx4_v: 7038 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 7039 Ops, "vtbx4"); 7040 } 7041 } 7042 7043 template<typename Integer> 7044 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { 7045 llvm::APSInt IntVal; 7046 
bool IsConst = E->isIntegerConstantExpr(IntVal, Context); 7047 assert(IsConst && "Sema should have checked this was a constant"); 7048 (void)IsConst; 7049 return IntVal.getExtValue(); 7050 } 7051 7052 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, 7053 llvm::Type *T, bool Unsigned) { 7054 // Helper function called by Tablegen-constructed ARM MVE builtin codegen, 7055 // which finds it convenient to specify signed/unsigned as a boolean flag. 7056 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); 7057 } 7058 7059 static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, 7060 uint32_t Shift, bool Unsigned) { 7061 // MVE helper function for integer shift right. This must handle signed vs 7062 // unsigned, and also deal specially with the case where the shift count is 7063 // equal to the lane size. In LLVM IR, an LShr with that parameter would be 7064 // undefined behavior, but in MVE it's legal, so we must convert it to code 7065 // that is not undefined in IR. 7066 unsigned LaneBits = 7067 V->getType()->getVectorElementType()->getPrimitiveSizeInBits(); 7068 if (Shift == LaneBits) { 7069 // An unsigned shift of the full lane size always generates zero, so we can 7070 // simply emit a zero vector. A signed shift of the full lane size does the 7071 // same thing as shifting by one bit fewer. 7072 if (Unsigned) 7073 return llvm::Constant::getNullValue(V->getType()); 7074 else 7075 --Shift; 7076 } 7077 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); 7078 } 7079 7080 static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { 7081 // MVE-specific helper function for a vector splat, which infers the element 7082 // count of the output vector by knowing that MVE vectors are all 128 bits 7083 // wide. 
7084 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits(); 7085 return Builder.CreateVectorSplat(Elements, V); 7086 } 7087 7088 static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, 7089 CodeGenFunction *CGF, 7090 llvm::Value *V, 7091 llvm::Type *DestType) { 7092 // Convert one MVE vector type into another by reinterpreting its in-register 7093 // format. 7094 // 7095 // Little-endian, this is identical to a bitcast (which reinterprets the 7096 // memory format). But big-endian, they're not necessarily the same, because 7097 // the register and memory formats map to each other differently depending on 7098 // the lane size. 7099 // 7100 // We generate a bitcast whenever we can (if we're little-endian, or if the 7101 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic 7102 // that performs the different kind of reinterpretation. 7103 if (CGF->getTarget().isBigEndian() && 7104 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) { 7105 return Builder.CreateCall( 7106 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq, 7107 {DestType, V->getType()}), 7108 V); 7109 } else { 7110 return Builder.CreateBitCast(V, DestType); 7111 } 7112 } 7113 7114 static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { 7115 // Make a shufflevector that extracts every other element of a vector (evens 7116 // or odds, as desired). 7117 SmallVector<uint32_t, 16> Indices; 7118 unsigned InputElements = V->getType()->getVectorNumElements(); 7119 for (unsigned i = 0; i < InputElements; i += 2) 7120 Indices.push_back(i + Odd); 7121 return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), 7122 Indices); 7123 } 7124 7125 static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, 7126 llvm::Value *V1) { 7127 // Make a shufflevector that interleaves two vectors element by element. 
7128 assert(V0->getType() == V1->getType() && "Can't zip different vector types"); 7129 SmallVector<uint32_t, 16> Indices; 7130 unsigned InputElements = V0->getType()->getVectorNumElements(); 7131 for (unsigned i = 0; i < InputElements; i++) { 7132 Indices.push_back(i); 7133 Indices.push_back(i + InputElements); 7134 } 7135 return Builder.CreateShuffleVector(V0, V1, Indices); 7136 } 7137 7138 template<unsigned HighBit, unsigned OtherBits> 7139 static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { 7140 // MVE-specific helper function to make a vector splat of a constant such as 7141 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal. 7142 llvm::Type *T = VT->getVectorElementType(); 7143 unsigned LaneBits = T->getPrimitiveSizeInBits(); 7144 uint32_t Value = HighBit << (LaneBits - 1); 7145 if (OtherBits) 7146 Value |= (1UL << (LaneBits - 1)) - 1; 7147 llvm::Value *Lane = llvm::ConstantInt::get(T, Value); 7148 return ARMMVEVectorSplat(Builder, Lane); 7149 } 7150 7151 static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, 7152 llvm::Value *V, 7153 unsigned ReverseWidth) { 7154 // MVE-specific helper function which reverses the elements of a 7155 // vector within every (ReverseWidth)-bit collection of lanes. 
7156 SmallVector<uint32_t, 16> Indices; 7157 unsigned LaneSize = V->getType()->getScalarSizeInBits(); 7158 unsigned Elements = 128 / LaneSize; 7159 unsigned Mask = ReverseWidth / LaneSize - 1; 7160 for (unsigned i = 0; i < Elements; i++) 7161 Indices.push_back(i ^ Mask); 7162 return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), 7163 Indices); 7164 } 7165 7166 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, 7167 const CallExpr *E, 7168 ReturnValueSlot ReturnValue, 7169 llvm::Triple::ArchType Arch) { 7170 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType; 7171 Intrinsic::ID IRIntr; 7172 unsigned NumVectors; 7173 7174 // Code autogenerated by Tablegen will handle all the simple builtins. 7175 switch (BuiltinID) { 7176 #include "clang/Basic/arm_mve_builtin_cg.inc" 7177 7178 // If we didn't match an MVE builtin id at all, go back to the 7179 // main EmitARMBuiltinExpr. 7180 default: 7181 return nullptr; 7182 } 7183 7184 // Anything that breaks from that switch is an MVE builtin that 7185 // needs handwritten code to generate. 
7186 7187 switch (CustomCodeGenType) { 7188 7189 case CustomCodeGen::VLD24: { 7190 llvm::SmallVector<Value *, 4> Ops; 7191 llvm::SmallVector<llvm::Type *, 4> Tys; 7192 7193 auto MvecCType = E->getType(); 7194 auto MvecLType = ConvertType(MvecCType); 7195 assert(MvecLType->isStructTy() && 7196 "Return type for vld[24]q should be a struct"); 7197 assert(MvecLType->getStructNumElements() == 1 && 7198 "Return-type struct for vld[24]q should have one element"); 7199 auto MvecLTypeInner = MvecLType->getStructElementType(0); 7200 assert(MvecLTypeInner->isArrayTy() && 7201 "Return-type struct for vld[24]q should contain an array"); 7202 assert(MvecLTypeInner->getArrayNumElements() == NumVectors && 7203 "Array member of return-type struct vld[24]q has wrong length"); 7204 auto VecLType = MvecLTypeInner->getArrayElementType(); 7205 7206 Tys.push_back(VecLType); 7207 7208 auto Addr = E->getArg(0); 7209 Ops.push_back(EmitScalarExpr(Addr)); 7210 Tys.push_back(ConvertType(Addr->getType())); 7211 7212 Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); 7213 Value *LoadResult = Builder.CreateCall(F, Ops); 7214 Value *MvecOut = UndefValue::get(MvecLType); 7215 for (unsigned i = 0; i < NumVectors; ++i) { 7216 Value *Vec = Builder.CreateExtractValue(LoadResult, i); 7217 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i}); 7218 } 7219 7220 if (ReturnValue.isNull()) 7221 return MvecOut; 7222 else 7223 return Builder.CreateStore(MvecOut, ReturnValue.getValue()); 7224 } 7225 7226 case CustomCodeGen::VST24: { 7227 llvm::SmallVector<Value *, 4> Ops; 7228 llvm::SmallVector<llvm::Type *, 4> Tys; 7229 7230 auto Addr = E->getArg(0); 7231 Ops.push_back(EmitScalarExpr(Addr)); 7232 Tys.push_back(ConvertType(Addr->getType())); 7233 7234 auto MvecCType = E->getArg(1)->getType(); 7235 auto MvecLType = ConvertType(MvecCType); 7236 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct"); 7237 assert(MvecLType->getStructNumElements() == 1 && 7238 "Data-type struct for 
vst2q should have one element"); 7239 auto MvecLTypeInner = MvecLType->getStructElementType(0); 7240 assert(MvecLTypeInner->isArrayTy() && 7241 "Data-type struct for vst2q should contain an array"); 7242 assert(MvecLTypeInner->getArrayNumElements() == NumVectors && 7243 "Array member of return-type struct vld[24]q has wrong length"); 7244 auto VecLType = MvecLTypeInner->getArrayElementType(); 7245 7246 Tys.push_back(VecLType); 7247 7248 AggValueSlot MvecSlot = CreateAggTemp(MvecCType); 7249 EmitAggExpr(E->getArg(1), MvecSlot); 7250 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress()); 7251 for (unsigned i = 0; i < NumVectors; i++) 7252 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i})); 7253 7254 Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); 7255 Value *ToReturn = nullptr; 7256 for (unsigned i = 0; i < NumVectors; i++) { 7257 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i)); 7258 ToReturn = Builder.CreateCall(F, Ops); 7259 Ops.pop_back(); 7260 } 7261 return ToReturn; 7262 } 7263 } 7264 llvm_unreachable("unknown custom codegen type."); 7265 } 7266 7267 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID, 7268 const CallExpr *E, 7269 ReturnValueSlot ReturnValue, 7270 llvm::Triple::ArchType Arch) { 7271 switch (BuiltinID) { 7272 default: 7273 return nullptr; 7274 #include "clang/Basic/arm_cde_builtin_cg.inc" 7275 } 7276 } 7277 7278 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 7279 const CallExpr *E, 7280 SmallVectorImpl<Value *> &Ops, 7281 llvm::Triple::ArchType Arch) { 7282 unsigned int Int = 0; 7283 const char *s = nullptr; 7284 7285 switch (BuiltinID) { 7286 default: 7287 return nullptr; 7288 case NEON::BI__builtin_neon_vtbl1_v: 7289 case NEON::BI__builtin_neon_vqtbl1_v: 7290 case NEON::BI__builtin_neon_vqtbl1q_v: 7291 case NEON::BI__builtin_neon_vtbl2_v: 7292 case NEON::BI__builtin_neon_vqtbl2_v: 7293 case NEON::BI__builtin_neon_vqtbl2q_v: 7294 case NEON::BI__builtin_neon_vtbl3_v: 7295 case 
NEON::BI__builtin_neon_vqtbl3_v: 7296 case NEON::BI__builtin_neon_vqtbl3q_v: 7297 case NEON::BI__builtin_neon_vtbl4_v: 7298 case NEON::BI__builtin_neon_vqtbl4_v: 7299 case NEON::BI__builtin_neon_vqtbl4q_v: 7300 break; 7301 case NEON::BI__builtin_neon_vtbx1_v: 7302 case NEON::BI__builtin_neon_vqtbx1_v: 7303 case NEON::BI__builtin_neon_vqtbx1q_v: 7304 case NEON::BI__builtin_neon_vtbx2_v: 7305 case NEON::BI__builtin_neon_vqtbx2_v: 7306 case NEON::BI__builtin_neon_vqtbx2q_v: 7307 case NEON::BI__builtin_neon_vtbx3_v: 7308 case NEON::BI__builtin_neon_vqtbx3_v: 7309 case NEON::BI__builtin_neon_vqtbx3q_v: 7310 case NEON::BI__builtin_neon_vtbx4_v: 7311 case NEON::BI__builtin_neon_vqtbx4_v: 7312 case NEON::BI__builtin_neon_vqtbx4q_v: 7313 break; 7314 } 7315 7316 assert(E->getNumArgs() >= 3); 7317 7318 // Get the last argument, which specifies the vector type. 7319 llvm::APSInt Result; 7320 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 7321 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 7322 return nullptr; 7323 7324 // Determine the type of this overloaded NEON intrinsic. 7325 NeonTypeFlags Type(Result.getZExtValue()); 7326 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 7327 if (!Ty) 7328 return nullptr; 7329 7330 CodeGen::CGBuilderTy &Builder = CGF.Builder; 7331 7332 // AArch64 scalar builtins are not overloaded, they do not have an extra 7333 // argument that specifies the vector type, need to handle each case. 
7334 switch (BuiltinID) { 7335 case NEON::BI__builtin_neon_vtbl1_v: { 7336 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 7337 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 7338 "vtbl1"); 7339 } 7340 case NEON::BI__builtin_neon_vtbl2_v: { 7341 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 7342 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 7343 "vtbl1"); 7344 } 7345 case NEON::BI__builtin_neon_vtbl3_v: { 7346 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 7347 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 7348 "vtbl2"); 7349 } 7350 case NEON::BI__builtin_neon_vtbl4_v: { 7351 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 7352 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 7353 "vtbl2"); 7354 } 7355 case NEON::BI__builtin_neon_vtbx1_v: { 7356 Value *TblRes = 7357 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 7358 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 7359 7360 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 7361 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 7362 CmpRes = Builder.CreateSExt(CmpRes, Ty); 7363 7364 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 7365 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 7366 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 7367 } 7368 case NEON::BI__builtin_neon_vtbx2_v: { 7369 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 7370 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 7371 "vtbx1"); 7372 } 7373 case NEON::BI__builtin_neon_vtbx3_v: { 7374 Value *TblRes = 7375 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 7376 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 7377 7378 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 7379 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 7380 TwentyFourV); 7381 CmpRes = Builder.CreateSExt(CmpRes, Ty); 7382 7383 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 7384 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 7385 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 7386 } 7387 case NEON::BI__builtin_neon_vtbx4_v: { 7388 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 7389 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 7390 "vtbx2"); 7391 } 7392 case NEON::BI__builtin_neon_vqtbl1_v: 7393 case NEON::BI__builtin_neon_vqtbl1q_v: 7394 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 7395 case NEON::BI__builtin_neon_vqtbl2_v: 7396 case NEON::BI__builtin_neon_vqtbl2q_v: { 7397 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 7398 case NEON::BI__builtin_neon_vqtbl3_v: 7399 case NEON::BI__builtin_neon_vqtbl3q_v: 7400 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 7401 case NEON::BI__builtin_neon_vqtbl4_v: 7402 case NEON::BI__builtin_neon_vqtbl4q_v: 7403 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 7404 case NEON::BI__builtin_neon_vqtbx1_v: 7405 case NEON::BI__builtin_neon_vqtbx1q_v: 7406 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 7407 case NEON::BI__builtin_neon_vqtbx2_v: 7408 case NEON::BI__builtin_neon_vqtbx2q_v: 7409 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 7410 case NEON::BI__builtin_neon_vqtbx3_v: 7411 case NEON::BI__builtin_neon_vqtbx3q_v: 7412 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 7413 case NEON::BI__builtin_neon_vqtbx4_v: 7414 case NEON::BI__builtin_neon_vqtbx4q_v: 7415 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 7416 } 7417 } 7418 7419 if (!Int) 7420 return nullptr; 7421 7422 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 7423 return CGF.EmitNeonCall(F, Ops, s); 7424 } 7425 7426 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 7427 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 7428 Op = Builder.CreateBitCast(Op, Int16Ty); 7429 Value *V = UndefValue::get(VTy); 7430 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 7431 Op = 
Builder.CreateInsertElement(V, Op, CI); 7432 return Op; 7433 } 7434 7435 // Reinterpret the input predicate so that it can be used to correctly isolate 7436 // the elements of the specified datatype. 7437 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, 7438 llvm::VectorType *VTy) { 7439 llvm::VectorType *RTy = llvm::VectorType::get( 7440 IntegerType::get(getLLVMContext(), 1), VTy->getElementCount()); 7441 if (Pred->getType() == RTy) 7442 return Pred; 7443 7444 unsigned IntID; 7445 llvm::Type *IntrinsicTy; 7446 switch (VTy->getNumElements()) { 7447 default: 7448 llvm_unreachable("unsupported element count!"); 7449 case 2: 7450 case 4: 7451 case 8: 7452 IntID = Intrinsic::aarch64_sve_convert_from_svbool; 7453 IntrinsicTy = RTy; 7454 break; 7455 case 16: 7456 IntID = Intrinsic::aarch64_sve_convert_to_svbool; 7457 IntrinsicTy = Pred->getType(); 7458 break; 7459 } 7460 7461 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy); 7462 Value *C = Builder.CreateCall(F, Pred); 7463 assert(C->getType() == RTy && "Unexpected return type!"); 7464 return C; 7465 } 7466 7467 Value *CodeGenFunction::EmitSVEMaskedLoad(llvm::Type *ReturnTy, 7468 SmallVectorImpl<Value *> &Ops) { 7469 llvm::PointerType *PTy = cast<llvm::PointerType>(Ops[1]->getType()); 7470 llvm::Type *MemEltTy = PTy->getPointerElementType(); 7471 7472 // The vector type that is returned may be different from the 7473 // eventual type loaded from memory. 
7474 auto VectorTy = cast<llvm::VectorType>(ReturnTy); 7475 auto MemoryTy = 7476 llvm::VectorType::get(MemEltTy, VectorTy->getVectorElementCount()); 7477 7478 Value *Offset = Builder.getInt32(0); 7479 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); 7480 Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); 7481 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset); 7482 7483 Value *Splat0 = Constant::getNullValue(MemoryTy); 7484 return Builder.CreateMaskedLoad(BasePtr, Align(1), Predicate, Splat0); 7485 } 7486 7487 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, 7488 const CallExpr *E) { 7489 // Find out if any arguments are required to be integer constant expressions. 7490 unsigned ICEArguments = 0; 7491 ASTContext::GetBuiltinTypeError Error; 7492 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7493 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7494 7495 llvm::SmallVector<Value *, 4> Ops; 7496 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 7497 if ((ICEArguments & (1 << i)) == 0) 7498 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7499 else 7500 llvm_unreachable("Not yet implemented"); 7501 } 7502 7503 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, 7504 AArch64SVEIntrinsicsProvenSorted); 7505 SVETypeFlags TypeFlags(Builtin->TypeModifier); 7506 llvm::Type *Ty = ConvertType(E->getType()); 7507 if (TypeFlags.isLoad()) 7508 return EmitSVEMaskedLoad(Ty, Ops); 7509 7510 /// Should not happen 7511 return nullptr; 7512 } 7513 7514 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 7515 const CallExpr *E, 7516 llvm::Triple::ArchType Arch) { 7517 if (BuiltinID >= AArch64::FirstSVEBuiltin && 7518 BuiltinID <= AArch64::LastSVEBuiltin) 7519 return EmitAArch64SVEBuiltinExpr(BuiltinID, E); 7520 7521 unsigned HintID = static_cast<unsigned>(-1); 7522 switch (BuiltinID) { 7523 default: break; 7524 case AArch64::BI__builtin_arm_nop: 
7525 HintID = 0; 7526 break; 7527 case AArch64::BI__builtin_arm_yield: 7528 case AArch64::BI__yield: 7529 HintID = 1; 7530 break; 7531 case AArch64::BI__builtin_arm_wfe: 7532 case AArch64::BI__wfe: 7533 HintID = 2; 7534 break; 7535 case AArch64::BI__builtin_arm_wfi: 7536 case AArch64::BI__wfi: 7537 HintID = 3; 7538 break; 7539 case AArch64::BI__builtin_arm_sev: 7540 case AArch64::BI__sev: 7541 HintID = 4; 7542 break; 7543 case AArch64::BI__builtin_arm_sevl: 7544 case AArch64::BI__sevl: 7545 HintID = 5; 7546 break; 7547 } 7548 7549 if (HintID != static_cast<unsigned>(-1)) { 7550 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 7551 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 7552 } 7553 7554 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 7555 Value *Address = EmitScalarExpr(E->getArg(0)); 7556 Value *RW = EmitScalarExpr(E->getArg(1)); 7557 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 7558 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 7559 Value *IsData = EmitScalarExpr(E->getArg(4)); 7560 7561 Value *Locality = nullptr; 7562 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 7563 // Temporal fetch, needs to convert cache level to locality. 7564 Locality = llvm::ConstantInt::get(Int32Ty, 7565 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 7566 } else { 7567 // Streaming fetch. 7568 Locality = llvm::ConstantInt::get(Int32Ty, 0); 7569 } 7570 7571 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 7572 // PLDL3STRM or PLDL2STRM. 
7573 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); 7574 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 7575 } 7576 7577 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 7578 assert((getContext().getTypeSize(E->getType()) == 32) && 7579 "rbit of unusual size!"); 7580 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 7581 return Builder.CreateCall( 7582 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 7583 } 7584 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 7585 assert((getContext().getTypeSize(E->getType()) == 64) && 7586 "rbit of unusual size!"); 7587 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 7588 return Builder.CreateCall( 7589 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 7590 } 7591 7592 if (BuiltinID == AArch64::BI__builtin_arm_cls) { 7593 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 7594 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, 7595 "cls"); 7596 } 7597 if (BuiltinID == AArch64::BI__builtin_arm_cls64) { 7598 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 7599 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, 7600 "cls"); 7601 } 7602 7603 if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { 7604 assert((getContext().getTypeSize(E->getType()) == 32) && 7605 "__jcvt of unusual size!"); 7606 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 7607 return Builder.CreateCall( 7608 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); 7609 } 7610 7611 if (BuiltinID == AArch64::BI__clear_cache) { 7612 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 7613 const FunctionDecl *FD = E->getDirectCallee(); 7614 Value *Ops[2]; 7615 for (unsigned i = 0; i < 2; i++) 7616 Ops[i] = EmitScalarExpr(E->getArg(i)); 7617 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 7618 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 7619 StringRef Name = FD->getName(); 7620 return 
EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 7621 } 7622 7623 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 7624 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 7625 getContext().getTypeSize(E->getType()) == 128) { 7626 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 7627 ? Intrinsic::aarch64_ldaxp 7628 : Intrinsic::aarch64_ldxp); 7629 7630 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 7631 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 7632 "ldxp"); 7633 7634 Value *Val0 = Builder.CreateExtractValue(Val, 1); 7635 Value *Val1 = Builder.CreateExtractValue(Val, 0); 7636 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 7637 Val0 = Builder.CreateZExt(Val0, Int128Ty); 7638 Val1 = Builder.CreateZExt(Val1, Int128Ty); 7639 7640 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 7641 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 7642 Val = Builder.CreateOr(Val, Val1); 7643 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 7644 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 7645 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 7646 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 7647 7648 QualType Ty = E->getType(); 7649 llvm::Type *RealResTy = ConvertType(Ty); 7650 llvm::Type *PtrTy = llvm::IntegerType::get( 7651 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 7652 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 7653 7654 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 7655 ? 
Intrinsic::aarch64_ldaxr 7656 : Intrinsic::aarch64_ldxr, 7657 PtrTy); 7658 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 7659 7660 if (RealResTy->isPointerTy()) 7661 return Builder.CreateIntToPtr(Val, RealResTy); 7662 7663 llvm::Type *IntResTy = llvm::IntegerType::get( 7664 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 7665 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 7666 return Builder.CreateBitCast(Val, RealResTy); 7667 } 7668 7669 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 7670 BuiltinID == AArch64::BI__builtin_arm_stlex) && 7671 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 7672 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 7673 ? Intrinsic::aarch64_stlxp 7674 : Intrinsic::aarch64_stxp); 7675 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 7676 7677 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 7678 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 7679 7680 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 7681 llvm::Value *Val = Builder.CreateLoad(Tmp); 7682 7683 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 7684 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 7685 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 7686 Int8PtrTy); 7687 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 7688 } 7689 7690 if (BuiltinID == AArch64::BI__builtin_arm_strex || 7691 BuiltinID == AArch64::BI__builtin_arm_stlex) { 7692 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 7693 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 7694 7695 QualType Ty = E->getArg(0)->getType(); 7696 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 7697 getContext().getTypeSize(Ty)); 7698 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 7699 7700 if (StoreVal->getType()->isPointerTy()) 7701 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 7702 else { 7703 llvm::Type *IntTy = 
llvm::IntegerType::get( 7704 getLLVMContext(), 7705 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 7706 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 7707 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 7708 } 7709 7710 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 7711 ? Intrinsic::aarch64_stlxr 7712 : Intrinsic::aarch64_stxr, 7713 StoreAddr->getType()); 7714 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 7715 } 7716 7717 if (BuiltinID == AArch64::BI__getReg) { 7718 Expr::EvalResult Result; 7719 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) 7720 llvm_unreachable("Sema will ensure that the parameter is constant"); 7721 7722 llvm::APSInt Value = Result.Val.getInt(); 7723 LLVMContext &Context = CGM.getLLVMContext(); 7724 std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10); 7725 7726 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; 7727 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 7728 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 7729 7730 llvm::Function *F = 7731 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); 7732 return Builder.CreateCall(F, Metadata); 7733 } 7734 7735 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 7736 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 7737 return Builder.CreateCall(F); 7738 } 7739 7740 if (BuiltinID == AArch64::BI_ReadWriteBarrier) 7741 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 7742 llvm::SyncScope::SingleThread); 7743 7744 // CRC32 7745 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 7746 switch (BuiltinID) { 7747 case AArch64::BI__builtin_arm_crc32b: 7748 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 7749 case AArch64::BI__builtin_arm_crc32cb: 7750 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 7751 case AArch64::BI__builtin_arm_crc32h: 7752 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 7753 case 
AArch64::BI__builtin_arm_crc32ch: 7754 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 7755 case AArch64::BI__builtin_arm_crc32w: 7756 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 7757 case AArch64::BI__builtin_arm_crc32cw: 7758 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 7759 case AArch64::BI__builtin_arm_crc32d: 7760 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 7761 case AArch64::BI__builtin_arm_crc32cd: 7762 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 7763 } 7764 7765 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 7766 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 7767 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 7768 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 7769 7770 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 7771 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 7772 7773 return Builder.CreateCall(F, {Arg0, Arg1}); 7774 } 7775 7776 // Memory Tagging Extensions (MTE) Intrinsics 7777 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; 7778 switch (BuiltinID) { 7779 case AArch64::BI__builtin_arm_irg: 7780 MTEIntrinsicID = Intrinsic::aarch64_irg; break; 7781 case AArch64::BI__builtin_arm_addg: 7782 MTEIntrinsicID = Intrinsic::aarch64_addg; break; 7783 case AArch64::BI__builtin_arm_gmi: 7784 MTEIntrinsicID = Intrinsic::aarch64_gmi; break; 7785 case AArch64::BI__builtin_arm_ldg: 7786 MTEIntrinsicID = Intrinsic::aarch64_ldg; break; 7787 case AArch64::BI__builtin_arm_stg: 7788 MTEIntrinsicID = Intrinsic::aarch64_stg; break; 7789 case AArch64::BI__builtin_arm_subp: 7790 MTEIntrinsicID = Intrinsic::aarch64_subp; break; 7791 } 7792 7793 if (MTEIntrinsicID != Intrinsic::not_intrinsic) { 7794 llvm::Type *T = ConvertType(E->getType()); 7795 7796 if (MTEIntrinsicID == Intrinsic::aarch64_irg) { 7797 Value *Pointer = EmitScalarExpr(E->getArg(0)); 7798 Value *Mask = EmitScalarExpr(E->getArg(1)); 7799 7800 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); 7801 Mask = Builder.CreateZExt(Mask, Int64Ty); 7802 Value 
*RV = Builder.CreateCall( 7803 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask}); 7804 return Builder.CreatePointerCast(RV, T); 7805 } 7806 if (MTEIntrinsicID == Intrinsic::aarch64_addg) { 7807 Value *Pointer = EmitScalarExpr(E->getArg(0)); 7808 Value *TagOffset = EmitScalarExpr(E->getArg(1)); 7809 7810 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); 7811 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty); 7812 Value *RV = Builder.CreateCall( 7813 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset}); 7814 return Builder.CreatePointerCast(RV, T); 7815 } 7816 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) { 7817 Value *Pointer = EmitScalarExpr(E->getArg(0)); 7818 Value *ExcludedMask = EmitScalarExpr(E->getArg(1)); 7819 7820 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty); 7821 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); 7822 return Builder.CreateCall( 7823 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask}); 7824 } 7825 // Although it is possible to supply a different return 7826 // address (first arg) to this intrinsic, for now we set 7827 // return address same as input address. 7828 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) { 7829 Value *TagAddress = EmitScalarExpr(E->getArg(0)); 7830 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); 7831 Value *RV = Builder.CreateCall( 7832 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); 7833 return Builder.CreatePointerCast(RV, T); 7834 } 7835 // Although it is possible to supply a different tag (to set) 7836 // to this intrinsic (as first arg), for now we supply 7837 // the tag that is in input address arg (common use case). 
7838 if (MTEIntrinsicID == Intrinsic::aarch64_stg) { 7839 Value *TagAddress = EmitScalarExpr(E->getArg(0)); 7840 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); 7841 return Builder.CreateCall( 7842 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); 7843 } 7844 if (MTEIntrinsicID == Intrinsic::aarch64_subp) { 7845 Value *PointerA = EmitScalarExpr(E->getArg(0)); 7846 Value *PointerB = EmitScalarExpr(E->getArg(1)); 7847 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); 7848 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); 7849 return Builder.CreateCall( 7850 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); 7851 } 7852 } 7853 7854 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 7855 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 7856 BuiltinID == AArch64::BI__builtin_arm_rsrp || 7857 BuiltinID == AArch64::BI__builtin_arm_wsr || 7858 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 7859 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 7860 7861 bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr || 7862 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 7863 BuiltinID == AArch64::BI__builtin_arm_rsrp; 7864 7865 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 7866 BuiltinID == AArch64::BI__builtin_arm_wsrp; 7867 7868 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 7869 BuiltinID != AArch64::BI__builtin_arm_wsr; 7870 7871 llvm::Type *ValueType; 7872 llvm::Type *RegisterType = Int64Ty; 7873 if (IsPointerBuiltin) { 7874 ValueType = VoidPtrTy; 7875 } else if (Is64Bit) { 7876 ValueType = Int64Ty; 7877 } else { 7878 ValueType = Int32Ty; 7879 } 7880 7881 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 7882 } 7883 7884 if (BuiltinID == AArch64::BI_ReadStatusReg || 7885 BuiltinID == AArch64::BI_WriteStatusReg) { 7886 LLVMContext &Context = CGM.getLLVMContext(); 7887 7888 unsigned SysReg = 7889 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue(); 7890 7891 
std::string SysRegStr; 7892 llvm::raw_string_ostream(SysRegStr) << 7893 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" << 7894 ((SysReg >> 11) & 7) << ":" << 7895 ((SysReg >> 7) & 15) << ":" << 7896 ((SysReg >> 3) & 15) << ":" << 7897 ( SysReg & 7); 7898 7899 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) }; 7900 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 7901 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 7902 7903 llvm::Type *RegisterType = Int64Ty; 7904 llvm::Type *Types[] = { RegisterType }; 7905 7906 if (BuiltinID == AArch64::BI_ReadStatusReg) { 7907 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 7908 7909 return Builder.CreateCall(F, Metadata); 7910 } 7911 7912 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 7913 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); 7914 7915 return Builder.CreateCall(F, { Metadata, ArgValue }); 7916 } 7917 7918 if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { 7919 llvm::Function *F = 7920 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); 7921 return Builder.CreateCall(F); 7922 } 7923 7924 if (BuiltinID == AArch64::BI__builtin_sponentry) { 7925 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); 7926 return Builder.CreateCall(F); 7927 } 7928 7929 // Find out if any arguments are required to be integer constant 7930 // expressions. 
7931 unsigned ICEArguments = 0; 7932 ASTContext::GetBuiltinTypeError Error; 7933 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7934 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7935 7936 llvm::SmallVector<Value*, 4> Ops; 7937 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 7938 if ((ICEArguments & (1 << i)) == 0) { 7939 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7940 } else { 7941 // If this is required to be a constant, constant fold it so that we know 7942 // that the generated intrinsic gets a ConstantInt. 7943 llvm::APSInt Result; 7944 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 7945 assert(IsConst && "Constant arg isn't actually constant?"); 7946 (void)IsConst; 7947 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 7948 } 7949 } 7950 7951 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 7952 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( 7953 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 7954 7955 if (Builtin) { 7956 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 7957 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 7958 assert(Result && "SISD intrinsic should have been handled"); 7959 return Result; 7960 } 7961 7962 llvm::APSInt Result; 7963 const Expr *Arg = E->getArg(E->getNumArgs()-1); 7964 NeonTypeFlags Type(0); 7965 if (Arg->isIntegerConstantExpr(Result, getContext())) 7966 // Determine the type of this overloaded NEON intrinsic. 7967 Type = NeonTypeFlags(Result.getZExtValue()); 7968 7969 bool usgn = Type.isUnsigned(); 7970 bool quad = Type.isQuad(); 7971 7972 // Handle non-overloaded intrinsics first. 
7973 switch (BuiltinID) { 7974 default: break; 7975 case NEON::BI__builtin_neon_vabsh_f16: 7976 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7977 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs"); 7978 case NEON::BI__builtin_neon_vldrq_p128: { 7979 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 7980 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 7981 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 7982 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 7983 CharUnits::fromQuantity(16)); 7984 } 7985 case NEON::BI__builtin_neon_vstrq_p128: { 7986 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 7987 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 7988 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 7989 } 7990 case NEON::BI__builtin_neon_vcvts_u32_f32: 7991 case NEON::BI__builtin_neon_vcvtd_u64_f64: 7992 usgn = true; 7993 LLVM_FALLTHROUGH; 7994 case NEON::BI__builtin_neon_vcvts_s32_f32: 7995 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 7996 Ops.push_back(EmitScalarExpr(E->getArg(0))); 7997 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 7998 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 7999 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 8000 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 8001 if (usgn) 8002 return Builder.CreateFPToUI(Ops[0], InTy); 8003 return Builder.CreateFPToSI(Ops[0], InTy); 8004 } 8005 case NEON::BI__builtin_neon_vcvts_f32_u32: 8006 case NEON::BI__builtin_neon_vcvtd_f64_u64: 8007 usgn = true; 8008 LLVM_FALLTHROUGH; 8009 case NEON::BI__builtin_neon_vcvts_f32_s32: 8010 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 8011 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8012 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 8013 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 8014 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 8015 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 8016 if (usgn) 8017 return Builder.CreateUIToFP(Ops[0], FTy); 8018 return Builder.CreateSIToFP(Ops[0], FTy); 8019 } 8020 case NEON::BI__builtin_neon_vcvth_f16_u16: 8021 case NEON::BI__builtin_neon_vcvth_f16_u32: 8022 case NEON::BI__builtin_neon_vcvth_f16_u64: 8023 usgn = true; 8024 LLVM_FALLTHROUGH; 8025 case NEON::BI__builtin_neon_vcvth_f16_s16: 8026 case NEON::BI__builtin_neon_vcvth_f16_s32: 8027 case NEON::BI__builtin_neon_vcvth_f16_s64: { 8028 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8029 llvm::Type *FTy = HalfTy; 8030 llvm::Type *InTy; 8031 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64) 8032 InTy = Int64Ty; 8033 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32) 8034 InTy = Int32Ty; 8035 else 8036 InTy = Int16Ty; 8037 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 8038 if (usgn) 8039 return Builder.CreateUIToFP(Ops[0], FTy); 8040 return Builder.CreateSIToFP(Ops[0], FTy); 8041 } 8042 case NEON::BI__builtin_neon_vcvth_u16_f16: 8043 usgn = true; 8044 LLVM_FALLTHROUGH; 8045 case NEON::BI__builtin_neon_vcvth_s16_f16: { 8046 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8047 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 8048 if (usgn) 8049 return Builder.CreateFPToUI(Ops[0], Int16Ty); 8050 return Builder.CreateFPToSI(Ops[0], Int16Ty); 8051 } 8052 case NEON::BI__builtin_neon_vcvth_u32_f16: 8053 usgn = true; 8054 LLVM_FALLTHROUGH; 8055 case NEON::BI__builtin_neon_vcvth_s32_f16: { 8056 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8057 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 8058 if (usgn) 8059 return Builder.CreateFPToUI(Ops[0], Int32Ty); 8060 return Builder.CreateFPToSI(Ops[0], Int32Ty); 8061 } 8062 case NEON::BI__builtin_neon_vcvth_u64_f16: 8063 usgn = true; 8064 LLVM_FALLTHROUGH; 8065 case NEON::BI__builtin_neon_vcvth_s64_f16: { 8066 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8067 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 8068 if (usgn) 8069 return 
Builder.CreateFPToUI(Ops[0], Int64Ty); 8070 return Builder.CreateFPToSI(Ops[0], Int64Ty); 8071 } 8072 case NEON::BI__builtin_neon_vcvtah_u16_f16: 8073 case NEON::BI__builtin_neon_vcvtmh_u16_f16: 8074 case NEON::BI__builtin_neon_vcvtnh_u16_f16: 8075 case NEON::BI__builtin_neon_vcvtph_u16_f16: 8076 case NEON::BI__builtin_neon_vcvtah_s16_f16: 8077 case NEON::BI__builtin_neon_vcvtmh_s16_f16: 8078 case NEON::BI__builtin_neon_vcvtnh_s16_f16: 8079 case NEON::BI__builtin_neon_vcvtph_s16_f16: { 8080 unsigned Int; 8081 llvm::Type* InTy = Int32Ty; 8082 llvm::Type* FTy = HalfTy; 8083 llvm::Type *Tys[2] = {InTy, FTy}; 8084 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8085 switch (BuiltinID) { 8086 default: llvm_unreachable("missing builtin ID in switch!"); 8087 case NEON::BI__builtin_neon_vcvtah_u16_f16: 8088 Int = Intrinsic::aarch64_neon_fcvtau; break; 8089 case NEON::BI__builtin_neon_vcvtmh_u16_f16: 8090 Int = Intrinsic::aarch64_neon_fcvtmu; break; 8091 case NEON::BI__builtin_neon_vcvtnh_u16_f16: 8092 Int = Intrinsic::aarch64_neon_fcvtnu; break; 8093 case NEON::BI__builtin_neon_vcvtph_u16_f16: 8094 Int = Intrinsic::aarch64_neon_fcvtpu; break; 8095 case NEON::BI__builtin_neon_vcvtah_s16_f16: 8096 Int = Intrinsic::aarch64_neon_fcvtas; break; 8097 case NEON::BI__builtin_neon_vcvtmh_s16_f16: 8098 Int = Intrinsic::aarch64_neon_fcvtms; break; 8099 case NEON::BI__builtin_neon_vcvtnh_s16_f16: 8100 Int = Intrinsic::aarch64_neon_fcvtns; break; 8101 case NEON::BI__builtin_neon_vcvtph_s16_f16: 8102 Int = Intrinsic::aarch64_neon_fcvtps; break; 8103 } 8104 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); 8105 return Builder.CreateTrunc(Ops[0], Int16Ty); 8106 } 8107 case NEON::BI__builtin_neon_vcaleh_f16: 8108 case NEON::BI__builtin_neon_vcalth_f16: 8109 case NEON::BI__builtin_neon_vcageh_f16: 8110 case NEON::BI__builtin_neon_vcagth_f16: { 8111 unsigned Int; 8112 llvm::Type* InTy = Int32Ty; 8113 llvm::Type* FTy = HalfTy; 8114 llvm::Type *Tys[2] = {InTy, FTy}; 8115 
Ops.push_back(EmitScalarExpr(E->getArg(1))); 8116 switch (BuiltinID) { 8117 default: llvm_unreachable("missing builtin ID in switch!"); 8118 case NEON::BI__builtin_neon_vcageh_f16: 8119 Int = Intrinsic::aarch64_neon_facge; break; 8120 case NEON::BI__builtin_neon_vcagth_f16: 8121 Int = Intrinsic::aarch64_neon_facgt; break; 8122 case NEON::BI__builtin_neon_vcaleh_f16: 8123 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break; 8124 case NEON::BI__builtin_neon_vcalth_f16: 8125 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break; 8126 } 8127 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg"); 8128 return Builder.CreateTrunc(Ops[0], Int16Ty); 8129 } 8130 case NEON::BI__builtin_neon_vcvth_n_s16_f16: 8131 case NEON::BI__builtin_neon_vcvth_n_u16_f16: { 8132 unsigned Int; 8133 llvm::Type* InTy = Int32Ty; 8134 llvm::Type* FTy = HalfTy; 8135 llvm::Type *Tys[2] = {InTy, FTy}; 8136 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8137 switch (BuiltinID) { 8138 default: llvm_unreachable("missing builtin ID in switch!"); 8139 case NEON::BI__builtin_neon_vcvth_n_s16_f16: 8140 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break; 8141 case NEON::BI__builtin_neon_vcvth_n_u16_f16: 8142 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break; 8143 } 8144 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); 8145 return Builder.CreateTrunc(Ops[0], Int16Ty); 8146 } 8147 case NEON::BI__builtin_neon_vcvth_n_f16_s16: 8148 case NEON::BI__builtin_neon_vcvth_n_f16_u16: { 8149 unsigned Int; 8150 llvm::Type* FTy = HalfTy; 8151 llvm::Type* InTy = Int32Ty; 8152 llvm::Type *Tys[2] = {FTy, InTy}; 8153 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8154 switch (BuiltinID) { 8155 default: llvm_unreachable("missing builtin ID in switch!"); 8156 case NEON::BI__builtin_neon_vcvth_n_f16_s16: 8157 Int = Intrinsic::aarch64_neon_vcvtfxs2fp; 8158 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext"); 8159 break; 8160 case NEON::BI__builtin_neon_vcvth_n_f16_u16: 8161 Int = 
Intrinsic::aarch64_neon_vcvtfxu2fp; 8162 Ops[0] = Builder.CreateZExt(Ops[0], InTy); 8163 break; 8164 } 8165 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); 8166 } 8167 case NEON::BI__builtin_neon_vpaddd_s64: { 8168 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 8169 Value *Vec = EmitScalarExpr(E->getArg(0)); 8170 // The vector is v2f64, so make sure it's bitcast to that. 8171 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 8172 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 8173 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 8174 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 8175 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 8176 // Pairwise addition of a v2f64 into a scalar f64. 8177 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 8178 } 8179 case NEON::BI__builtin_neon_vpaddd_f64: { 8180 llvm::Type *Ty = 8181 llvm::VectorType::get(DoubleTy, 2); 8182 Value *Vec = EmitScalarExpr(E->getArg(0)); 8183 // The vector is v2f64, so make sure it's bitcast to that. 8184 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 8185 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 8186 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 8187 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 8188 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 8189 // Pairwise addition of a v2f64 into a scalar f64. 8190 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 8191 } 8192 case NEON::BI__builtin_neon_vpadds_f32: { 8193 llvm::Type *Ty = 8194 llvm::VectorType::get(FloatTy, 2); 8195 Value *Vec = EmitScalarExpr(E->getArg(0)); 8196 // The vector is v2f32, so make sure it's bitcast to that. 
8197 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 8198 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 8199 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 8200 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 8201 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 8202 // Pairwise addition of a v2f32 into a scalar f32. 8203 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 8204 } 8205 case NEON::BI__builtin_neon_vceqzd_s64: 8206 case NEON::BI__builtin_neon_vceqzd_f64: 8207 case NEON::BI__builtin_neon_vceqzs_f32: 8208 case NEON::BI__builtin_neon_vceqzh_f16: 8209 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8210 return EmitAArch64CompareBuiltinExpr( 8211 Ops[0], ConvertType(E->getCallReturnType(getContext())), 8212 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 8213 case NEON::BI__builtin_neon_vcgezd_s64: 8214 case NEON::BI__builtin_neon_vcgezd_f64: 8215 case NEON::BI__builtin_neon_vcgezs_f32: 8216 case NEON::BI__builtin_neon_vcgezh_f16: 8217 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8218 return EmitAArch64CompareBuiltinExpr( 8219 Ops[0], ConvertType(E->getCallReturnType(getContext())), 8220 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 8221 case NEON::BI__builtin_neon_vclezd_s64: 8222 case NEON::BI__builtin_neon_vclezd_f64: 8223 case NEON::BI__builtin_neon_vclezs_f32: 8224 case NEON::BI__builtin_neon_vclezh_f16: 8225 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8226 return EmitAArch64CompareBuiltinExpr( 8227 Ops[0], ConvertType(E->getCallReturnType(getContext())), 8228 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 8229 case NEON::BI__builtin_neon_vcgtzd_s64: 8230 case NEON::BI__builtin_neon_vcgtzd_f64: 8231 case NEON::BI__builtin_neon_vcgtzs_f32: 8232 case NEON::BI__builtin_neon_vcgtzh_f16: 8233 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8234 return EmitAArch64CompareBuiltinExpr( 8235 Ops[0], ConvertType(E->getCallReturnType(getContext())), 8236 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 8237 case 
NEON::BI__builtin_neon_vcltzd_s64: 8238 case NEON::BI__builtin_neon_vcltzd_f64: 8239 case NEON::BI__builtin_neon_vcltzs_f32: 8240 case NEON::BI__builtin_neon_vcltzh_f16: 8241 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8242 return EmitAArch64CompareBuiltinExpr( 8243 Ops[0], ConvertType(E->getCallReturnType(getContext())), 8244 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 8245 8246 case NEON::BI__builtin_neon_vceqzd_u64: { 8247 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8248 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 8249 Ops[0] = 8250 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 8251 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 8252 } 8253 case NEON::BI__builtin_neon_vceqd_f64: 8254 case NEON::BI__builtin_neon_vcled_f64: 8255 case NEON::BI__builtin_neon_vcltd_f64: 8256 case NEON::BI__builtin_neon_vcged_f64: 8257 case NEON::BI__builtin_neon_vcgtd_f64: { 8258 llvm::CmpInst::Predicate P; 8259 switch (BuiltinID) { 8260 default: llvm_unreachable("missing builtin ID in switch!"); 8261 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 8262 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 8263 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 8264 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 8265 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 8266 } 8267 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8268 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 8269 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 8270 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 8271 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 8272 } 8273 case NEON::BI__builtin_neon_vceqs_f32: 8274 case NEON::BI__builtin_neon_vcles_f32: 8275 case NEON::BI__builtin_neon_vclts_f32: 8276 case NEON::BI__builtin_neon_vcges_f32: 8277 case NEON::BI__builtin_neon_vcgts_f32: { 8278 llvm::CmpInst::Predicate P; 8279 switch 
(BuiltinID) { 8280 default: llvm_unreachable("missing builtin ID in switch!"); 8281 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 8282 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 8283 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 8284 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 8285 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 8286 } 8287 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8288 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 8289 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 8290 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 8291 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 8292 } 8293 case NEON::BI__builtin_neon_vceqh_f16: 8294 case NEON::BI__builtin_neon_vcleh_f16: 8295 case NEON::BI__builtin_neon_vclth_f16: 8296 case NEON::BI__builtin_neon_vcgeh_f16: 8297 case NEON::BI__builtin_neon_vcgth_f16: { 8298 llvm::CmpInst::Predicate P; 8299 switch (BuiltinID) { 8300 default: llvm_unreachable("missing builtin ID in switch!"); 8301 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break; 8302 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break; 8303 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break; 8304 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break; 8305 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break; 8306 } 8307 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8308 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 8309 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); 8310 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 8311 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); 8312 } 8313 case NEON::BI__builtin_neon_vceqd_s64: 8314 case NEON::BI__builtin_neon_vceqd_u64: 8315 case NEON::BI__builtin_neon_vcgtd_s64: 8316 case NEON::BI__builtin_neon_vcgtd_u64: 8317 case 
NEON::BI__builtin_neon_vcltd_s64: 8318 case NEON::BI__builtin_neon_vcltd_u64: 8319 case NEON::BI__builtin_neon_vcged_u64: 8320 case NEON::BI__builtin_neon_vcged_s64: 8321 case NEON::BI__builtin_neon_vcled_u64: 8322 case NEON::BI__builtin_neon_vcled_s64: { 8323 llvm::CmpInst::Predicate P; 8324 switch (BuiltinID) { 8325 default: llvm_unreachable("missing builtin ID in switch!"); 8326 case NEON::BI__builtin_neon_vceqd_s64: 8327 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 8328 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 8329 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 8330 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 8331 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 8332 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 8333 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 8334 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 8335 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 8336 } 8337 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8338 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 8339 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 8340 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 8341 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 8342 } 8343 case NEON::BI__builtin_neon_vtstd_s64: 8344 case NEON::BI__builtin_neon_vtstd_u64: { 8345 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8346 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 8347 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 8348 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 8349 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 8350 llvm::Constant::getNullValue(Int64Ty)); 8351 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 8352 } 8353 case NEON::BI__builtin_neon_vset_lane_i8: 8354 case NEON::BI__builtin_neon_vset_lane_i16: 8355 case 
NEON::BI__builtin_neon_vset_lane_i32: 8356 case NEON::BI__builtin_neon_vset_lane_i64: 8357 case NEON::BI__builtin_neon_vset_lane_f32: 8358 case NEON::BI__builtin_neon_vsetq_lane_i8: 8359 case NEON::BI__builtin_neon_vsetq_lane_i16: 8360 case NEON::BI__builtin_neon_vsetq_lane_i32: 8361 case NEON::BI__builtin_neon_vsetq_lane_i64: 8362 case NEON::BI__builtin_neon_vsetq_lane_f32: 8363 Ops.push_back(EmitScalarExpr(E->getArg(2))); 8364 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 8365 case NEON::BI__builtin_neon_vset_lane_f64: 8366 // The vector type needs a cast for the v1f64 variant. 8367 Ops[1] = Builder.CreateBitCast(Ops[1], 8368 llvm::VectorType::get(DoubleTy, 1)); 8369 Ops.push_back(EmitScalarExpr(E->getArg(2))); 8370 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 8371 case NEON::BI__builtin_neon_vsetq_lane_f64: 8372 // The vector type needs a cast for the v2f64 variant. 8373 Ops[1] = Builder.CreateBitCast(Ops[1], 8374 llvm::VectorType::get(DoubleTy, 2)); 8375 Ops.push_back(EmitScalarExpr(E->getArg(2))); 8376 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 8377 8378 case NEON::BI__builtin_neon_vget_lane_i8: 8379 case NEON::BI__builtin_neon_vdupb_lane_i8: 8380 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 8381 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8382 "vget_lane"); 8383 case NEON::BI__builtin_neon_vgetq_lane_i8: 8384 case NEON::BI__builtin_neon_vdupb_laneq_i8: 8385 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 8386 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8387 "vgetq_lane"); 8388 case NEON::BI__builtin_neon_vget_lane_i16: 8389 case NEON::BI__builtin_neon_vduph_lane_i16: 8390 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 8391 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8392 "vget_lane"); 8393 case 
NEON::BI__builtin_neon_vgetq_lane_i16: 8394 case NEON::BI__builtin_neon_vduph_laneq_i16: 8395 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 8396 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8397 "vgetq_lane"); 8398 case NEON::BI__builtin_neon_vget_lane_i32: 8399 case NEON::BI__builtin_neon_vdups_lane_i32: 8400 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 8401 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8402 "vget_lane"); 8403 case NEON::BI__builtin_neon_vdups_lane_f32: 8404 Ops[0] = Builder.CreateBitCast(Ops[0], 8405 llvm::VectorType::get(FloatTy, 2)); 8406 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8407 "vdups_lane"); 8408 case NEON::BI__builtin_neon_vgetq_lane_i32: 8409 case NEON::BI__builtin_neon_vdups_laneq_i32: 8410 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8411 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8412 "vgetq_lane"); 8413 case NEON::BI__builtin_neon_vget_lane_i64: 8414 case NEON::BI__builtin_neon_vdupd_lane_i64: 8415 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 8416 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8417 "vget_lane"); 8418 case NEON::BI__builtin_neon_vdupd_lane_f64: 8419 Ops[0] = Builder.CreateBitCast(Ops[0], 8420 llvm::VectorType::get(DoubleTy, 1)); 8421 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8422 "vdupd_lane"); 8423 case NEON::BI__builtin_neon_vgetq_lane_i64: 8424 case NEON::BI__builtin_neon_vdupd_laneq_i64: 8425 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8426 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8427 "vgetq_lane"); 8428 case NEON::BI__builtin_neon_vget_lane_f32: 8429 Ops[0] = Builder.CreateBitCast(Ops[0], 8430 llvm::VectorType::get(FloatTy, 2)); 8431 return 
Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8432 "vget_lane"); 8433 case NEON::BI__builtin_neon_vget_lane_f64: 8434 Ops[0] = Builder.CreateBitCast(Ops[0], 8435 llvm::VectorType::get(DoubleTy, 1)); 8436 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8437 "vget_lane"); 8438 case NEON::BI__builtin_neon_vgetq_lane_f32: 8439 case NEON::BI__builtin_neon_vdups_laneq_f32: 8440 Ops[0] = Builder.CreateBitCast(Ops[0], 8441 llvm::VectorType::get(FloatTy, 4)); 8442 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8443 "vgetq_lane"); 8444 case NEON::BI__builtin_neon_vgetq_lane_f64: 8445 case NEON::BI__builtin_neon_vdupd_laneq_f64: 8446 Ops[0] = Builder.CreateBitCast(Ops[0], 8447 llvm::VectorType::get(DoubleTy, 2)); 8448 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8449 "vgetq_lane"); 8450 case NEON::BI__builtin_neon_vaddh_f16: 8451 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8452 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh"); 8453 case NEON::BI__builtin_neon_vsubh_f16: 8454 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8455 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh"); 8456 case NEON::BI__builtin_neon_vmulh_f16: 8457 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8458 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh"); 8459 case NEON::BI__builtin_neon_vdivh_f16: 8460 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8461 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); 8462 case NEON::BI__builtin_neon_vfmah_f16: { 8463 Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); 8464 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
8465 return Builder.CreateCall(F, 8466 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); 8467 } 8468 case NEON::BI__builtin_neon_vfmsh_f16: { 8469 Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); 8470 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); 8471 Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); 8472 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 8473 return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]}); 8474 } 8475 case NEON::BI__builtin_neon_vaddd_s64: 8476 case NEON::BI__builtin_neon_vaddd_u64: 8477 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 8478 case NEON::BI__builtin_neon_vsubd_s64: 8479 case NEON::BI__builtin_neon_vsubd_u64: 8480 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 8481 case NEON::BI__builtin_neon_vqdmlalh_s16: 8482 case NEON::BI__builtin_neon_vqdmlslh_s16: { 8483 SmallVector<Value *, 2> ProductOps; 8484 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 8485 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 8486 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 8487 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 8488 ProductOps, "vqdmlXl"); 8489 Constant *CI = ConstantInt::get(SizeTy, 0); 8490 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 8491 8492 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 8493 ? 
Intrinsic::aarch64_neon_sqadd 8494 : Intrinsic::aarch64_neon_sqsub; 8495 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 8496 } 8497 case NEON::BI__builtin_neon_vqshlud_n_s64: { 8498 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8499 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 8500 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 8501 Ops, "vqshlu_n"); 8502 } 8503 case NEON::BI__builtin_neon_vqshld_n_u64: 8504 case NEON::BI__builtin_neon_vqshld_n_s64: { 8505 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 8506 ? Intrinsic::aarch64_neon_uqshl 8507 : Intrinsic::aarch64_neon_sqshl; 8508 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8509 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 8510 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 8511 } 8512 case NEON::BI__builtin_neon_vrshrd_n_u64: 8513 case NEON::BI__builtin_neon_vrshrd_n_s64: { 8514 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 8515 ? Intrinsic::aarch64_neon_urshl 8516 : Intrinsic::aarch64_neon_srshl; 8517 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8518 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 8519 Ops[1] = ConstantInt::get(Int64Ty, -SV); 8520 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 8521 } 8522 case NEON::BI__builtin_neon_vrsrad_n_u64: 8523 case NEON::BI__builtin_neon_vrsrad_n_s64: { 8524 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 8525 ? 
Intrinsic::aarch64_neon_urshl 8526 : Intrinsic::aarch64_neon_srshl; 8527 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 8528 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 8529 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 8530 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 8531 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 8532 } 8533 case NEON::BI__builtin_neon_vshld_n_s64: 8534 case NEON::BI__builtin_neon_vshld_n_u64: { 8535 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 8536 return Builder.CreateShl( 8537 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 8538 } 8539 case NEON::BI__builtin_neon_vshrd_n_s64: { 8540 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 8541 return Builder.CreateAShr( 8542 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 8543 Amt->getZExtValue())), 8544 "shrd_n"); 8545 } 8546 case NEON::BI__builtin_neon_vshrd_n_u64: { 8547 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 8548 uint64_t ShiftAmt = Amt->getZExtValue(); 8549 // Right-shifting an unsigned value by its size yields 0. 8550 if (ShiftAmt == 64) 8551 return ConstantInt::get(Int64Ty, 0); 8552 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 8553 "shrd_n"); 8554 } 8555 case NEON::BI__builtin_neon_vsrad_n_s64: { 8556 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 8557 Ops[1] = Builder.CreateAShr( 8558 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 8559 Amt->getZExtValue())), 8560 "shrd_n"); 8561 return Builder.CreateAdd(Ops[0], Ops[1]); 8562 } 8563 case NEON::BI__builtin_neon_vsrad_n_u64: { 8564 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 8565 uint64_t ShiftAmt = Amt->getZExtValue(); 8566 // Right-shifting an unsigned value by its size yields 0. 8567 // As Op + 0 = Op, return Ops[0] directly. 
8568 if (ShiftAmt == 64) 8569 return Ops[0]; 8570 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 8571 "shrd_n"); 8572 return Builder.CreateAdd(Ops[0], Ops[1]); 8573 } 8574 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 8575 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 8576 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 8577 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 8578 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 8579 "lane"); 8580 SmallVector<Value *, 2> ProductOps; 8581 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 8582 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 8583 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 8584 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 8585 ProductOps, "vqdmlXl"); 8586 Constant *CI = ConstantInt::get(SizeTy, 0); 8587 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 8588 Ops.pop_back(); 8589 8590 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 8591 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 8592 ? Intrinsic::aarch64_neon_sqadd 8593 : Intrinsic::aarch64_neon_sqsub; 8594 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 8595 } 8596 case NEON::BI__builtin_neon_vqdmlals_s32: 8597 case NEON::BI__builtin_neon_vqdmlsls_s32: { 8598 SmallVector<Value *, 2> ProductOps; 8599 ProductOps.push_back(Ops[1]); 8600 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 8601 Ops[1] = 8602 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 8603 ProductOps, "vqdmlXl"); 8604 8605 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 8606 ? 
Intrinsic::aarch64_neon_sqadd 8607 : Intrinsic::aarch64_neon_sqsub; 8608 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 8609 } 8610 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 8611 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 8612 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 8613 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 8614 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 8615 "lane"); 8616 SmallVector<Value *, 2> ProductOps; 8617 ProductOps.push_back(Ops[1]); 8618 ProductOps.push_back(Ops[2]); 8619 Ops[1] = 8620 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 8621 ProductOps, "vqdmlXl"); 8622 Ops.pop_back(); 8623 8624 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 8625 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 8626 ? Intrinsic::aarch64_neon_sqadd 8627 : Intrinsic::aarch64_neon_sqsub; 8628 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 8629 } 8630 case NEON::BI__builtin_neon_vduph_lane_f16: { 8631 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8632 "vget_lane"); 8633 } 8634 case NEON::BI__builtin_neon_vduph_laneq_f16: { 8635 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 8636 "vgetq_lane"); 8637 } 8638 case AArch64::BI_BitScanForward: 8639 case AArch64::BI_BitScanForward64: 8640 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 8641 case AArch64::BI_BitScanReverse: 8642 case AArch64::BI_BitScanReverse64: 8643 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 8644 case AArch64::BI_InterlockedAnd64: 8645 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 8646 case AArch64::BI_InterlockedExchange64: 8647 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 8648 case AArch64::BI_InterlockedExchangeAdd64: 8649 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 8650 case 
AArch64::BI_InterlockedExchangeSub64: 8651 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 8652 case AArch64::BI_InterlockedOr64: 8653 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 8654 case AArch64::BI_InterlockedXor64: 8655 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 8656 case AArch64::BI_InterlockedDecrement64: 8657 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 8658 case AArch64::BI_InterlockedIncrement64: 8659 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 8660 case AArch64::BI_InterlockedExchangeAdd8_acq: 8661 case AArch64::BI_InterlockedExchangeAdd16_acq: 8662 case AArch64::BI_InterlockedExchangeAdd_acq: 8663 case AArch64::BI_InterlockedExchangeAdd64_acq: 8664 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); 8665 case AArch64::BI_InterlockedExchangeAdd8_rel: 8666 case AArch64::BI_InterlockedExchangeAdd16_rel: 8667 case AArch64::BI_InterlockedExchangeAdd_rel: 8668 case AArch64::BI_InterlockedExchangeAdd64_rel: 8669 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); 8670 case AArch64::BI_InterlockedExchangeAdd8_nf: 8671 case AArch64::BI_InterlockedExchangeAdd16_nf: 8672 case AArch64::BI_InterlockedExchangeAdd_nf: 8673 case AArch64::BI_InterlockedExchangeAdd64_nf: 8674 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); 8675 case AArch64::BI_InterlockedExchange8_acq: 8676 case AArch64::BI_InterlockedExchange16_acq: 8677 case AArch64::BI_InterlockedExchange_acq: 8678 case AArch64::BI_InterlockedExchange64_acq: 8679 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); 8680 case AArch64::BI_InterlockedExchange8_rel: 8681 case AArch64::BI_InterlockedExchange16_rel: 8682 case AArch64::BI_InterlockedExchange_rel: 8683 case AArch64::BI_InterlockedExchange64_rel: 8684 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); 8685 case AArch64::BI_InterlockedExchange8_nf: 8686 case 
AArch64::BI_InterlockedExchange16_nf: 8687 case AArch64::BI_InterlockedExchange_nf: 8688 case AArch64::BI_InterlockedExchange64_nf: 8689 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); 8690 case AArch64::BI_InterlockedCompareExchange8_acq: 8691 case AArch64::BI_InterlockedCompareExchange16_acq: 8692 case AArch64::BI_InterlockedCompareExchange_acq: 8693 case AArch64::BI_InterlockedCompareExchange64_acq: 8694 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); 8695 case AArch64::BI_InterlockedCompareExchange8_rel: 8696 case AArch64::BI_InterlockedCompareExchange16_rel: 8697 case AArch64::BI_InterlockedCompareExchange_rel: 8698 case AArch64::BI_InterlockedCompareExchange64_rel: 8699 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); 8700 case AArch64::BI_InterlockedCompareExchange8_nf: 8701 case AArch64::BI_InterlockedCompareExchange16_nf: 8702 case AArch64::BI_InterlockedCompareExchange_nf: 8703 case AArch64::BI_InterlockedCompareExchange64_nf: 8704 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); 8705 case AArch64::BI_InterlockedOr8_acq: 8706 case AArch64::BI_InterlockedOr16_acq: 8707 case AArch64::BI_InterlockedOr_acq: 8708 case AArch64::BI_InterlockedOr64_acq: 8709 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); 8710 case AArch64::BI_InterlockedOr8_rel: 8711 case AArch64::BI_InterlockedOr16_rel: 8712 case AArch64::BI_InterlockedOr_rel: 8713 case AArch64::BI_InterlockedOr64_rel: 8714 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); 8715 case AArch64::BI_InterlockedOr8_nf: 8716 case AArch64::BI_InterlockedOr16_nf: 8717 case AArch64::BI_InterlockedOr_nf: 8718 case AArch64::BI_InterlockedOr64_nf: 8719 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); 8720 case AArch64::BI_InterlockedXor8_acq: 8721 case AArch64::BI_InterlockedXor16_acq: 8722 case AArch64::BI_InterlockedXor_acq: 8723 case AArch64::BI_InterlockedXor64_acq: 8724 return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); 8725 case AArch64::BI_InterlockedXor8_rel: 8726 case AArch64::BI_InterlockedXor16_rel: 8727 case AArch64::BI_InterlockedXor_rel: 8728 case AArch64::BI_InterlockedXor64_rel: 8729 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); 8730 case AArch64::BI_InterlockedXor8_nf: 8731 case AArch64::BI_InterlockedXor16_nf: 8732 case AArch64::BI_InterlockedXor_nf: 8733 case AArch64::BI_InterlockedXor64_nf: 8734 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); 8735 case AArch64::BI_InterlockedAnd8_acq: 8736 case AArch64::BI_InterlockedAnd16_acq: 8737 case AArch64::BI_InterlockedAnd_acq: 8738 case AArch64::BI_InterlockedAnd64_acq: 8739 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); 8740 case AArch64::BI_InterlockedAnd8_rel: 8741 case AArch64::BI_InterlockedAnd16_rel: 8742 case AArch64::BI_InterlockedAnd_rel: 8743 case AArch64::BI_InterlockedAnd64_rel: 8744 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); 8745 case AArch64::BI_InterlockedAnd8_nf: 8746 case AArch64::BI_InterlockedAnd16_nf: 8747 case AArch64::BI_InterlockedAnd_nf: 8748 case AArch64::BI_InterlockedAnd64_nf: 8749 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); 8750 case AArch64::BI_InterlockedIncrement16_acq: 8751 case AArch64::BI_InterlockedIncrement_acq: 8752 case AArch64::BI_InterlockedIncrement64_acq: 8753 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); 8754 case AArch64::BI_InterlockedIncrement16_rel: 8755 case AArch64::BI_InterlockedIncrement_rel: 8756 case AArch64::BI_InterlockedIncrement64_rel: 8757 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); 8758 case AArch64::BI_InterlockedIncrement16_nf: 8759 case AArch64::BI_InterlockedIncrement_nf: 8760 case AArch64::BI_InterlockedIncrement64_nf: 8761 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); 8762 case AArch64::BI_InterlockedDecrement16_acq: 8763 case 
AArch64::BI_InterlockedDecrement_acq: 8764 case AArch64::BI_InterlockedDecrement64_acq: 8765 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); 8766 case AArch64::BI_InterlockedDecrement16_rel: 8767 case AArch64::BI_InterlockedDecrement_rel: 8768 case AArch64::BI_InterlockedDecrement64_rel: 8769 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); 8770 case AArch64::BI_InterlockedDecrement16_nf: 8771 case AArch64::BI_InterlockedDecrement_nf: 8772 case AArch64::BI_InterlockedDecrement64_nf: 8773 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); 8774 8775 case AArch64::BI_InterlockedAdd: { 8776 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 8777 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 8778 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 8779 AtomicRMWInst::Add, Arg0, Arg1, 8780 llvm::AtomicOrdering::SequentiallyConsistent); 8781 return Builder.CreateAdd(RMWI, Arg1); 8782 } 8783 } 8784 8785 llvm::VectorType *VTy = GetNeonType(this, Type); 8786 llvm::Type *Ty = VTy; 8787 if (!Ty) 8788 return nullptr; 8789 8790 // Not all intrinsics handled by the common case work for AArch64 yet, so only 8791 // defer to common code if it's been added to our special map. 
8792 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 8793 AArch64SIMDIntrinsicsProvenSorted); 8794 8795 if (Builtin) 8796 return EmitCommonNeonBuiltinExpr( 8797 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 8798 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 8799 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch); 8800 8801 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch)) 8802 return V; 8803 8804 unsigned Int; 8805 switch (BuiltinID) { 8806 default: return nullptr; 8807 case NEON::BI__builtin_neon_vbsl_v: 8808 case NEON::BI__builtin_neon_vbslq_v: { 8809 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 8810 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 8811 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 8812 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 8813 8814 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 8815 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 8816 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 8817 return Builder.CreateBitCast(Ops[0], Ty); 8818 } 8819 case NEON::BI__builtin_neon_vfma_lane_v: 8820 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 8821 // The ARM builtins (and instructions) have the addend as the first 8822 // operand, but the 'fma' intrinsics have it last. Swap it around here. 8823 Value *Addend = Ops[0]; 8824 Value *Multiplicand = Ops[1]; 8825 Value *LaneSource = Ops[2]; 8826 Ops[0] = Multiplicand; 8827 Ops[1] = LaneSource; 8828 Ops[2] = Addend; 8829 8830 // Now adjust things to handle the lane access. 8831 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
8832 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 8833 VTy; 8834 llvm::Constant *cst = cast<Constant>(Ops[3]); 8835 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst); 8836 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 8837 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 8838 8839 Ops.pop_back(); 8840 Int = Intrinsic::fma; 8841 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 8842 } 8843 case NEON::BI__builtin_neon_vfma_laneq_v: { 8844 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 8845 // v1f64 fma should be mapped to Neon scalar f64 fma 8846 if (VTy && VTy->getElementType() == DoubleTy) { 8847 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 8848 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 8849 llvm::Type *VTy = GetNeonType(this, 8850 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 8851 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 8852 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 8853 Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 8854 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 8855 return Builder.CreateBitCast(Result, Ty); 8856 } 8857 Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 8858 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 8859 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8860 8861 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 8862 VTy->getNumElements() * 2); 8863 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 8864 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), 8865 cast<ConstantInt>(Ops[3])); 8866 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 8867 8868 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 8869 } 8870 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 8871 Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 8872 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 8873 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8874 8875 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 8876 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 8877 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 8878 } 8879 case NEON::BI__builtin_neon_vfmah_lane_f16: 8880 case NEON::BI__builtin_neon_vfmas_lane_f32: 8881 case NEON::BI__builtin_neon_vfmah_laneq_f16: 8882 case NEON::BI__builtin_neon_vfmas_laneq_f32: 8883 case NEON::BI__builtin_neon_vfmad_lane_f64: 8884 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 8885 Ops.push_back(EmitScalarExpr(E->getArg(3))); 8886 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 8887 Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 8888 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 8889 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 8890 } 8891 case NEON::BI__builtin_neon_vmull_v: 8892 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 8893 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 8894 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 8895 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 8896 case NEON::BI__builtin_neon_vmax_v: 8897 case NEON::BI__builtin_neon_vmaxq_v: 8898 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 8899 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 8900 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 8901 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 8902 case NEON::BI__builtin_neon_vmaxh_f16: { 8903 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8904 Int = Intrinsic::aarch64_neon_fmax; 8905 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax"); 8906 } 8907 case NEON::BI__builtin_neon_vmin_v: 8908 case NEON::BI__builtin_neon_vminq_v: 8909 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 8910 Int = usgn ? 
Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 8911 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 8912 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 8913 case NEON::BI__builtin_neon_vminh_f16: { 8914 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8915 Int = Intrinsic::aarch64_neon_fmin; 8916 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin"); 8917 } 8918 case NEON::BI__builtin_neon_vabd_v: 8919 case NEON::BI__builtin_neon_vabdq_v: 8920 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 8921 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 8922 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 8923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 8924 case NEON::BI__builtin_neon_vpadal_v: 8925 case NEON::BI__builtin_neon_vpadalq_v: { 8926 unsigned ArgElts = VTy->getNumElements(); 8927 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 8928 unsigned BitWidth = EltTy->getBitWidth(); 8929 llvm::Type *ArgTy = llvm::VectorType::get( 8930 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 8931 llvm::Type* Tys[2] = { VTy, ArgTy }; 8932 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 8933 SmallVector<llvm::Value*, 1> TmpOps; 8934 TmpOps.push_back(Ops[1]); 8935 Function *F = CGM.getIntrinsic(Int, Tys); 8936 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 8937 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 8938 return Builder.CreateAdd(tmp, addend); 8939 } 8940 case NEON::BI__builtin_neon_vpmin_v: 8941 case NEON::BI__builtin_neon_vpminq_v: 8942 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 8943 Int = usgn ? 
Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 8944 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 8945 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 8946 case NEON::BI__builtin_neon_vpmax_v: 8947 case NEON::BI__builtin_neon_vpmaxq_v: 8948 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 8949 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 8950 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 8951 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 8952 case NEON::BI__builtin_neon_vminnm_v: 8953 case NEON::BI__builtin_neon_vminnmq_v: 8954 Int = Intrinsic::aarch64_neon_fminnm; 8955 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 8956 case NEON::BI__builtin_neon_vminnmh_f16: 8957 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8958 Int = Intrinsic::aarch64_neon_fminnm; 8959 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm"); 8960 case NEON::BI__builtin_neon_vmaxnm_v: 8961 case NEON::BI__builtin_neon_vmaxnmq_v: 8962 Int = Intrinsic::aarch64_neon_fmaxnm; 8963 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 8964 case NEON::BI__builtin_neon_vmaxnmh_f16: 8965 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8966 Int = Intrinsic::aarch64_neon_fmaxnm; 8967 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm"); 8968 case NEON::BI__builtin_neon_vrecpss_f32: { 8969 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8970 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 8971 Ops, "vrecps"); 8972 } 8973 case NEON::BI__builtin_neon_vrecpsd_f64: 8974 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8975 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 8976 Ops, "vrecps"); 8977 case NEON::BI__builtin_neon_vrecpsh_f16: 8978 Ops.push_back(EmitScalarExpr(E->getArg(1))); 8979 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), 8980 
Ops, "vrecps"); 8981 case NEON::BI__builtin_neon_vqshrun_n_v: 8982 Int = Intrinsic::aarch64_neon_sqshrun; 8983 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 8984 case NEON::BI__builtin_neon_vqrshrun_n_v: 8985 Int = Intrinsic::aarch64_neon_sqrshrun; 8986 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 8987 case NEON::BI__builtin_neon_vqshrn_n_v: 8988 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 8989 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 8990 case NEON::BI__builtin_neon_vrshrn_n_v: 8991 Int = Intrinsic::aarch64_neon_rshrn; 8992 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 8993 case NEON::BI__builtin_neon_vqrshrn_n_v: 8994 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 8995 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 8996 case NEON::BI__builtin_neon_vrndah_f16: { 8997 Ops.push_back(EmitScalarExpr(E->getArg(0))); 8998 Int = Intrinsic::round; 8999 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); 9000 } 9001 case NEON::BI__builtin_neon_vrnda_v: 9002 case NEON::BI__builtin_neon_vrndaq_v: { 9003 Int = Intrinsic::round; 9004 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 9005 } 9006 case NEON::BI__builtin_neon_vrndih_f16: { 9007 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9008 Int = Intrinsic::nearbyint; 9009 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); 9010 } 9011 case NEON::BI__builtin_neon_vrndmh_f16: { 9012 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9013 Int = Intrinsic::floor; 9014 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); 9015 } 9016 case NEON::BI__builtin_neon_vrndm_v: 9017 case NEON::BI__builtin_neon_vrndmq_v: { 9018 Int = Intrinsic::floor; 9019 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 9020 } 9021 case NEON::BI__builtin_neon_vrndnh_f16: { 9022 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9023 Int 
= Intrinsic::aarch64_neon_frintn; 9024 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn"); 9025 } 9026 case NEON::BI__builtin_neon_vrndn_v: 9027 case NEON::BI__builtin_neon_vrndnq_v: { 9028 Int = Intrinsic::aarch64_neon_frintn; 9029 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 9030 } 9031 case NEON::BI__builtin_neon_vrndns_f32: { 9032 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9033 Int = Intrinsic::aarch64_neon_frintn; 9034 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn"); 9035 } 9036 case NEON::BI__builtin_neon_vrndph_f16: { 9037 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9038 Int = Intrinsic::ceil; 9039 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); 9040 } 9041 case NEON::BI__builtin_neon_vrndp_v: 9042 case NEON::BI__builtin_neon_vrndpq_v: { 9043 Int = Intrinsic::ceil; 9044 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 9045 } 9046 case NEON::BI__builtin_neon_vrndxh_f16: { 9047 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9048 Int = Intrinsic::rint; 9049 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); 9050 } 9051 case NEON::BI__builtin_neon_vrndx_v: 9052 case NEON::BI__builtin_neon_vrndxq_v: { 9053 Int = Intrinsic::rint; 9054 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 9055 } 9056 case NEON::BI__builtin_neon_vrndh_f16: { 9057 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9058 Int = Intrinsic::trunc; 9059 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); 9060 } 9061 case NEON::BI__builtin_neon_vrnd_v: 9062 case NEON::BI__builtin_neon_vrndq_v: { 9063 Int = Intrinsic::trunc; 9064 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 9065 } 9066 case NEON::BI__builtin_neon_vcvt_f64_v: 9067 case NEON::BI__builtin_neon_vcvtq_f64_v: 9068 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9069 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 9070 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 9071 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 9072 case NEON::BI__builtin_neon_vcvt_f64_f32: { 9073 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 9074 "unexpected vcvt_f64_f32 builtin"); 9075 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 9076 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 9077 9078 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 9079 } 9080 case NEON::BI__builtin_neon_vcvt_f32_f64: { 9081 assert(Type.getEltType() == NeonTypeFlags::Float32 && 9082 "unexpected vcvt_f32_f64 builtin"); 9083 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 9084 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 9085 9086 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 9087 } 9088 case NEON::BI__builtin_neon_vcvt_s32_v: 9089 case NEON::BI__builtin_neon_vcvt_u32_v: 9090 case NEON::BI__builtin_neon_vcvt_s64_v: 9091 case NEON::BI__builtin_neon_vcvt_u64_v: 9092 case NEON::BI__builtin_neon_vcvt_s16_v: 9093 case NEON::BI__builtin_neon_vcvt_u16_v: 9094 case NEON::BI__builtin_neon_vcvtq_s32_v: 9095 case NEON::BI__builtin_neon_vcvtq_u32_v: 9096 case NEON::BI__builtin_neon_vcvtq_s64_v: 9097 case NEON::BI__builtin_neon_vcvtq_u64_v: 9098 case NEON::BI__builtin_neon_vcvtq_s16_v: 9099 case NEON::BI__builtin_neon_vcvtq_u16_v: { 9100 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 9101 if (usgn) 9102 return Builder.CreateFPToUI(Ops[0], Ty); 9103 return Builder.CreateFPToSI(Ops[0], Ty); 9104 } 9105 case NEON::BI__builtin_neon_vcvta_s16_v: 9106 case NEON::BI__builtin_neon_vcvta_u16_v: 9107 case NEON::BI__builtin_neon_vcvta_s32_v: 9108 case NEON::BI__builtin_neon_vcvtaq_s16_v: 9109 case NEON::BI__builtin_neon_vcvtaq_s32_v: 9110 case NEON::BI__builtin_neon_vcvta_u32_v: 9111 case NEON::BI__builtin_neon_vcvtaq_u16_v: 9112 case NEON::BI__builtin_neon_vcvtaq_u32_v: 9113 case NEON::BI__builtin_neon_vcvta_s64_v: 9114 
case NEON::BI__builtin_neon_vcvtaq_s64_v: 9115 case NEON::BI__builtin_neon_vcvta_u64_v: 9116 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 9117 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 9118 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 9119 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 9120 } 9121 case NEON::BI__builtin_neon_vcvtm_s16_v: 9122 case NEON::BI__builtin_neon_vcvtm_s32_v: 9123 case NEON::BI__builtin_neon_vcvtmq_s16_v: 9124 case NEON::BI__builtin_neon_vcvtmq_s32_v: 9125 case NEON::BI__builtin_neon_vcvtm_u16_v: 9126 case NEON::BI__builtin_neon_vcvtm_u32_v: 9127 case NEON::BI__builtin_neon_vcvtmq_u16_v: 9128 case NEON::BI__builtin_neon_vcvtmq_u32_v: 9129 case NEON::BI__builtin_neon_vcvtm_s64_v: 9130 case NEON::BI__builtin_neon_vcvtmq_s64_v: 9131 case NEON::BI__builtin_neon_vcvtm_u64_v: 9132 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 9133 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 9134 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 9135 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 9136 } 9137 case NEON::BI__builtin_neon_vcvtn_s16_v: 9138 case NEON::BI__builtin_neon_vcvtn_s32_v: 9139 case NEON::BI__builtin_neon_vcvtnq_s16_v: 9140 case NEON::BI__builtin_neon_vcvtnq_s32_v: 9141 case NEON::BI__builtin_neon_vcvtn_u16_v: 9142 case NEON::BI__builtin_neon_vcvtn_u32_v: 9143 case NEON::BI__builtin_neon_vcvtnq_u16_v: 9144 case NEON::BI__builtin_neon_vcvtnq_u32_v: 9145 case NEON::BI__builtin_neon_vcvtn_s64_v: 9146 case NEON::BI__builtin_neon_vcvtnq_s64_v: 9147 case NEON::BI__builtin_neon_vcvtn_u64_v: 9148 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 9149 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 9150 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 9151 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 9152 } 9153 case NEON::BI__builtin_neon_vcvtp_s16_v: 9154 case NEON::BI__builtin_neon_vcvtp_s32_v: 9155 case NEON::BI__builtin_neon_vcvtpq_s16_v: 9156 case NEON::BI__builtin_neon_vcvtpq_s32_v: 9157 case NEON::BI__builtin_neon_vcvtp_u16_v: 9158 case NEON::BI__builtin_neon_vcvtp_u32_v: 9159 case NEON::BI__builtin_neon_vcvtpq_u16_v: 9160 case NEON::BI__builtin_neon_vcvtpq_u32_v: 9161 case NEON::BI__builtin_neon_vcvtp_s64_v: 9162 case NEON::BI__builtin_neon_vcvtpq_s64_v: 9163 case NEON::BI__builtin_neon_vcvtp_u64_v: 9164 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 9165 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 9166 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 9167 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 9168 } 9169 case NEON::BI__builtin_neon_vmulx_v: 9170 case NEON::BI__builtin_neon_vmulxq_v: { 9171 Int = Intrinsic::aarch64_neon_fmulx; 9172 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 9173 } 9174 case NEON::BI__builtin_neon_vmulxh_lane_f16: 9175 case NEON::BI__builtin_neon_vmulxh_laneq_f16: { 9176 // vmulx_lane should be mapped to Neon scalar mulx after 9177 // extracting the scalar element 9178 Ops.push_back(EmitScalarExpr(E->getArg(2))); 9179 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 9180 Ops.pop_back(); 9181 Int = Intrinsic::aarch64_neon_fmulx; 9182 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx"); 9183 } 9184 case NEON::BI__builtin_neon_vmul_lane_v: 9185 case NEON::BI__builtin_neon_vmul_laneq_v: { 9186 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 9187 bool Quad = false; 9188 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 9189 Quad = true; 9190 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 9191 llvm::Type *VTy = 
GetNeonType(this, 9192 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 9193 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 9194 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 9195 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 9196 return Builder.CreateBitCast(Result, Ty); 9197 } 9198 case NEON::BI__builtin_neon_vnegd_s64: 9199 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 9200 case NEON::BI__builtin_neon_vnegh_f16: 9201 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); 9202 case NEON::BI__builtin_neon_vpmaxnm_v: 9203 case NEON::BI__builtin_neon_vpmaxnmq_v: { 9204 Int = Intrinsic::aarch64_neon_fmaxnmp; 9205 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 9206 } 9207 case NEON::BI__builtin_neon_vpminnm_v: 9208 case NEON::BI__builtin_neon_vpminnmq_v: { 9209 Int = Intrinsic::aarch64_neon_fminnmp; 9210 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 9211 } 9212 case NEON::BI__builtin_neon_vsqrth_f16: { 9213 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9214 Int = Intrinsic::sqrt; 9215 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); 9216 } 9217 case NEON::BI__builtin_neon_vsqrt_v: 9218 case NEON::BI__builtin_neon_vsqrtq_v: { 9219 Int = Intrinsic::sqrt; 9220 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9221 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 9222 } 9223 case NEON::BI__builtin_neon_vrbit_v: 9224 case NEON::BI__builtin_neon_vrbitq_v: { 9225 Int = Intrinsic::aarch64_neon_rbit; 9226 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 9227 } 9228 case NEON::BI__builtin_neon_vaddv_u8: 9229 // FIXME: These are handled by the AArch64 scalar code. 9230 usgn = true; 9231 LLVM_FALLTHROUGH; 9232 case NEON::BI__builtin_neon_vaddv_s8: { 9233 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 9234 Ty = Int32Ty; 9235 VTy = llvm::VectorType::get(Int8Ty, 8); 9236 llvm::Type *Tys[2] = { Ty, VTy }; 9237 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9238 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 9239 return Builder.CreateTrunc(Ops[0], Int8Ty); 9240 } 9241 case NEON::BI__builtin_neon_vaddv_u16: 9242 usgn = true; 9243 LLVM_FALLTHROUGH; 9244 case NEON::BI__builtin_neon_vaddv_s16: { 9245 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 9246 Ty = Int32Ty; 9247 VTy = llvm::VectorType::get(Int16Ty, 4); 9248 llvm::Type *Tys[2] = { Ty, VTy }; 9249 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9250 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 9251 return Builder.CreateTrunc(Ops[0], Int16Ty); 9252 } 9253 case NEON::BI__builtin_neon_vaddvq_u8: 9254 usgn = true; 9255 LLVM_FALLTHROUGH; 9256 case NEON::BI__builtin_neon_vaddvq_s8: { 9257 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 9258 Ty = Int32Ty; 9259 VTy = llvm::VectorType::get(Int8Ty, 16); 9260 llvm::Type *Tys[2] = { Ty, VTy }; 9261 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9262 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 9263 return Builder.CreateTrunc(Ops[0], Int8Ty); 9264 } 9265 case NEON::BI__builtin_neon_vaddvq_u16: 9266 usgn = true; 9267 LLVM_FALLTHROUGH; 9268 case NEON::BI__builtin_neon_vaddvq_s16: { 9269 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 9270 Ty = Int32Ty; 9271 VTy = llvm::VectorType::get(Int16Ty, 8); 9272 llvm::Type *Tys[2] = { Ty, VTy }; 9273 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9274 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 9275 return Builder.CreateTrunc(Ops[0], Int16Ty); 9276 } 9277 case NEON::BI__builtin_neon_vmaxv_u8: { 9278 Int = Intrinsic::aarch64_neon_umaxv; 9279 Ty = Int32Ty; 9280 VTy = llvm::VectorType::get(Int8Ty, 8); 9281 llvm::Type *Tys[2] = { Ty, VTy }; 9282 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9283 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9284 return Builder.CreateTrunc(Ops[0], Int8Ty); 9285 } 9286 case NEON::BI__builtin_neon_vmaxv_u16: { 9287 Int = Intrinsic::aarch64_neon_umaxv; 9288 Ty = Int32Ty; 9289 VTy = llvm::VectorType::get(Int16Ty, 4); 9290 llvm::Type *Tys[2] = { Ty, VTy }; 9291 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9292 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9293 return Builder.CreateTrunc(Ops[0], Int16Ty); 9294 } 9295 case NEON::BI__builtin_neon_vmaxvq_u8: { 9296 Int = Intrinsic::aarch64_neon_umaxv; 9297 Ty = Int32Ty; 9298 VTy = llvm::VectorType::get(Int8Ty, 16); 9299 llvm::Type *Tys[2] = { Ty, VTy }; 9300 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9301 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9302 return Builder.CreateTrunc(Ops[0], Int8Ty); 9303 } 9304 case NEON::BI__builtin_neon_vmaxvq_u16: { 9305 Int = Intrinsic::aarch64_neon_umaxv; 9306 Ty = Int32Ty; 9307 VTy = llvm::VectorType::get(Int16Ty, 8); 9308 llvm::Type *Tys[2] = { Ty, VTy }; 9309 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9310 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9311 return Builder.CreateTrunc(Ops[0], Int16Ty); 9312 } 9313 case NEON::BI__builtin_neon_vmaxv_s8: { 9314 Int = Intrinsic::aarch64_neon_smaxv; 9315 Ty = Int32Ty; 9316 VTy = llvm::VectorType::get(Int8Ty, 8); 9317 llvm::Type *Tys[2] = { Ty, 
VTy }; 9318 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9319 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9320 return Builder.CreateTrunc(Ops[0], Int8Ty); 9321 } 9322 case NEON::BI__builtin_neon_vmaxv_s16: { 9323 Int = Intrinsic::aarch64_neon_smaxv; 9324 Ty = Int32Ty; 9325 VTy = llvm::VectorType::get(Int16Ty, 4); 9326 llvm::Type *Tys[2] = { Ty, VTy }; 9327 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9328 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9329 return Builder.CreateTrunc(Ops[0], Int16Ty); 9330 } 9331 case NEON::BI__builtin_neon_vmaxvq_s8: { 9332 Int = Intrinsic::aarch64_neon_smaxv; 9333 Ty = Int32Ty; 9334 VTy = llvm::VectorType::get(Int8Ty, 16); 9335 llvm::Type *Tys[2] = { Ty, VTy }; 9336 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9337 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9338 return Builder.CreateTrunc(Ops[0], Int8Ty); 9339 } 9340 case NEON::BI__builtin_neon_vmaxvq_s16: { 9341 Int = Intrinsic::aarch64_neon_smaxv; 9342 Ty = Int32Ty; 9343 VTy = llvm::VectorType::get(Int16Ty, 8); 9344 llvm::Type *Tys[2] = { Ty, VTy }; 9345 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9346 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9347 return Builder.CreateTrunc(Ops[0], Int16Ty); 9348 } 9349 case NEON::BI__builtin_neon_vmaxv_f16: { 9350 Int = Intrinsic::aarch64_neon_fmaxv; 9351 Ty = HalfTy; 9352 VTy = llvm::VectorType::get(HalfTy, 4); 9353 llvm::Type *Tys[2] = { Ty, VTy }; 9354 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9355 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9356 return Builder.CreateTrunc(Ops[0], HalfTy); 9357 } 9358 case NEON::BI__builtin_neon_vmaxvq_f16: { 9359 Int = Intrinsic::aarch64_neon_fmaxv; 9360 Ty = HalfTy; 9361 VTy = llvm::VectorType::get(HalfTy, 8); 9362 llvm::Type *Tys[2] = { Ty, VTy }; 9363 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9364 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 9365 return 
Builder.CreateTrunc(Ops[0], HalfTy); 9366 } 9367 case NEON::BI__builtin_neon_vminv_u8: { 9368 Int = Intrinsic::aarch64_neon_uminv; 9369 Ty = Int32Ty; 9370 VTy = llvm::VectorType::get(Int8Ty, 8); 9371 llvm::Type *Tys[2] = { Ty, VTy }; 9372 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9373 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9374 return Builder.CreateTrunc(Ops[0], Int8Ty); 9375 } 9376 case NEON::BI__builtin_neon_vminv_u16: { 9377 Int = Intrinsic::aarch64_neon_uminv; 9378 Ty = Int32Ty; 9379 VTy = llvm::VectorType::get(Int16Ty, 4); 9380 llvm::Type *Tys[2] = { Ty, VTy }; 9381 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9382 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9383 return Builder.CreateTrunc(Ops[0], Int16Ty); 9384 } 9385 case NEON::BI__builtin_neon_vminvq_u8: { 9386 Int = Intrinsic::aarch64_neon_uminv; 9387 Ty = Int32Ty; 9388 VTy = llvm::VectorType::get(Int8Ty, 16); 9389 llvm::Type *Tys[2] = { Ty, VTy }; 9390 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9391 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9392 return Builder.CreateTrunc(Ops[0], Int8Ty); 9393 } 9394 case NEON::BI__builtin_neon_vminvq_u16: { 9395 Int = Intrinsic::aarch64_neon_uminv; 9396 Ty = Int32Ty; 9397 VTy = llvm::VectorType::get(Int16Ty, 8); 9398 llvm::Type *Tys[2] = { Ty, VTy }; 9399 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9400 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9401 return Builder.CreateTrunc(Ops[0], Int16Ty); 9402 } 9403 case NEON::BI__builtin_neon_vminv_s8: { 9404 Int = Intrinsic::aarch64_neon_sminv; 9405 Ty = Int32Ty; 9406 VTy = llvm::VectorType::get(Int8Ty, 8); 9407 llvm::Type *Tys[2] = { Ty, VTy }; 9408 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9409 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9410 return Builder.CreateTrunc(Ops[0], Int8Ty); 9411 } 9412 case NEON::BI__builtin_neon_vminv_s16: { 9413 Int = Intrinsic::aarch64_neon_sminv; 9414 Ty = Int32Ty; 9415 VTy 
= llvm::VectorType::get(Int16Ty, 4); 9416 llvm::Type *Tys[2] = { Ty, VTy }; 9417 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9418 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9419 return Builder.CreateTrunc(Ops[0], Int16Ty); 9420 } 9421 case NEON::BI__builtin_neon_vminvq_s8: { 9422 Int = Intrinsic::aarch64_neon_sminv; 9423 Ty = Int32Ty; 9424 VTy = llvm::VectorType::get(Int8Ty, 16); 9425 llvm::Type *Tys[2] = { Ty, VTy }; 9426 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9427 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9428 return Builder.CreateTrunc(Ops[0], Int8Ty); 9429 } 9430 case NEON::BI__builtin_neon_vminvq_s16: { 9431 Int = Intrinsic::aarch64_neon_sminv; 9432 Ty = Int32Ty; 9433 VTy = llvm::VectorType::get(Int16Ty, 8); 9434 llvm::Type *Tys[2] = { Ty, VTy }; 9435 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9436 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9437 return Builder.CreateTrunc(Ops[0], Int16Ty); 9438 } 9439 case NEON::BI__builtin_neon_vminv_f16: { 9440 Int = Intrinsic::aarch64_neon_fminv; 9441 Ty = HalfTy; 9442 VTy = llvm::VectorType::get(HalfTy, 4); 9443 llvm::Type *Tys[2] = { Ty, VTy }; 9444 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9445 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9446 return Builder.CreateTrunc(Ops[0], HalfTy); 9447 } 9448 case NEON::BI__builtin_neon_vminvq_f16: { 9449 Int = Intrinsic::aarch64_neon_fminv; 9450 Ty = HalfTy; 9451 VTy = llvm::VectorType::get(HalfTy, 8); 9452 llvm::Type *Tys[2] = { Ty, VTy }; 9453 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9454 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 9455 return Builder.CreateTrunc(Ops[0], HalfTy); 9456 } 9457 case NEON::BI__builtin_neon_vmaxnmv_f16: { 9458 Int = Intrinsic::aarch64_neon_fmaxnmv; 9459 Ty = HalfTy; 9460 VTy = llvm::VectorType::get(HalfTy, 4); 9461 llvm::Type *Tys[2] = { Ty, VTy }; 9462 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9463 Ops[0] = 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 9464 return Builder.CreateTrunc(Ops[0], HalfTy); 9465 } 9466 case NEON::BI__builtin_neon_vmaxnmvq_f16: { 9467 Int = Intrinsic::aarch64_neon_fmaxnmv; 9468 Ty = HalfTy; 9469 VTy = llvm::VectorType::get(HalfTy, 8); 9470 llvm::Type *Tys[2] = { Ty, VTy }; 9471 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9472 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 9473 return Builder.CreateTrunc(Ops[0], HalfTy); 9474 } 9475 case NEON::BI__builtin_neon_vminnmv_f16: { 9476 Int = Intrinsic::aarch64_neon_fminnmv; 9477 Ty = HalfTy; 9478 VTy = llvm::VectorType::get(HalfTy, 4); 9479 llvm::Type *Tys[2] = { Ty, VTy }; 9480 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9481 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 9482 return Builder.CreateTrunc(Ops[0], HalfTy); 9483 } 9484 case NEON::BI__builtin_neon_vminnmvq_f16: { 9485 Int = Intrinsic::aarch64_neon_fminnmv; 9486 Ty = HalfTy; 9487 VTy = llvm::VectorType::get(HalfTy, 8); 9488 llvm::Type *Tys[2] = { Ty, VTy }; 9489 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9490 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 9491 return Builder.CreateTrunc(Ops[0], HalfTy); 9492 } 9493 case NEON::BI__builtin_neon_vmul_n_f64: { 9494 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 9495 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 9496 return Builder.CreateFMul(Ops[0], RHS); 9497 } 9498 case NEON::BI__builtin_neon_vaddlv_u8: { 9499 Int = Intrinsic::aarch64_neon_uaddlv; 9500 Ty = Int32Ty; 9501 VTy = llvm::VectorType::get(Int8Ty, 8); 9502 llvm::Type *Tys[2] = { Ty, VTy }; 9503 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9504 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9505 return Builder.CreateTrunc(Ops[0], Int16Ty); 9506 } 9507 case NEON::BI__builtin_neon_vaddlv_u16: { 9508 Int = Intrinsic::aarch64_neon_uaddlv; 9509 Ty = Int32Ty; 9510 VTy = llvm::VectorType::get(Int16Ty, 4); 9511 
llvm::Type *Tys[2] = { Ty, VTy }; 9512 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9513 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9514 } 9515 case NEON::BI__builtin_neon_vaddlvq_u8: { 9516 Int = Intrinsic::aarch64_neon_uaddlv; 9517 Ty = Int32Ty; 9518 VTy = llvm::VectorType::get(Int8Ty, 16); 9519 llvm::Type *Tys[2] = { Ty, VTy }; 9520 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9521 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9522 return Builder.CreateTrunc(Ops[0], Int16Ty); 9523 } 9524 case NEON::BI__builtin_neon_vaddlvq_u16: { 9525 Int = Intrinsic::aarch64_neon_uaddlv; 9526 Ty = Int32Ty; 9527 VTy = llvm::VectorType::get(Int16Ty, 8); 9528 llvm::Type *Tys[2] = { Ty, VTy }; 9529 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9530 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9531 } 9532 case NEON::BI__builtin_neon_vaddlv_s8: { 9533 Int = Intrinsic::aarch64_neon_saddlv; 9534 Ty = Int32Ty; 9535 VTy = llvm::VectorType::get(Int8Ty, 8); 9536 llvm::Type *Tys[2] = { Ty, VTy }; 9537 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9538 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9539 return Builder.CreateTrunc(Ops[0], Int16Ty); 9540 } 9541 case NEON::BI__builtin_neon_vaddlv_s16: { 9542 Int = Intrinsic::aarch64_neon_saddlv; 9543 Ty = Int32Ty; 9544 VTy = llvm::VectorType::get(Int16Ty, 4); 9545 llvm::Type *Tys[2] = { Ty, VTy }; 9546 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9547 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9548 } 9549 case NEON::BI__builtin_neon_vaddlvq_s8: { 9550 Int = Intrinsic::aarch64_neon_saddlv; 9551 Ty = Int32Ty; 9552 VTy = llvm::VectorType::get(Int8Ty, 16); 9553 llvm::Type *Tys[2] = { Ty, VTy }; 9554 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9555 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9556 return Builder.CreateTrunc(Ops[0], Int16Ty); 9557 } 9558 case NEON::BI__builtin_neon_vaddlvq_s16: { 9559 Int = 
Intrinsic::aarch64_neon_saddlv; 9560 Ty = Int32Ty; 9561 VTy = llvm::VectorType::get(Int16Ty, 8); 9562 llvm::Type *Tys[2] = { Ty, VTy }; 9563 Ops.push_back(EmitScalarExpr(E->getArg(0))); 9564 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 9565 } 9566 case NEON::BI__builtin_neon_vsri_n_v: 9567 case NEON::BI__builtin_neon_vsriq_n_v: { 9568 Int = Intrinsic::aarch64_neon_vsri; 9569 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 9570 return EmitNeonCall(Intrin, Ops, "vsri_n"); 9571 } 9572 case NEON::BI__builtin_neon_vsli_n_v: 9573 case NEON::BI__builtin_neon_vsliq_n_v: { 9574 Int = Intrinsic::aarch64_neon_vsli; 9575 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 9576 return EmitNeonCall(Intrin, Ops, "vsli_n"); 9577 } 9578 case NEON::BI__builtin_neon_vsra_n_v: 9579 case NEON::BI__builtin_neon_vsraq_n_v: 9580 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9581 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 9582 return Builder.CreateAdd(Ops[0], Ops[1]); 9583 case NEON::BI__builtin_neon_vrsra_n_v: 9584 case NEON::BI__builtin_neon_vrsraq_n_v: { 9585 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 9586 SmallVector<llvm::Value*,2> TmpOps; 9587 TmpOps.push_back(Ops[1]); 9588 TmpOps.push_back(Ops[2]); 9589 Function* F = CGM.getIntrinsic(Int, Ty); 9590 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 9591 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 9592 return Builder.CreateAdd(Ops[0], tmp); 9593 } 9594 case NEON::BI__builtin_neon_vld1_v: 9595 case NEON::BI__builtin_neon_vld1q_v: { 9596 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 9597 auto Alignment = CharUnits::fromQuantity( 9598 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 
8 : 16); 9599 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 9600 } 9601 case NEON::BI__builtin_neon_vst1_v: 9602 case NEON::BI__builtin_neon_vst1q_v: 9603 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 9604 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 9605 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9606 case NEON::BI__builtin_neon_vld1_lane_v: 9607 case NEON::BI__builtin_neon_vld1q_lane_v: { 9608 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9609 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 9610 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9611 auto Alignment = CharUnits::fromQuantity( 9612 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 9613 Ops[0] = 9614 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 9615 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 9616 } 9617 case NEON::BI__builtin_neon_vld1_dup_v: 9618 case NEON::BI__builtin_neon_vld1q_dup_v: { 9619 Value *V = UndefValue::get(Ty); 9620 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 9621 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9622 auto Alignment = CharUnits::fromQuantity( 9623 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 9624 Ops[0] = 9625 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 9626 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 9627 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 9628 return EmitNeonSplat(Ops[0], CI); 9629 } 9630 case NEON::BI__builtin_neon_vst1_lane_v: 9631 case NEON::BI__builtin_neon_vst1q_lane_v: 9632 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9633 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 9634 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 9635 return Builder.CreateDefaultAlignedStore(Ops[1], 9636 Builder.CreateBitCast(Ops[0], Ty)); 9637 case NEON::BI__builtin_neon_vld2_v: 9638 case NEON::BI__builtin_neon_vld2q_v: { 9639 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 9640 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 9641 llvm::Type *Tys[2] = { VTy, PTy }; 9642 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 9643 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 9644 Ops[0] = Builder.CreateBitCast(Ops[0], 9645 llvm::PointerType::getUnqual(Ops[1]->getType())); 9646 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9647 } 9648 case NEON::BI__builtin_neon_vld3_v: 9649 case NEON::BI__builtin_neon_vld3q_v: { 9650 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 9651 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 9652 llvm::Type *Tys[2] = { VTy, PTy }; 9653 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 9654 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 9655 Ops[0] = Builder.CreateBitCast(Ops[0], 9656 llvm::PointerType::getUnqual(Ops[1]->getType())); 9657 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9658 } 9659 case NEON::BI__builtin_neon_vld4_v: 9660 case NEON::BI__builtin_neon_vld4q_v: { 9661 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 9662 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 9663 llvm::Type *Tys[2] = { VTy, PTy }; 9664 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 9665 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 9666 Ops[0] = Builder.CreateBitCast(Ops[0], 9667 llvm::PointerType::getUnqual(Ops[1]->getType())); 9668 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9669 } 9670 case NEON::BI__builtin_neon_vld2_dup_v: 9671 case NEON::BI__builtin_neon_vld2q_dup_v: { 9672 llvm::Type *PTy = 9673 llvm::PointerType::getUnqual(VTy->getElementType()); 9674 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 9675 llvm::Type *Tys[2] = { VTy, PTy }; 9676 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 9677 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 9678 Ops[0] = Builder.CreateBitCast(Ops[0], 9679 llvm::PointerType::getUnqual(Ops[1]->getType())); 9680 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9681 } 9682 case NEON::BI__builtin_neon_vld3_dup_v: 9683 case NEON::BI__builtin_neon_vld3q_dup_v: { 9684 llvm::Type *PTy = 9685 llvm::PointerType::getUnqual(VTy->getElementType()); 9686 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 9687 llvm::Type *Tys[2] = { VTy, PTy }; 9688 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 9689 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 9690 Ops[0] = Builder.CreateBitCast(Ops[0], 9691 llvm::PointerType::getUnqual(Ops[1]->getType())); 9692 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9693 } 9694 case NEON::BI__builtin_neon_vld4_dup_v: 9695 case NEON::BI__builtin_neon_vld4q_dup_v: { 9696 llvm::Type *PTy = 9697 llvm::PointerType::getUnqual(VTy->getElementType()); 9698 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 9699 llvm::Type *Tys[2] = { VTy, PTy }; 9700 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 9701 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 9702 Ops[0] = Builder.CreateBitCast(Ops[0], 9703 llvm::PointerType::getUnqual(Ops[1]->getType())); 9704 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9705 } 9706 case NEON::BI__builtin_neon_vld2_lane_v: 9707 case NEON::BI__builtin_neon_vld2q_lane_v: { 9708 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 9709 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 9710 Ops.push_back(Ops[1]); 9711 Ops.erase(Ops.begin()+1); 9712 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9713 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 9714 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 9715 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 9716 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 9717 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9718 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9719 } 9720 case NEON::BI__builtin_neon_vld3_lane_v: 9721 case NEON::BI__builtin_neon_vld3q_lane_v: { 9722 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 9723 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 9724 Ops.push_back(Ops[1]); 9725 Ops.erase(Ops.begin()+1); 9726 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9727 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 9728 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 9729 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 9730 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 9731 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 9732 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9733 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9734 } 9735 case NEON::BI__builtin_neon_vld4_lane_v: 9736 case NEON::BI__builtin_neon_vld4q_lane_v: { 9737 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 9738 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 9739 Ops.push_back(Ops[1]); 9740 Ops.erase(Ops.begin()+1); 9741 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9742 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 9743 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 9744 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 9745 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 9746 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 9747 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 9748 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 9749 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 9750 } 9751 case NEON::BI__builtin_neon_vst2_v: 9752 case NEON::BI__builtin_neon_vst2q_v: { 9753 Ops.push_back(Ops[0]); 9754 Ops.erase(Ops.begin()); 9755 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 9756 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 9757 Ops, ""); 9758 } 9759 case NEON::BI__builtin_neon_vst2_lane_v: 9760 case NEON::BI__builtin_neon_vst2q_lane_v: { 9761 Ops.push_back(Ops[0]); 9762 Ops.erase(Ops.begin()); 9763 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 9764 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 9765 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 9766 Ops, ""); 9767 } 9768 case NEON::BI__builtin_neon_vst3_v: 9769 case NEON::BI__builtin_neon_vst3q_v: { 9770 Ops.push_back(Ops[0]); 9771 Ops.erase(Ops.begin()); 9772 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 9773 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 9774 Ops, ""); 9775 } 9776 case NEON::BI__builtin_neon_vst3_lane_v: 9777 case NEON::BI__builtin_neon_vst3q_lane_v: { 9778 Ops.push_back(Ops[0]); 9779 Ops.erase(Ops.begin()); 9780 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 9781 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 9782 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 9783 Ops, ""); 9784 } 9785 case NEON::BI__builtin_neon_vst4_v: 9786 case NEON::BI__builtin_neon_vst4q_v: { 9787 Ops.push_back(Ops[0]); 9788 Ops.erase(Ops.begin()); 9789 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 9790 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 9791 Ops, ""); 9792 } 9793 case NEON::BI__builtin_neon_vst4_lane_v: 9794 case NEON::BI__builtin_neon_vst4q_lane_v: { 9795 Ops.push_back(Ops[0]); 9796 Ops.erase(Ops.begin()); 9797 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 9798 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 9799 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 9800 Ops, ""); 9801 } 9802 case NEON::BI__builtin_neon_vtrn_v: 9803 case NEON::BI__builtin_neon_vtrnq_v: { 9804 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 9805 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9806 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 9807 Value *SV = nullptr; 9808 9809 for (unsigned vi = 0; vi != 2; ++vi) { 9810 SmallVector<uint32_t, 16> Indices; 9811 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 9812 Indices.push_back(i+vi); 9813 Indices.push_back(i+e+vi); 9814 } 9815 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 9816 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 9817 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 9818 } 9819 return SV; 9820 } 9821 case NEON::BI__builtin_neon_vuzp_v: 9822 case NEON::BI__builtin_neon_vuzpq_v: { 9823 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 9824 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9825 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 9826 Value *SV = nullptr; 9827 9828 for (unsigned vi = 0; vi != 2; ++vi) { 9829 SmallVector<uint32_t, 16> Indices; 9830 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 9831 Indices.push_back(2*i+vi); 9832 9833 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 9834 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 9835 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 9836 } 9837 return SV; 9838 } 9839 case NEON::BI__builtin_neon_vzip_v: 9840 case NEON::BI__builtin_neon_vzipq_v: { 9841 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 9842 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9843 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 9844 Value *SV = nullptr; 9845 9846 for (unsigned vi = 0; vi != 2; ++vi) { 9847 SmallVector<uint32_t, 16> Indices; 9848 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 9849 
Indices.push_back((i + vi*e) >> 1); 9850 Indices.push_back(((i + vi*e) >> 1)+e); 9851 } 9852 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 9853 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 9854 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 9855 } 9856 return SV; 9857 } 9858 case NEON::BI__builtin_neon_vqtbl1q_v: { 9859 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 9860 Ops, "vtbl1"); 9861 } 9862 case NEON::BI__builtin_neon_vqtbl2q_v: { 9863 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 9864 Ops, "vtbl2"); 9865 } 9866 case NEON::BI__builtin_neon_vqtbl3q_v: { 9867 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 9868 Ops, "vtbl3"); 9869 } 9870 case NEON::BI__builtin_neon_vqtbl4q_v: { 9871 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 9872 Ops, "vtbl4"); 9873 } 9874 case NEON::BI__builtin_neon_vqtbx1q_v: { 9875 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 9876 Ops, "vtbx1"); 9877 } 9878 case NEON::BI__builtin_neon_vqtbx2q_v: { 9879 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 9880 Ops, "vtbx2"); 9881 } 9882 case NEON::BI__builtin_neon_vqtbx3q_v: { 9883 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 9884 Ops, "vtbx3"); 9885 } 9886 case NEON::BI__builtin_neon_vqtbx4q_v: { 9887 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 9888 Ops, "vtbx4"); 9889 } 9890 case NEON::BI__builtin_neon_vsqadd_v: 9891 case NEON::BI__builtin_neon_vsqaddq_v: { 9892 Int = Intrinsic::aarch64_neon_usqadd; 9893 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 9894 } 9895 case NEON::BI__builtin_neon_vuqadd_v: 9896 case NEON::BI__builtin_neon_vuqaddq_v: { 9897 Int = Intrinsic::aarch64_neon_suqadd; 9898 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 9899 } 9900 } 9901 } 9902 9903 Value 
*CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, 9904 const CallExpr *E) { 9905 assert(BuiltinID == BPF::BI__builtin_preserve_field_info && 9906 "unexpected ARM builtin"); 9907 9908 const Expr *Arg = E->getArg(0); 9909 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField; 9910 9911 if (!getDebugInfo()) { 9912 CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g"); 9913 return IsBitField ? EmitLValue(Arg).getBitFieldPointer() 9914 : EmitLValue(Arg).getPointer(*this); 9915 } 9916 9917 // Enable underlying preserve_*_access_index() generation. 9918 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; 9919 IsInPreservedAIRegion = true; 9920 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer() 9921 : EmitLValue(Arg).getPointer(*this); 9922 IsInPreservedAIRegion = OldIsInPreservedAIRegion; 9923 9924 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 9925 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue()); 9926 9927 // Built the IR for the preserve_field_info intrinsic. 9928 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration( 9929 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info, 9930 {FieldAddr->getType()}); 9931 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); 9932 } 9933 9934 llvm::Value *CodeGenFunction:: 9935 BuildVector(ArrayRef<llvm::Value*> Ops) { 9936 assert((Ops.size() & (Ops.size() - 1)) == 0 && 9937 "Not a power-of-two sized vector!"); 9938 bool AllConstants = true; 9939 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 9940 AllConstants &= isa<Constant>(Ops[i]); 9941 9942 // If this is a constant vector, create a ConstantVector. 
9943 if (AllConstants) { 9944 SmallVector<llvm::Constant*, 16> CstOps; 9945 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 9946 CstOps.push_back(cast<Constant>(Ops[i])); 9947 return llvm::ConstantVector::get(CstOps); 9948 } 9949 9950 // Otherwise, insertelement the values to build the vector. 9951 Value *Result = 9952 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 9953 9954 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 9955 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 9956 9957 return Result; 9958 } 9959 9960 // Convert the mask from an integer type to a vector of i1. 9961 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 9962 unsigned NumElts) { 9963 9964 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 9965 cast<IntegerType>(Mask->getType())->getBitWidth()); 9966 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 9967 9968 // If we have less than 8 elements, then the starting mask was an i8 and 9969 // we need to extract down to the right number of elements. 9970 if (NumElts < 8) { 9971 uint32_t Indices[4]; 9972 for (unsigned i = 0; i != NumElts; ++i) 9973 Indices[i] = i; 9974 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 9975 makeArrayRef(Indices, NumElts), 9976 "extract"); 9977 } 9978 return MaskVec; 9979 } 9980 9981 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 9982 Align Alignment) { 9983 // Cast the pointer to right type. 9984 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], 9985 llvm::PointerType::getUnqual(Ops[1]->getType())); 9986 9987 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 9988 Ops[1]->getType()->getVectorNumElements()); 9989 9990 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec); 9991 } 9992 9993 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 9994 Align Alignment) { 9995 // Cast the pointer to right type. 
9996 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], 9997 llvm::PointerType::getUnqual(Ops[1]->getType())); 9998 9999 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 10000 Ops[1]->getType()->getVectorNumElements()); 10001 10002 return CGF.Builder.CreateMaskedLoad(Ptr, Alignment, MaskVec, Ops[1]); 10003 } 10004 10005 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, 10006 ArrayRef<Value *> Ops) { 10007 llvm::Type *ResultTy = Ops[1]->getType(); 10008 llvm::Type *PtrTy = ResultTy->getVectorElementType(); 10009 10010 // Cast the pointer to element type. 10011 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], 10012 llvm::PointerType::getUnqual(PtrTy)); 10013 10014 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 10015 ResultTy->getVectorNumElements()); 10016 10017 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload, 10018 ResultTy); 10019 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); 10020 } 10021 10022 static Value *EmitX86CompressExpand(CodeGenFunction &CGF, 10023 ArrayRef<Value *> Ops, 10024 bool IsCompress) { 10025 llvm::Type *ResultTy = Ops[1]->getType(); 10026 10027 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 10028 ResultTy->getVectorNumElements()); 10029 10030 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress 10031 : Intrinsic::x86_avx512_mask_expand; 10032 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy); 10033 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec }); 10034 } 10035 10036 static Value *EmitX86CompressStore(CodeGenFunction &CGF, 10037 ArrayRef<Value *> Ops) { 10038 llvm::Type *ResultTy = Ops[1]->getType(); 10039 llvm::Type *PtrTy = ResultTy->getVectorElementType(); 10040 10041 // Cast the pointer to element type. 
10042 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], 10043 llvm::PointerType::getUnqual(PtrTy)); 10044 10045 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 10046 ResultTy->getVectorNumElements()); 10047 10048 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore, 10049 ResultTy); 10050 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec }); 10051 } 10052 10053 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, 10054 ArrayRef<Value *> Ops, 10055 bool InvertLHS = false) { 10056 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 10057 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); 10058 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); 10059 10060 if (InvertLHS) 10061 LHS = CGF.Builder.CreateNot(LHS); 10062 10063 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), 10064 Ops[0]->getType()); 10065 } 10066 10067 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, 10068 Value *Amt, bool IsRight) { 10069 llvm::Type *Ty = Op0->getType(); 10070 10071 // Amount may be scalar immediate, in which case create a splat vector. 10072 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so 10073 // we only care about the lowest log2 bits anyway. 10074 if (Amt->getType() != Ty) { 10075 unsigned NumElts = Ty->getVectorNumElements(); 10076 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false); 10077 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt); 10078 } 10079 10080 unsigned IID = IsRight ? 
Intrinsic::fshr : Intrinsic::fshl; 10081 Function *F = CGF.CGM.getIntrinsic(IID, Ty); 10082 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); 10083 } 10084 10085 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 10086 bool IsSigned) { 10087 Value *Op0 = Ops[0]; 10088 Value *Op1 = Ops[1]; 10089 llvm::Type *Ty = Op0->getType(); 10090 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 10091 10092 CmpInst::Predicate Pred; 10093 switch (Imm) { 10094 case 0x0: 10095 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; 10096 break; 10097 case 0x1: 10098 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; 10099 break; 10100 case 0x2: 10101 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; 10102 break; 10103 case 0x3: 10104 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; 10105 break; 10106 case 0x4: 10107 Pred = ICmpInst::ICMP_EQ; 10108 break; 10109 case 0x5: 10110 Pred = ICmpInst::ICMP_NE; 10111 break; 10112 case 0x6: 10113 return llvm::Constant::getNullValue(Ty); // FALSE 10114 case 0x7: 10115 return llvm::Constant::getAllOnesValue(Ty); // TRUE 10116 default: 10117 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); 10118 } 10119 10120 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1); 10121 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty); 10122 return Res; 10123 } 10124 10125 static Value *EmitX86Select(CodeGenFunction &CGF, 10126 Value *Mask, Value *Op0, Value *Op1) { 10127 10128 // If the mask is all ones just return first argument. 10129 if (const auto *C = dyn_cast<Constant>(Mask)) 10130 if (C->isAllOnesValue()) 10131 return Op0; 10132 10133 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 10134 10135 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 10136 } 10137 10138 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF, 10139 Value *Mask, Value *Op0, Value *Op1) { 10140 // If the mask is all ones just return first argument. 
10141 if (const auto *C = dyn_cast<Constant>(Mask)) 10142 if (C->isAllOnesValue()) 10143 return Op0; 10144 10145 llvm::VectorType *MaskTy = 10146 llvm::VectorType::get(CGF.Builder.getInt1Ty(), 10147 Mask->getType()->getIntegerBitWidth()); 10148 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy); 10149 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0); 10150 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 10151 } 10152 10153 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, 10154 unsigned NumElts, Value *MaskIn) { 10155 if (MaskIn) { 10156 const auto *C = dyn_cast<Constant>(MaskIn); 10157 if (!C || !C->isAllOnesValue()) 10158 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); 10159 } 10160 10161 if (NumElts < 8) { 10162 uint32_t Indices[8]; 10163 for (unsigned i = 0; i != NumElts; ++i) 10164 Indices[i] = i; 10165 for (unsigned i = NumElts; i != 8; ++i) 10166 Indices[i] = i % NumElts + NumElts; 10167 Cmp = CGF.Builder.CreateShuffleVector( 10168 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 10169 } 10170 10171 return CGF.Builder.CreateBitCast(Cmp, 10172 IntegerType::get(CGF.getLLVMContext(), 10173 std::max(NumElts, 8U))); 10174 } 10175 10176 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 10177 bool Signed, ArrayRef<Value *> Ops) { 10178 assert((Ops.size() == 2 || Ops.size() == 4) && 10179 "Unexpected number of arguments"); 10180 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 10181 Value *Cmp; 10182 10183 if (CC == 3) { 10184 Cmp = Constant::getNullValue( 10185 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 10186 } else if (CC == 7) { 10187 Cmp = Constant::getAllOnesValue( 10188 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 10189 } else { 10190 ICmpInst::Predicate Pred; 10191 switch (CC) { 10192 default: llvm_unreachable("Unknown condition code"); 10193 case 0: Pred = ICmpInst::ICMP_EQ; break; 10194 case 1: Pred = Signed ? 
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 10195 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 10196 case 4: Pred = ICmpInst::ICMP_NE; break; 10197 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 10198 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 10199 } 10200 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 10201 } 10202 10203 Value *MaskIn = nullptr; 10204 if (Ops.size() == 4) 10205 MaskIn = Ops[3]; 10206 10207 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); 10208 } 10209 10210 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { 10211 Value *Zero = Constant::getNullValue(In->getType()); 10212 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); 10213 } 10214 10215 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, 10216 ArrayRef<Value *> Ops, bool IsSigned) { 10217 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); 10218 llvm::Type *Ty = Ops[1]->getType(); 10219 10220 Value *Res; 10221 if (Rnd != 4) { 10222 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round 10223 : Intrinsic::x86_avx512_uitofp_round; 10224 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() }); 10225 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] }); 10226 } else { 10227 Res = IsSigned ? 
CGF.Builder.CreateSIToFP(Ops[0], Ty) 10228 : CGF.Builder.CreateUIToFP(Ops[0], Ty); 10229 } 10230 10231 return EmitX86Select(CGF, Ops[2], Res, Ops[1]); 10232 } 10233 10234 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { 10235 10236 llvm::Type *Ty = Ops[0]->getType(); 10237 Value *Zero = llvm::Constant::getNullValue(Ty); 10238 Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); 10239 Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); 10240 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); 10241 return Res; 10242 } 10243 10244 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 10245 ArrayRef<Value *> Ops) { 10246 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 10247 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 10248 10249 assert(Ops.size() == 2); 10250 return Res; 10251 } 10252 10253 // Lowers X86 FMA intrinsics to IR. 10254 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 10255 unsigned BuiltinID, bool IsAddSub) { 10256 10257 bool Subtract = false; 10258 Intrinsic::ID IID = Intrinsic::not_intrinsic; 10259 switch (BuiltinID) { 10260 default: break; 10261 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: 10262 Subtract = true; 10263 LLVM_FALLTHROUGH; 10264 case clang::X86::BI__builtin_ia32_vfmaddps512_mask: 10265 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: 10266 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: 10267 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; 10268 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: 10269 Subtract = true; 10270 LLVM_FALLTHROUGH; 10271 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: 10272 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: 10273 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: 10274 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; 10275 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: 10276 Subtract = true; 10277 LLVM_FALLTHROUGH; 10278 case 
clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: 10279 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: 10280 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: 10281 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; 10282 break; 10283 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: 10284 Subtract = true; 10285 LLVM_FALLTHROUGH; 10286 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: 10287 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: 10288 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: 10289 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; 10290 break; 10291 } 10292 10293 Value *A = Ops[0]; 10294 Value *B = Ops[1]; 10295 Value *C = Ops[2]; 10296 10297 if (Subtract) 10298 C = CGF.Builder.CreateFNeg(C); 10299 10300 Value *Res; 10301 10302 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). 10303 if (IID != Intrinsic::not_intrinsic && 10304 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 || 10305 IsAddSub)) { 10306 Function *Intr = CGF.CGM.getIntrinsic(IID); 10307 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); 10308 } else { 10309 llvm::Type *Ty = A->getType(); 10310 Function *FMA; 10311 if (CGF.Builder.getIsFPConstrained()) { 10312 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty); 10313 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C}); 10314 } else { 10315 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); 10316 Res = CGF.Builder.CreateCall(FMA, {A, B, C}); 10317 } 10318 } 10319 10320 // Handle any required masking. 
10321 Value *MaskFalseVal = nullptr; 10322 switch (BuiltinID) { 10323 case clang::X86::BI__builtin_ia32_vfmaddps512_mask: 10324 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: 10325 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: 10326 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: 10327 MaskFalseVal = Ops[0]; 10328 break; 10329 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: 10330 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: 10331 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: 10332 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: 10333 MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); 10334 break; 10335 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: 10336 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: 10337 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: 10338 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: 10339 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: 10340 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: 10341 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: 10342 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: 10343 MaskFalseVal = Ops[2]; 10344 break; 10345 } 10346 10347 if (MaskFalseVal) 10348 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); 10349 10350 return Res; 10351 } 10352 10353 static Value * 10354 EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops, 10355 Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0, 10356 bool NegAcc = false) { 10357 unsigned Rnd = 4; 10358 if (Ops.size() > 4) 10359 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); 10360 10361 if (NegAcc) 10362 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]); 10363 10364 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0); 10365 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0); 10366 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0); 10367 Value *Res; 10368 if (Rnd != 4) { 10369 Intrinsic::ID IID = 
Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ? 10370 Intrinsic::x86_avx512_vfmadd_f32 : 10371 Intrinsic::x86_avx512_vfmadd_f64; 10372 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), 10373 {Ops[0], Ops[1], Ops[2], Ops[4]}); 10374 } else if (CGF.Builder.getIsFPConstrained()) { 10375 Function *FMA = CGF.CGM.getIntrinsic( 10376 Intrinsic::experimental_constrained_fma, Ops[0]->getType()); 10377 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3)); 10378 } else { 10379 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType()); 10380 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3)); 10381 } 10382 // If we have more than 3 arguments, we need to do masking. 10383 if (Ops.size() > 3) { 10384 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType()) 10385 : Ops[PTIdx]; 10386 10387 // If we negated the accumulator and the its the PassThru value we need to 10388 // bypass the negate. Conveniently Upper should be the same thing in this 10389 // case. 10390 if (NegAcc && PTIdx == 2) 10391 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0); 10392 10393 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru); 10394 } 10395 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0); 10396 } 10397 10398 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, 10399 ArrayRef<Value *> Ops) { 10400 llvm::Type *Ty = Ops[0]->getType(); 10401 // Arguments have a vXi32 type so cast to vXi64. 10402 Ty = llvm::VectorType::get(CGF.Int64Ty, 10403 Ty->getPrimitiveSizeInBits() / 64); 10404 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty); 10405 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty); 10406 10407 if (IsSigned) { 10408 // Shift left then arithmetic shift right. 
10409 Constant *ShiftAmt = ConstantInt::get(Ty, 32); 10410 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt); 10411 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt); 10412 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt); 10413 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt); 10414 } else { 10415 // Clear the upper bits. 10416 Constant *Mask = ConstantInt::get(Ty, 0xffffffff); 10417 LHS = CGF.Builder.CreateAnd(LHS, Mask); 10418 RHS = CGF.Builder.CreateAnd(RHS, Mask); 10419 } 10420 10421 return CGF.Builder.CreateMul(LHS, RHS); 10422 } 10423 10424 // Emit a masked pternlog intrinsic. This only exists because the header has to 10425 // use a macro and we aren't able to pass the input argument to a pternlog 10426 // builtin and a select builtin without evaluating it twice. 10427 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, 10428 ArrayRef<Value *> Ops) { 10429 llvm::Type *Ty = Ops[0]->getType(); 10430 10431 unsigned VecWidth = Ty->getPrimitiveSizeInBits(); 10432 unsigned EltWidth = Ty->getScalarSizeInBits(); 10433 Intrinsic::ID IID; 10434 if (VecWidth == 128 && EltWidth == 32) 10435 IID = Intrinsic::x86_avx512_pternlog_d_128; 10436 else if (VecWidth == 256 && EltWidth == 32) 10437 IID = Intrinsic::x86_avx512_pternlog_d_256; 10438 else if (VecWidth == 512 && EltWidth == 32) 10439 IID = Intrinsic::x86_avx512_pternlog_d_512; 10440 else if (VecWidth == 128 && EltWidth == 64) 10441 IID = Intrinsic::x86_avx512_pternlog_q_128; 10442 else if (VecWidth == 256 && EltWidth == 64) 10443 IID = Intrinsic::x86_avx512_pternlog_q_256; 10444 else if (VecWidth == 512 && EltWidth == 64) 10445 IID = Intrinsic::x86_avx512_pternlog_q_512; 10446 else 10447 llvm_unreachable("Unexpected intrinsic"); 10448 10449 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), 10450 Ops.drop_back()); 10451 Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(Ty) : Ops[0]; 10452 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru); 10453 } 10454 10455 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 10456 llvm::Type *DstTy) { 10457 unsigned NumberOfElements = DstTy->getVectorNumElements(); 10458 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 10459 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 10460 } 10461 10462 // Emit addition or subtraction with signed/unsigned saturation. 10463 static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, 10464 ArrayRef<Value *> Ops, bool IsSigned, 10465 bool IsAddition) { 10466 Intrinsic::ID IID = 10467 IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) 10468 : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); 10469 llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType()); 10470 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]}); 10471 } 10472 10473 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { 10474 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); 10475 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); 10476 return EmitX86CpuIs(CPUStr); 10477 } 10478 10479 // Convert F16 halfs to floats. 10480 static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, 10481 ArrayRef<Value *> Ops, 10482 llvm::Type *DstTy) { 10483 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) && 10484 "Unknown cvtph2ps intrinsic"); 10485 10486 // If the SAE intrinsic doesn't use default rounding then we can't upgrade. 
10487 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) { 10488 Intrinsic::ID IID = Intrinsic::x86_avx512_mask_vcvtph2ps_512; 10489 Function *F = 10490 CGF.CGM.getIntrinsic(IID, {DstTy, Ops[0]->getType(), Ops[1]->getType(), 10491 Ops[2]->getType(), Ops[3]->getType()}); 10492 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]}); 10493 } 10494 10495 unsigned NumDstElts = DstTy->getVectorNumElements(); 10496 Value *Src = Ops[0]; 10497 10498 // Extract the subvector. 10499 if (NumDstElts != Src->getType()->getVectorNumElements()) { 10500 assert(NumDstElts == 4 && "Unexpected vector size"); 10501 uint32_t ShuffleMask[4] = {0, 1, 2, 3}; 10502 Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()), 10503 ShuffleMask); 10504 } 10505 10506 // Bitcast from vXi16 to vXf16. 10507 llvm::Type *HalfTy = llvm::VectorType::get( 10508 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts); 10509 Src = CGF.Builder.CreateBitCast(Src, HalfTy); 10510 10511 // Perform the fp-extension. 10512 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps"); 10513 10514 if (Ops.size() >= 3) 10515 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]); 10516 return Res; 10517 } 10518 10519 // Convert a BF16 to a float. 
10520 static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF, 10521 const CallExpr *E, 10522 ArrayRef<Value *> Ops) { 10523 llvm::Type *Int32Ty = CGF.Builder.getInt32Ty(); 10524 Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty); 10525 Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16); 10526 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 10527 Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType); 10528 return BitCast; 10529 } 10530 10531 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { 10532 10533 llvm::Type *Int32Ty = Builder.getInt32Ty(); 10534 10535 // Matching the struct layout from the compiler-rt/libgcc structure that is 10536 // filled in: 10537 // unsigned int __cpu_vendor; 10538 // unsigned int __cpu_type; 10539 // unsigned int __cpu_subtype; 10540 // unsigned int __cpu_features[1]; 10541 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 10542 llvm::ArrayType::get(Int32Ty, 1)); 10543 10544 // Grab the global __cpu_model. 10545 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 10546 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); 10547 10548 // Calculate the index needed to access the correct field based on the 10549 // range. Also adjust the expected value. 
10550 unsigned Index; 10551 unsigned Value; 10552 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) 10553 #define X86_VENDOR(ENUM, STRING) \ 10554 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)}) 10555 #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \ 10556 .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) 10557 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \ 10558 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) 10559 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \ 10560 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) 10561 #include "llvm/Support/X86TargetParser.def" 10562 .Default({0, 0}); 10563 assert(Value != 0 && "Invalid CPUStr passed to CpuIs"); 10564 10565 // Grab the appropriate field from __cpu_model. 10566 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), 10567 ConstantInt::get(Int32Ty, Index)}; 10568 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); 10569 CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); 10570 10571 // Check the value of the field against the requested value. 10572 return Builder.CreateICmpEQ(CpuValue, 10573 llvm::ConstantInt::get(Int32Ty, Value)); 10574 } 10575 10576 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { 10577 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 10578 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 10579 return EmitX86CpuSupports(FeatureStr); 10580 } 10581 10582 uint64_t 10583 CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) { 10584 // Processor features and mapping to processor feature value. 
10585 uint64_t FeaturesMask = 0; 10586 for (const StringRef &FeatureStr : FeatureStrs) { 10587 unsigned Feature = 10588 StringSwitch<unsigned>(FeatureStr) 10589 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL) 10590 #include "llvm/Support/X86TargetParser.def" 10591 ; 10592 FeaturesMask |= (1ULL << Feature); 10593 } 10594 return FeaturesMask; 10595 } 10596 10597 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { 10598 return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs)); 10599 } 10600 10601 llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { 10602 uint32_t Features1 = Lo_32(FeaturesMask); 10603 uint32_t Features2 = Hi_32(FeaturesMask); 10604 10605 Value *Result = Builder.getTrue(); 10606 10607 if (Features1 != 0) { 10608 // Matching the struct layout from the compiler-rt/libgcc structure that is 10609 // filled in: 10610 // unsigned int __cpu_vendor; 10611 // unsigned int __cpu_type; 10612 // unsigned int __cpu_subtype; 10613 // unsigned int __cpu_features[1]; 10614 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 10615 llvm::ArrayType::get(Int32Ty, 1)); 10616 10617 // Grab the global __cpu_model. 10618 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 10619 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); 10620 10621 // Grab the first (0th) element from the field __cpu_features off of the 10622 // global in the struct STy. 10623 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), 10624 Builder.getInt32(0)}; 10625 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 10626 Value *Features = 10627 Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); 10628 10629 // Check the value of the bit corresponding to the feature requested. 
10630 Value *Mask = Builder.getInt32(Features1); 10631 Value *Bitset = Builder.CreateAnd(Features, Mask); 10632 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); 10633 Result = Builder.CreateAnd(Result, Cmp); 10634 } 10635 10636 if (Features2 != 0) { 10637 llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty, 10638 "__cpu_features2"); 10639 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true); 10640 10641 Value *Features = 10642 Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4)); 10643 10644 // Check the value of the bit corresponding to the feature requested. 10645 Value *Mask = Builder.getInt32(Features2); 10646 Value *Bitset = Builder.CreateAnd(Features, Mask); 10647 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); 10648 Result = Builder.CreateAnd(Result, Cmp); 10649 } 10650 10651 return Result; 10652 } 10653 10654 Value *CodeGenFunction::EmitX86CpuInit() { 10655 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, 10656 /*Variadic*/ false); 10657 llvm::FunctionCallee Func = 10658 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); 10659 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); 10660 cast<llvm::GlobalValue>(Func.getCallee()) 10661 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); 10662 return Builder.CreateCall(Func); 10663 } 10664 10665 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 10666 const CallExpr *E) { 10667 if (BuiltinID == X86::BI__builtin_cpu_is) 10668 return EmitX86CpuIs(E); 10669 if (BuiltinID == X86::BI__builtin_cpu_supports) 10670 return EmitX86CpuSupports(E); 10671 if (BuiltinID == X86::BI__builtin_cpu_init) 10672 return EmitX86CpuInit(); 10673 10674 SmallVector<Value*, 4> Ops; 10675 10676 // Find out if any arguments are required to be integer constant expressions. 
10677 unsigned ICEArguments = 0; 10678 ASTContext::GetBuiltinTypeError Error; 10679 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 10680 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 10681 10682 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 10683 // If this is a normal argument, just emit it as a scalar. 10684 if ((ICEArguments & (1 << i)) == 0) { 10685 Ops.push_back(EmitScalarExpr(E->getArg(i))); 10686 continue; 10687 } 10688 10689 // If this is required to be a constant, constant fold it so that we know 10690 // that the generated intrinsic gets a ConstantInt. 10691 llvm::APSInt Result; 10692 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 10693 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 10694 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 10695 } 10696 10697 // These exist so that the builtin that takes an immediate can be bounds 10698 // checked by clang to avoid passing bad immediates to the backend. Since 10699 // AVX has a larger immediate than SSE we would need separate builtins to 10700 // do the different bounds checking. Rather than create a clang specific 10701 // SSE only builtin, this implements eight separate builtins to match gcc 10702 // implementation. 10703 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 10704 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 10705 llvm::Function *F = CGM.getIntrinsic(ID); 10706 return Builder.CreateCall(F, Ops); 10707 }; 10708 10709 // For the vector forms of FP comparisons, translate the builtins directly to 10710 // IR. 10711 // TODO: The builtins could be removed if the SSE header files used vector 10712 // extension comparisons directly (vector ordered/unordered may need 10713 // additional support via __builtin_isnan()). 
10714 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred, 10715 bool IsSignaling) { 10716 Value *Cmp; 10717 if (IsSignaling) 10718 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); 10719 else 10720 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 10721 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 10722 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 10723 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 10724 return Builder.CreateBitCast(Sext, FPVecTy); 10725 }; 10726 10727 switch (BuiltinID) { 10728 default: return nullptr; 10729 case X86::BI_mm_prefetch: { 10730 Value *Address = Ops[0]; 10731 ConstantInt *C = cast<ConstantInt>(Ops[1]); 10732 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); 10733 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); 10734 Value *Data = ConstantInt::get(Int32Ty, 1); 10735 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); 10736 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 10737 } 10738 case X86::BI_mm_clflush: { 10739 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 10740 Ops[0]); 10741 } 10742 case X86::BI_mm_lfence: { 10743 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 10744 } 10745 case X86::BI_mm_mfence: { 10746 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 10747 } 10748 case X86::BI_mm_sfence: { 10749 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 10750 } 10751 case X86::BI_mm_pause: { 10752 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 10753 } 10754 case X86::BI__rdtsc: { 10755 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 10756 } 10757 case X86::BI__builtin_ia32_rdtscp: { 10758 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp)); 10759 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), 10760 Ops[0]); 10761 
return Builder.CreateExtractValue(Call, 0); 10762 } 10763 case X86::BI__builtin_ia32_lzcnt_u16: 10764 case X86::BI__builtin_ia32_lzcnt_u32: 10765 case X86::BI__builtin_ia32_lzcnt_u64: { 10766 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 10767 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); 10768 } 10769 case X86::BI__builtin_ia32_tzcnt_u16: 10770 case X86::BI__builtin_ia32_tzcnt_u32: 10771 case X86::BI__builtin_ia32_tzcnt_u64: { 10772 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); 10773 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); 10774 } 10775 case X86::BI__builtin_ia32_undef128: 10776 case X86::BI__builtin_ia32_undef256: 10777 case X86::BI__builtin_ia32_undef512: 10778 // The x86 definition of "undef" is not the same as the LLVM definition 10779 // (PR32176). We leave optimizing away an unnecessary zero constant to the 10780 // IR optimizer and backend. 10781 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 10782 // value, we should use that here instead of a zero. 
10783 return llvm::Constant::getNullValue(ConvertType(E->getType())); 10784 case X86::BI__builtin_ia32_vec_init_v8qi: 10785 case X86::BI__builtin_ia32_vec_init_v4hi: 10786 case X86::BI__builtin_ia32_vec_init_v2si: 10787 return Builder.CreateBitCast(BuildVector(Ops), 10788 llvm::Type::getX86_MMXTy(getLLVMContext())); 10789 case X86::BI__builtin_ia32_vec_ext_v2si: 10790 case X86::BI__builtin_ia32_vec_ext_v16qi: 10791 case X86::BI__builtin_ia32_vec_ext_v8hi: 10792 case X86::BI__builtin_ia32_vec_ext_v4si: 10793 case X86::BI__builtin_ia32_vec_ext_v4sf: 10794 case X86::BI__builtin_ia32_vec_ext_v2di: 10795 case X86::BI__builtin_ia32_vec_ext_v32qi: 10796 case X86::BI__builtin_ia32_vec_ext_v16hi: 10797 case X86::BI__builtin_ia32_vec_ext_v8si: 10798 case X86::BI__builtin_ia32_vec_ext_v4di: { 10799 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 10800 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue(); 10801 Index &= NumElts - 1; 10802 // These builtins exist so we can ensure the index is an ICE and in range. 10803 // Otherwise we could just do this in the header file. 10804 return Builder.CreateExtractElement(Ops[0], Index); 10805 } 10806 case X86::BI__builtin_ia32_vec_set_v16qi: 10807 case X86::BI__builtin_ia32_vec_set_v8hi: 10808 case X86::BI__builtin_ia32_vec_set_v4si: 10809 case X86::BI__builtin_ia32_vec_set_v2di: 10810 case X86::BI__builtin_ia32_vec_set_v32qi: 10811 case X86::BI__builtin_ia32_vec_set_v16hi: 10812 case X86::BI__builtin_ia32_vec_set_v8si: 10813 case X86::BI__builtin_ia32_vec_set_v4di: { 10814 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 10815 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); 10816 Index &= NumElts - 1; 10817 // These builtins exist so we can ensure the index is an ICE and in range. 10818 // Otherwise we could just do this in the header file. 
10819 return Builder.CreateInsertElement(Ops[0], Ops[1], Index); 10820 } 10821 case X86::BI_mm_setcsr: 10822 case X86::BI__builtin_ia32_ldmxcsr: { 10823 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 10824 Builder.CreateStore(Ops[0], Tmp); 10825 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 10826 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 10827 } 10828 case X86::BI_mm_getcsr: 10829 case X86::BI__builtin_ia32_stmxcsr: { 10830 Address Tmp = CreateMemTemp(E->getType()); 10831 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 10832 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 10833 return Builder.CreateLoad(Tmp, "stmxcsr"); 10834 } 10835 case X86::BI__builtin_ia32_xsave: 10836 case X86::BI__builtin_ia32_xsave64: 10837 case X86::BI__builtin_ia32_xrstor: 10838 case X86::BI__builtin_ia32_xrstor64: 10839 case X86::BI__builtin_ia32_xsaveopt: 10840 case X86::BI__builtin_ia32_xsaveopt64: 10841 case X86::BI__builtin_ia32_xrstors: 10842 case X86::BI__builtin_ia32_xrstors64: 10843 case X86::BI__builtin_ia32_xsavec: 10844 case X86::BI__builtin_ia32_xsavec64: 10845 case X86::BI__builtin_ia32_xsaves: 10846 case X86::BI__builtin_ia32_xsaves64: 10847 case X86::BI__builtin_ia32_xsetbv: 10848 case X86::BI_xsetbv: { 10849 Intrinsic::ID ID; 10850 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 10851 case X86::BI__builtin_ia32_##NAME: \ 10852 ID = Intrinsic::x86_##NAME; \ 10853 break 10854 switch (BuiltinID) { 10855 default: llvm_unreachable("Unsupported intrinsic!"); 10856 INTRINSIC_X86_XSAVE_ID(xsave); 10857 INTRINSIC_X86_XSAVE_ID(xsave64); 10858 INTRINSIC_X86_XSAVE_ID(xrstor); 10859 INTRINSIC_X86_XSAVE_ID(xrstor64); 10860 INTRINSIC_X86_XSAVE_ID(xsaveopt); 10861 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 10862 INTRINSIC_X86_XSAVE_ID(xrstors); 10863 INTRINSIC_X86_XSAVE_ID(xrstors64); 10864 INTRINSIC_X86_XSAVE_ID(xsavec); 10865 INTRINSIC_X86_XSAVE_ID(xsavec64); 10866 INTRINSIC_X86_XSAVE_ID(xsaves); 10867 INTRINSIC_X86_XSAVE_ID(xsaves64); 
10868 INTRINSIC_X86_XSAVE_ID(xsetbv); 10869 case X86::BI_xsetbv: 10870 ID = Intrinsic::x86_xsetbv; 10871 break; 10872 } 10873 #undef INTRINSIC_X86_XSAVE_ID 10874 Value *Mhi = Builder.CreateTrunc( 10875 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 10876 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 10877 Ops[1] = Mhi; 10878 Ops.push_back(Mlo); 10879 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 10880 } 10881 case X86::BI__builtin_ia32_xgetbv: 10882 case X86::BI_xgetbv: 10883 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); 10884 case X86::BI__builtin_ia32_storedqudi128_mask: 10885 case X86::BI__builtin_ia32_storedqusi128_mask: 10886 case X86::BI__builtin_ia32_storedquhi128_mask: 10887 case X86::BI__builtin_ia32_storedquqi128_mask: 10888 case X86::BI__builtin_ia32_storeupd128_mask: 10889 case X86::BI__builtin_ia32_storeups128_mask: 10890 case X86::BI__builtin_ia32_storedqudi256_mask: 10891 case X86::BI__builtin_ia32_storedqusi256_mask: 10892 case X86::BI__builtin_ia32_storedquhi256_mask: 10893 case X86::BI__builtin_ia32_storedquqi256_mask: 10894 case X86::BI__builtin_ia32_storeupd256_mask: 10895 case X86::BI__builtin_ia32_storeups256_mask: 10896 case X86::BI__builtin_ia32_storedqudi512_mask: 10897 case X86::BI__builtin_ia32_storedqusi512_mask: 10898 case X86::BI__builtin_ia32_storedquhi512_mask: 10899 case X86::BI__builtin_ia32_storedquqi512_mask: 10900 case X86::BI__builtin_ia32_storeupd512_mask: 10901 case X86::BI__builtin_ia32_storeups512_mask: 10902 return EmitX86MaskedStore(*this, Ops, Align(1)); 10903 10904 case X86::BI__builtin_ia32_storess128_mask: 10905 case X86::BI__builtin_ia32_storesd128_mask: 10906 return EmitX86MaskedStore(*this, Ops, Align(1)); 10907 10908 case X86::BI__builtin_ia32_vpopcntb_128: 10909 case X86::BI__builtin_ia32_vpopcntd_128: 10910 case X86::BI__builtin_ia32_vpopcntq_128: 10911 case X86::BI__builtin_ia32_vpopcntw_128: 10912 case X86::BI__builtin_ia32_vpopcntb_256: 10913 case 
X86::BI__builtin_ia32_vpopcntd_256: 10914 case X86::BI__builtin_ia32_vpopcntq_256: 10915 case X86::BI__builtin_ia32_vpopcntw_256: 10916 case X86::BI__builtin_ia32_vpopcntb_512: 10917 case X86::BI__builtin_ia32_vpopcntd_512: 10918 case X86::BI__builtin_ia32_vpopcntq_512: 10919 case X86::BI__builtin_ia32_vpopcntw_512: { 10920 llvm::Type *ResultType = ConvertType(E->getType()); 10921 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 10922 return Builder.CreateCall(F, Ops); 10923 } 10924 case X86::BI__builtin_ia32_cvtmask2b128: 10925 case X86::BI__builtin_ia32_cvtmask2b256: 10926 case X86::BI__builtin_ia32_cvtmask2b512: 10927 case X86::BI__builtin_ia32_cvtmask2w128: 10928 case X86::BI__builtin_ia32_cvtmask2w256: 10929 case X86::BI__builtin_ia32_cvtmask2w512: 10930 case X86::BI__builtin_ia32_cvtmask2d128: 10931 case X86::BI__builtin_ia32_cvtmask2d256: 10932 case X86::BI__builtin_ia32_cvtmask2d512: 10933 case X86::BI__builtin_ia32_cvtmask2q128: 10934 case X86::BI__builtin_ia32_cvtmask2q256: 10935 case X86::BI__builtin_ia32_cvtmask2q512: 10936 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 10937 10938 case X86::BI__builtin_ia32_cvtb2mask128: 10939 case X86::BI__builtin_ia32_cvtb2mask256: 10940 case X86::BI__builtin_ia32_cvtb2mask512: 10941 case X86::BI__builtin_ia32_cvtw2mask128: 10942 case X86::BI__builtin_ia32_cvtw2mask256: 10943 case X86::BI__builtin_ia32_cvtw2mask512: 10944 case X86::BI__builtin_ia32_cvtd2mask128: 10945 case X86::BI__builtin_ia32_cvtd2mask256: 10946 case X86::BI__builtin_ia32_cvtd2mask512: 10947 case X86::BI__builtin_ia32_cvtq2mask128: 10948 case X86::BI__builtin_ia32_cvtq2mask256: 10949 case X86::BI__builtin_ia32_cvtq2mask512: 10950 return EmitX86ConvertToMask(*this, Ops[0]); 10951 10952 case X86::BI__builtin_ia32_cvtdq2ps512_mask: 10953 case X86::BI__builtin_ia32_cvtqq2ps512_mask: 10954 case X86::BI__builtin_ia32_cvtqq2pd512_mask: 10955 return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true); 10956 case 
X86::BI__builtin_ia32_cvtudq2ps512_mask: 10957 case X86::BI__builtin_ia32_cvtuqq2ps512_mask: 10958 case X86::BI__builtin_ia32_cvtuqq2pd512_mask: 10959 return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false); 10960 10961 case X86::BI__builtin_ia32_vfmaddss3: 10962 case X86::BI__builtin_ia32_vfmaddsd3: 10963 case X86::BI__builtin_ia32_vfmaddss3_mask: 10964 case X86::BI__builtin_ia32_vfmaddsd3_mask: 10965 return EmitScalarFMAExpr(*this, Ops, Ops[0]); 10966 case X86::BI__builtin_ia32_vfmaddss: 10967 case X86::BI__builtin_ia32_vfmaddsd: 10968 return EmitScalarFMAExpr(*this, Ops, 10969 Constant::getNullValue(Ops[0]->getType())); 10970 case X86::BI__builtin_ia32_vfmaddss3_maskz: 10971 case X86::BI__builtin_ia32_vfmaddsd3_maskz: 10972 return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true); 10973 case X86::BI__builtin_ia32_vfmaddss3_mask3: 10974 case X86::BI__builtin_ia32_vfmaddsd3_mask3: 10975 return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2); 10976 case X86::BI__builtin_ia32_vfmsubss3_mask3: 10977 case X86::BI__builtin_ia32_vfmsubsd3_mask3: 10978 return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2, 10979 /*NegAcc*/true); 10980 case X86::BI__builtin_ia32_vfmaddps: 10981 case X86::BI__builtin_ia32_vfmaddpd: 10982 case X86::BI__builtin_ia32_vfmaddps256: 10983 case X86::BI__builtin_ia32_vfmaddpd256: 10984 case X86::BI__builtin_ia32_vfmaddps512_mask: 10985 case X86::BI__builtin_ia32_vfmaddps512_maskz: 10986 case X86::BI__builtin_ia32_vfmaddps512_mask3: 10987 case X86::BI__builtin_ia32_vfmsubps512_mask3: 10988 case X86::BI__builtin_ia32_vfmaddpd512_mask: 10989 case X86::BI__builtin_ia32_vfmaddpd512_maskz: 10990 case X86::BI__builtin_ia32_vfmaddpd512_mask3: 10991 case X86::BI__builtin_ia32_vfmsubpd512_mask3: 10992 return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false); 10993 case X86::BI__builtin_ia32_vfmaddsubps512_mask: 10994 case X86::BI__builtin_ia32_vfmaddsubps512_maskz: 10995 case 
X86::BI__builtin_ia32_vfmaddsubps512_mask3: 10996 case X86::BI__builtin_ia32_vfmsubaddps512_mask3: 10997 case X86::BI__builtin_ia32_vfmaddsubpd512_mask: 10998 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: 10999 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: 11000 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: 11001 return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true); 11002 11003 case X86::BI__builtin_ia32_movdqa32store128_mask: 11004 case X86::BI__builtin_ia32_movdqa64store128_mask: 11005 case X86::BI__builtin_ia32_storeaps128_mask: 11006 case X86::BI__builtin_ia32_storeapd128_mask: 11007 case X86::BI__builtin_ia32_movdqa32store256_mask: 11008 case X86::BI__builtin_ia32_movdqa64store256_mask: 11009 case X86::BI__builtin_ia32_storeaps256_mask: 11010 case X86::BI__builtin_ia32_storeapd256_mask: 11011 case X86::BI__builtin_ia32_movdqa32store512_mask: 11012 case X86::BI__builtin_ia32_movdqa64store512_mask: 11013 case X86::BI__builtin_ia32_storeaps512_mask: 11014 case X86::BI__builtin_ia32_storeapd512_mask: 11015 return EmitX86MaskedStore( 11016 *this, Ops, 11017 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); 11018 11019 case X86::BI__builtin_ia32_loadups128_mask: 11020 case X86::BI__builtin_ia32_loadups256_mask: 11021 case X86::BI__builtin_ia32_loadups512_mask: 11022 case X86::BI__builtin_ia32_loadupd128_mask: 11023 case X86::BI__builtin_ia32_loadupd256_mask: 11024 case X86::BI__builtin_ia32_loadupd512_mask: 11025 case X86::BI__builtin_ia32_loaddquqi128_mask: 11026 case X86::BI__builtin_ia32_loaddquqi256_mask: 11027 case X86::BI__builtin_ia32_loaddquqi512_mask: 11028 case X86::BI__builtin_ia32_loaddquhi128_mask: 11029 case X86::BI__builtin_ia32_loaddquhi256_mask: 11030 case X86::BI__builtin_ia32_loaddquhi512_mask: 11031 case X86::BI__builtin_ia32_loaddqusi128_mask: 11032 case X86::BI__builtin_ia32_loaddqusi256_mask: 11033 case X86::BI__builtin_ia32_loaddqusi512_mask: 11034 case X86::BI__builtin_ia32_loaddqudi128_mask: 11035 
case X86::BI__builtin_ia32_loaddqudi256_mask: 11036 case X86::BI__builtin_ia32_loaddqudi512_mask: 11037 return EmitX86MaskedLoad(*this, Ops, Align(1)); 11038 11039 case X86::BI__builtin_ia32_loadss128_mask: 11040 case X86::BI__builtin_ia32_loadsd128_mask: 11041 return EmitX86MaskedLoad(*this, Ops, Align(1)); 11042 11043 case X86::BI__builtin_ia32_loadaps128_mask: 11044 case X86::BI__builtin_ia32_loadaps256_mask: 11045 case X86::BI__builtin_ia32_loadaps512_mask: 11046 case X86::BI__builtin_ia32_loadapd128_mask: 11047 case X86::BI__builtin_ia32_loadapd256_mask: 11048 case X86::BI__builtin_ia32_loadapd512_mask: 11049 case X86::BI__builtin_ia32_movdqa32load128_mask: 11050 case X86::BI__builtin_ia32_movdqa32load256_mask: 11051 case X86::BI__builtin_ia32_movdqa32load512_mask: 11052 case X86::BI__builtin_ia32_movdqa64load128_mask: 11053 case X86::BI__builtin_ia32_movdqa64load256_mask: 11054 case X86::BI__builtin_ia32_movdqa64load512_mask: 11055 return EmitX86MaskedLoad( 11056 *this, Ops, 11057 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); 11058 11059 case X86::BI__builtin_ia32_expandloaddf128_mask: 11060 case X86::BI__builtin_ia32_expandloaddf256_mask: 11061 case X86::BI__builtin_ia32_expandloaddf512_mask: 11062 case X86::BI__builtin_ia32_expandloadsf128_mask: 11063 case X86::BI__builtin_ia32_expandloadsf256_mask: 11064 case X86::BI__builtin_ia32_expandloadsf512_mask: 11065 case X86::BI__builtin_ia32_expandloaddi128_mask: 11066 case X86::BI__builtin_ia32_expandloaddi256_mask: 11067 case X86::BI__builtin_ia32_expandloaddi512_mask: 11068 case X86::BI__builtin_ia32_expandloadsi128_mask: 11069 case X86::BI__builtin_ia32_expandloadsi256_mask: 11070 case X86::BI__builtin_ia32_expandloadsi512_mask: 11071 case X86::BI__builtin_ia32_expandloadhi128_mask: 11072 case X86::BI__builtin_ia32_expandloadhi256_mask: 11073 case X86::BI__builtin_ia32_expandloadhi512_mask: 11074 case X86::BI__builtin_ia32_expandloadqi128_mask: 11075 case 
X86::BI__builtin_ia32_expandloadqi256_mask: 11076 case X86::BI__builtin_ia32_expandloadqi512_mask: 11077 return EmitX86ExpandLoad(*this, Ops); 11078 11079 case X86::BI__builtin_ia32_compressstoredf128_mask: 11080 case X86::BI__builtin_ia32_compressstoredf256_mask: 11081 case X86::BI__builtin_ia32_compressstoredf512_mask: 11082 case X86::BI__builtin_ia32_compressstoresf128_mask: 11083 case X86::BI__builtin_ia32_compressstoresf256_mask: 11084 case X86::BI__builtin_ia32_compressstoresf512_mask: 11085 case X86::BI__builtin_ia32_compressstoredi128_mask: 11086 case X86::BI__builtin_ia32_compressstoredi256_mask: 11087 case X86::BI__builtin_ia32_compressstoredi512_mask: 11088 case X86::BI__builtin_ia32_compressstoresi128_mask: 11089 case X86::BI__builtin_ia32_compressstoresi256_mask: 11090 case X86::BI__builtin_ia32_compressstoresi512_mask: 11091 case X86::BI__builtin_ia32_compressstorehi128_mask: 11092 case X86::BI__builtin_ia32_compressstorehi256_mask: 11093 case X86::BI__builtin_ia32_compressstorehi512_mask: 11094 case X86::BI__builtin_ia32_compressstoreqi128_mask: 11095 case X86::BI__builtin_ia32_compressstoreqi256_mask: 11096 case X86::BI__builtin_ia32_compressstoreqi512_mask: 11097 return EmitX86CompressStore(*this, Ops); 11098 11099 case X86::BI__builtin_ia32_expanddf128_mask: 11100 case X86::BI__builtin_ia32_expanddf256_mask: 11101 case X86::BI__builtin_ia32_expanddf512_mask: 11102 case X86::BI__builtin_ia32_expandsf128_mask: 11103 case X86::BI__builtin_ia32_expandsf256_mask: 11104 case X86::BI__builtin_ia32_expandsf512_mask: 11105 case X86::BI__builtin_ia32_expanddi128_mask: 11106 case X86::BI__builtin_ia32_expanddi256_mask: 11107 case X86::BI__builtin_ia32_expanddi512_mask: 11108 case X86::BI__builtin_ia32_expandsi128_mask: 11109 case X86::BI__builtin_ia32_expandsi256_mask: 11110 case X86::BI__builtin_ia32_expandsi512_mask: 11111 case X86::BI__builtin_ia32_expandhi128_mask: 11112 case X86::BI__builtin_ia32_expandhi256_mask: 11113 case 
X86::BI__builtin_ia32_expandhi512_mask: 11114 case X86::BI__builtin_ia32_expandqi128_mask: 11115 case X86::BI__builtin_ia32_expandqi256_mask: 11116 case X86::BI__builtin_ia32_expandqi512_mask: 11117 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); 11118 11119 case X86::BI__builtin_ia32_compressdf128_mask: 11120 case X86::BI__builtin_ia32_compressdf256_mask: 11121 case X86::BI__builtin_ia32_compressdf512_mask: 11122 case X86::BI__builtin_ia32_compresssf128_mask: 11123 case X86::BI__builtin_ia32_compresssf256_mask: 11124 case X86::BI__builtin_ia32_compresssf512_mask: 11125 case X86::BI__builtin_ia32_compressdi128_mask: 11126 case X86::BI__builtin_ia32_compressdi256_mask: 11127 case X86::BI__builtin_ia32_compressdi512_mask: 11128 case X86::BI__builtin_ia32_compresssi128_mask: 11129 case X86::BI__builtin_ia32_compresssi256_mask: 11130 case X86::BI__builtin_ia32_compresssi512_mask: 11131 case X86::BI__builtin_ia32_compresshi128_mask: 11132 case X86::BI__builtin_ia32_compresshi256_mask: 11133 case X86::BI__builtin_ia32_compresshi512_mask: 11134 case X86::BI__builtin_ia32_compressqi128_mask: 11135 case X86::BI__builtin_ia32_compressqi256_mask: 11136 case X86::BI__builtin_ia32_compressqi512_mask: 11137 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); 11138 11139 case X86::BI__builtin_ia32_gather3div2df: 11140 case X86::BI__builtin_ia32_gather3div2di: 11141 case X86::BI__builtin_ia32_gather3div4df: 11142 case X86::BI__builtin_ia32_gather3div4di: 11143 case X86::BI__builtin_ia32_gather3div4sf: 11144 case X86::BI__builtin_ia32_gather3div4si: 11145 case X86::BI__builtin_ia32_gather3div8sf: 11146 case X86::BI__builtin_ia32_gather3div8si: 11147 case X86::BI__builtin_ia32_gather3siv2df: 11148 case X86::BI__builtin_ia32_gather3siv2di: 11149 case X86::BI__builtin_ia32_gather3siv4df: 11150 case X86::BI__builtin_ia32_gather3siv4di: 11151 case X86::BI__builtin_ia32_gather3siv4sf: 11152 case X86::BI__builtin_ia32_gather3siv4si: 11153 case 
X86::BI__builtin_ia32_gather3siv8sf: 11154 case X86::BI__builtin_ia32_gather3siv8si: 11155 case X86::BI__builtin_ia32_gathersiv8df: 11156 case X86::BI__builtin_ia32_gathersiv16sf: 11157 case X86::BI__builtin_ia32_gatherdiv8df: 11158 case X86::BI__builtin_ia32_gatherdiv16sf: 11159 case X86::BI__builtin_ia32_gathersiv8di: 11160 case X86::BI__builtin_ia32_gathersiv16si: 11161 case X86::BI__builtin_ia32_gatherdiv8di: 11162 case X86::BI__builtin_ia32_gatherdiv16si: { 11163 Intrinsic::ID IID; 11164 switch (BuiltinID) { 11165 default: llvm_unreachable("Unexpected builtin"); 11166 case X86::BI__builtin_ia32_gather3div2df: 11167 IID = Intrinsic::x86_avx512_mask_gather3div2_df; 11168 break; 11169 case X86::BI__builtin_ia32_gather3div2di: 11170 IID = Intrinsic::x86_avx512_mask_gather3div2_di; 11171 break; 11172 case X86::BI__builtin_ia32_gather3div4df: 11173 IID = Intrinsic::x86_avx512_mask_gather3div4_df; 11174 break; 11175 case X86::BI__builtin_ia32_gather3div4di: 11176 IID = Intrinsic::x86_avx512_mask_gather3div4_di; 11177 break; 11178 case X86::BI__builtin_ia32_gather3div4sf: 11179 IID = Intrinsic::x86_avx512_mask_gather3div4_sf; 11180 break; 11181 case X86::BI__builtin_ia32_gather3div4si: 11182 IID = Intrinsic::x86_avx512_mask_gather3div4_si; 11183 break; 11184 case X86::BI__builtin_ia32_gather3div8sf: 11185 IID = Intrinsic::x86_avx512_mask_gather3div8_sf; 11186 break; 11187 case X86::BI__builtin_ia32_gather3div8si: 11188 IID = Intrinsic::x86_avx512_mask_gather3div8_si; 11189 break; 11190 case X86::BI__builtin_ia32_gather3siv2df: 11191 IID = Intrinsic::x86_avx512_mask_gather3siv2_df; 11192 break; 11193 case X86::BI__builtin_ia32_gather3siv2di: 11194 IID = Intrinsic::x86_avx512_mask_gather3siv2_di; 11195 break; 11196 case X86::BI__builtin_ia32_gather3siv4df: 11197 IID = Intrinsic::x86_avx512_mask_gather3siv4_df; 11198 break; 11199 case X86::BI__builtin_ia32_gather3siv4di: 11200 IID = Intrinsic::x86_avx512_mask_gather3siv4_di; 11201 break; 11202 case 
X86::BI__builtin_ia32_gather3siv4sf: 11203 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; 11204 break; 11205 case X86::BI__builtin_ia32_gather3siv4si: 11206 IID = Intrinsic::x86_avx512_mask_gather3siv4_si; 11207 break; 11208 case X86::BI__builtin_ia32_gather3siv8sf: 11209 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; 11210 break; 11211 case X86::BI__builtin_ia32_gather3siv8si: 11212 IID = Intrinsic::x86_avx512_mask_gather3siv8_si; 11213 break; 11214 case X86::BI__builtin_ia32_gathersiv8df: 11215 IID = Intrinsic::x86_avx512_mask_gather_dpd_512; 11216 break; 11217 case X86::BI__builtin_ia32_gathersiv16sf: 11218 IID = Intrinsic::x86_avx512_mask_gather_dps_512; 11219 break; 11220 case X86::BI__builtin_ia32_gatherdiv8df: 11221 IID = Intrinsic::x86_avx512_mask_gather_qpd_512; 11222 break; 11223 case X86::BI__builtin_ia32_gatherdiv16sf: 11224 IID = Intrinsic::x86_avx512_mask_gather_qps_512; 11225 break; 11226 case X86::BI__builtin_ia32_gathersiv8di: 11227 IID = Intrinsic::x86_avx512_mask_gather_dpq_512; 11228 break; 11229 case X86::BI__builtin_ia32_gathersiv16si: 11230 IID = Intrinsic::x86_avx512_mask_gather_dpi_512; 11231 break; 11232 case X86::BI__builtin_ia32_gatherdiv8di: 11233 IID = Intrinsic::x86_avx512_mask_gather_qpq_512; 11234 break; 11235 case X86::BI__builtin_ia32_gatherdiv16si: 11236 IID = Intrinsic::x86_avx512_mask_gather_qpi_512; 11237 break; 11238 } 11239 11240 unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(), 11241 Ops[2]->getType()->getVectorNumElements()); 11242 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); 11243 Function *Intr = CGM.getIntrinsic(IID); 11244 return Builder.CreateCall(Intr, Ops); 11245 } 11246 11247 case X86::BI__builtin_ia32_scattersiv8df: 11248 case X86::BI__builtin_ia32_scattersiv16sf: 11249 case X86::BI__builtin_ia32_scatterdiv8df: 11250 case X86::BI__builtin_ia32_scatterdiv16sf: 11251 case X86::BI__builtin_ia32_scattersiv8di: 11252 case X86::BI__builtin_ia32_scattersiv16si: 11253 case 
X86::BI__builtin_ia32_scatterdiv8di: 11254 case X86::BI__builtin_ia32_scatterdiv16si: 11255 case X86::BI__builtin_ia32_scatterdiv2df: 11256 case X86::BI__builtin_ia32_scatterdiv2di: 11257 case X86::BI__builtin_ia32_scatterdiv4df: 11258 case X86::BI__builtin_ia32_scatterdiv4di: 11259 case X86::BI__builtin_ia32_scatterdiv4sf: 11260 case X86::BI__builtin_ia32_scatterdiv4si: 11261 case X86::BI__builtin_ia32_scatterdiv8sf: 11262 case X86::BI__builtin_ia32_scatterdiv8si: 11263 case X86::BI__builtin_ia32_scattersiv2df: 11264 case X86::BI__builtin_ia32_scattersiv2di: 11265 case X86::BI__builtin_ia32_scattersiv4df: 11266 case X86::BI__builtin_ia32_scattersiv4di: 11267 case X86::BI__builtin_ia32_scattersiv4sf: 11268 case X86::BI__builtin_ia32_scattersiv4si: 11269 case X86::BI__builtin_ia32_scattersiv8sf: 11270 case X86::BI__builtin_ia32_scattersiv8si: { 11271 Intrinsic::ID IID; 11272 switch (BuiltinID) { 11273 default: llvm_unreachable("Unexpected builtin"); 11274 case X86::BI__builtin_ia32_scattersiv8df: 11275 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; 11276 break; 11277 case X86::BI__builtin_ia32_scattersiv16sf: 11278 IID = Intrinsic::x86_avx512_mask_scatter_dps_512; 11279 break; 11280 case X86::BI__builtin_ia32_scatterdiv8df: 11281 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; 11282 break; 11283 case X86::BI__builtin_ia32_scatterdiv16sf: 11284 IID = Intrinsic::x86_avx512_mask_scatter_qps_512; 11285 break; 11286 case X86::BI__builtin_ia32_scattersiv8di: 11287 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; 11288 break; 11289 case X86::BI__builtin_ia32_scattersiv16si: 11290 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; 11291 break; 11292 case X86::BI__builtin_ia32_scatterdiv8di: 11293 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512; 11294 break; 11295 case X86::BI__builtin_ia32_scatterdiv16si: 11296 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; 11297 break; 11298 case X86::BI__builtin_ia32_scatterdiv2df: 11299 IID = 
Intrinsic::x86_avx512_mask_scatterdiv2_df; 11300 break; 11301 case X86::BI__builtin_ia32_scatterdiv2di: 11302 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; 11303 break; 11304 case X86::BI__builtin_ia32_scatterdiv4df: 11305 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; 11306 break; 11307 case X86::BI__builtin_ia32_scatterdiv4di: 11308 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; 11309 break; 11310 case X86::BI__builtin_ia32_scatterdiv4sf: 11311 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; 11312 break; 11313 case X86::BI__builtin_ia32_scatterdiv4si: 11314 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; 11315 break; 11316 case X86::BI__builtin_ia32_scatterdiv8sf: 11317 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; 11318 break; 11319 case X86::BI__builtin_ia32_scatterdiv8si: 11320 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; 11321 break; 11322 case X86::BI__builtin_ia32_scattersiv2df: 11323 IID = Intrinsic::x86_avx512_mask_scattersiv2_df; 11324 break; 11325 case X86::BI__builtin_ia32_scattersiv2di: 11326 IID = Intrinsic::x86_avx512_mask_scattersiv2_di; 11327 break; 11328 case X86::BI__builtin_ia32_scattersiv4df: 11329 IID = Intrinsic::x86_avx512_mask_scattersiv4_df; 11330 break; 11331 case X86::BI__builtin_ia32_scattersiv4di: 11332 IID = Intrinsic::x86_avx512_mask_scattersiv4_di; 11333 break; 11334 case X86::BI__builtin_ia32_scattersiv4sf: 11335 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; 11336 break; 11337 case X86::BI__builtin_ia32_scattersiv4si: 11338 IID = Intrinsic::x86_avx512_mask_scattersiv4_si; 11339 break; 11340 case X86::BI__builtin_ia32_scattersiv8sf: 11341 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; 11342 break; 11343 case X86::BI__builtin_ia32_scattersiv8si: 11344 IID = Intrinsic::x86_avx512_mask_scattersiv8_si; 11345 break; 11346 } 11347 11348 unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(), 11349 Ops[3]->getType()->getVectorNumElements()); 11350 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); 11351 
Function *Intr = CGM.getIntrinsic(IID); 11352 return Builder.CreateCall(Intr, Ops); 11353 } 11354 11355 case X86::BI__builtin_ia32_vextractf128_pd256: 11356 case X86::BI__builtin_ia32_vextractf128_ps256: 11357 case X86::BI__builtin_ia32_vextractf128_si256: 11358 case X86::BI__builtin_ia32_extract128i256: 11359 case X86::BI__builtin_ia32_extractf64x4_mask: 11360 case X86::BI__builtin_ia32_extractf32x4_mask: 11361 case X86::BI__builtin_ia32_extracti64x4_mask: 11362 case X86::BI__builtin_ia32_extracti32x4_mask: 11363 case X86::BI__builtin_ia32_extractf32x8_mask: 11364 case X86::BI__builtin_ia32_extracti32x8_mask: 11365 case X86::BI__builtin_ia32_extractf32x4_256_mask: 11366 case X86::BI__builtin_ia32_extracti32x4_256_mask: 11367 case X86::BI__builtin_ia32_extractf64x2_256_mask: 11368 case X86::BI__builtin_ia32_extracti64x2_256_mask: 11369 case X86::BI__builtin_ia32_extractf64x2_512_mask: 11370 case X86::BI__builtin_ia32_extracti64x2_512_mask: { 11371 llvm::Type *DstTy = ConvertType(E->getType()); 11372 unsigned NumElts = DstTy->getVectorNumElements(); 11373 unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements(); 11374 unsigned SubVectors = SrcNumElts / NumElts; 11375 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue(); 11376 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); 11377 Index &= SubVectors - 1; // Remove any extra bits. 
11378 Index *= NumElts; 11379 11380 uint32_t Indices[16]; 11381 for (unsigned i = 0; i != NumElts; ++i) 11382 Indices[i] = i + Index; 11383 11384 Value *Res = Builder.CreateShuffleVector(Ops[0], 11385 UndefValue::get(Ops[0]->getType()), 11386 makeArrayRef(Indices, NumElts), 11387 "extract"); 11388 11389 if (Ops.size() == 4) 11390 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]); 11391 11392 return Res; 11393 } 11394 case X86::BI__builtin_ia32_vinsertf128_pd256: 11395 case X86::BI__builtin_ia32_vinsertf128_ps256: 11396 case X86::BI__builtin_ia32_vinsertf128_si256: 11397 case X86::BI__builtin_ia32_insert128i256: 11398 case X86::BI__builtin_ia32_insertf64x4: 11399 case X86::BI__builtin_ia32_insertf32x4: 11400 case X86::BI__builtin_ia32_inserti64x4: 11401 case X86::BI__builtin_ia32_inserti32x4: 11402 case X86::BI__builtin_ia32_insertf32x8: 11403 case X86::BI__builtin_ia32_inserti32x8: 11404 case X86::BI__builtin_ia32_insertf32x4_256: 11405 case X86::BI__builtin_ia32_inserti32x4_256: 11406 case X86::BI__builtin_ia32_insertf64x2_256: 11407 case X86::BI__builtin_ia32_inserti64x2_256: 11408 case X86::BI__builtin_ia32_insertf64x2_512: 11409 case X86::BI__builtin_ia32_inserti64x2_512: { 11410 unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements(); 11411 unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements(); 11412 unsigned SubVectors = DstNumElts / SrcNumElts; 11413 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); 11414 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); 11415 Index &= SubVectors - 1; // Remove any extra bits. 11416 Index *= SrcNumElts; 11417 11418 uint32_t Indices[16]; 11419 for (unsigned i = 0; i != DstNumElts; ++i) 11420 Indices[i] = (i >= SrcNumElts) ? 
SrcNumElts + (i % SrcNumElts) : i; 11421 11422 Value *Op1 = Builder.CreateShuffleVector(Ops[1], 11423 UndefValue::get(Ops[1]->getType()), 11424 makeArrayRef(Indices, DstNumElts), 11425 "widen"); 11426 11427 for (unsigned i = 0; i != DstNumElts; ++i) { 11428 if (i >= Index && i < (Index + SrcNumElts)) 11429 Indices[i] = (i - Index) + DstNumElts; 11430 else 11431 Indices[i] = i; 11432 } 11433 11434 return Builder.CreateShuffleVector(Ops[0], Op1, 11435 makeArrayRef(Indices, DstNumElts), 11436 "insert"); 11437 } 11438 case X86::BI__builtin_ia32_pmovqd512_mask: 11439 case X86::BI__builtin_ia32_pmovwb512_mask: { 11440 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType()); 11441 return EmitX86Select(*this, Ops[2], Res, Ops[1]); 11442 } 11443 case X86::BI__builtin_ia32_pmovdb512_mask: 11444 case X86::BI__builtin_ia32_pmovdw512_mask: 11445 case X86::BI__builtin_ia32_pmovqw512_mask: { 11446 if (const auto *C = dyn_cast<Constant>(Ops[2])) 11447 if (C->isAllOnesValue()) 11448 return Builder.CreateTrunc(Ops[0], Ops[1]->getType()); 11449 11450 Intrinsic::ID IID; 11451 switch (BuiltinID) { 11452 default: llvm_unreachable("Unsupported intrinsic!"); 11453 case X86::BI__builtin_ia32_pmovdb512_mask: 11454 IID = Intrinsic::x86_avx512_mask_pmov_db_512; 11455 break; 11456 case X86::BI__builtin_ia32_pmovdw512_mask: 11457 IID = Intrinsic::x86_avx512_mask_pmov_dw_512; 11458 break; 11459 case X86::BI__builtin_ia32_pmovqw512_mask: 11460 IID = Intrinsic::x86_avx512_mask_pmov_qw_512; 11461 break; 11462 } 11463 11464 Function *Intr = CGM.getIntrinsic(IID); 11465 return Builder.CreateCall(Intr, Ops); 11466 } 11467 case X86::BI__builtin_ia32_pblendw128: 11468 case X86::BI__builtin_ia32_blendpd: 11469 case X86::BI__builtin_ia32_blendps: 11470 case X86::BI__builtin_ia32_blendpd256: 11471 case X86::BI__builtin_ia32_blendps256: 11472 case X86::BI__builtin_ia32_pblendw256: 11473 case X86::BI__builtin_ia32_pblendd128: 11474 case X86::BI__builtin_ia32_pblendd256: { 11475 unsigned NumElts = 
Ops[0]->getType()->getVectorNumElements(); 11476 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 11477 11478 uint32_t Indices[16]; 11479 // If there are more than 8 elements, the immediate is used twice so make 11480 // sure we handle that. 11481 for (unsigned i = 0; i != NumElts; ++i) 11482 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i; 11483 11484 return Builder.CreateShuffleVector(Ops[0], Ops[1], 11485 makeArrayRef(Indices, NumElts), 11486 "blend"); 11487 } 11488 case X86::BI__builtin_ia32_pshuflw: 11489 case X86::BI__builtin_ia32_pshuflw256: 11490 case X86::BI__builtin_ia32_pshuflw512: { 11491 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 11492 llvm::Type *Ty = Ops[0]->getType(); 11493 unsigned NumElts = Ty->getVectorNumElements(); 11494 11495 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 11496 Imm = (Imm & 0xff) * 0x01010101; 11497 11498 uint32_t Indices[32]; 11499 for (unsigned l = 0; l != NumElts; l += 8) { 11500 for (unsigned i = 0; i != 4; ++i) { 11501 Indices[l + i] = l + (Imm & 3); 11502 Imm >>= 2; 11503 } 11504 for (unsigned i = 4; i != 8; ++i) 11505 Indices[l + i] = l + i; 11506 } 11507 11508 return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), 11509 makeArrayRef(Indices, NumElts), 11510 "pshuflw"); 11511 } 11512 case X86::BI__builtin_ia32_pshufhw: 11513 case X86::BI__builtin_ia32_pshufhw256: 11514 case X86::BI__builtin_ia32_pshufhw512: { 11515 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 11516 llvm::Type *Ty = Ops[0]->getType(); 11517 unsigned NumElts = Ty->getVectorNumElements(); 11518 11519 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
11520 Imm = (Imm & 0xff) * 0x01010101; 11521 11522 uint32_t Indices[32]; 11523 for (unsigned l = 0; l != NumElts; l += 8) { 11524 for (unsigned i = 0; i != 4; ++i) 11525 Indices[l + i] = l + i; 11526 for (unsigned i = 4; i != 8; ++i) { 11527 Indices[l + i] = l + 4 + (Imm & 3); 11528 Imm >>= 2; 11529 } 11530 } 11531 11532 return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), 11533 makeArrayRef(Indices, NumElts), 11534 "pshufhw"); 11535 } 11536 case X86::BI__builtin_ia32_pshufd: 11537 case X86::BI__builtin_ia32_pshufd256: 11538 case X86::BI__builtin_ia32_pshufd512: 11539 case X86::BI__builtin_ia32_vpermilpd: 11540 case X86::BI__builtin_ia32_vpermilps: 11541 case X86::BI__builtin_ia32_vpermilpd256: 11542 case X86::BI__builtin_ia32_vpermilps256: 11543 case X86::BI__builtin_ia32_vpermilpd512: 11544 case X86::BI__builtin_ia32_vpermilps512: { 11545 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 11546 llvm::Type *Ty = Ops[0]->getType(); 11547 unsigned NumElts = Ty->getVectorNumElements(); 11548 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; 11549 unsigned NumLaneElts = NumElts / NumLanes; 11550 11551 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
11552 Imm = (Imm & 0xff) * 0x01010101; 11553 11554 uint32_t Indices[16]; 11555 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 11556 for (unsigned i = 0; i != NumLaneElts; ++i) { 11557 Indices[i + l] = (Imm % NumLaneElts) + l; 11558 Imm /= NumLaneElts; 11559 } 11560 } 11561 11562 return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), 11563 makeArrayRef(Indices, NumElts), 11564 "permil"); 11565 } 11566 case X86::BI__builtin_ia32_shufpd: 11567 case X86::BI__builtin_ia32_shufpd256: 11568 case X86::BI__builtin_ia32_shufpd512: 11569 case X86::BI__builtin_ia32_shufps: 11570 case X86::BI__builtin_ia32_shufps256: 11571 case X86::BI__builtin_ia32_shufps512: { 11572 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 11573 llvm::Type *Ty = Ops[0]->getType(); 11574 unsigned NumElts = Ty->getVectorNumElements(); 11575 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; 11576 unsigned NumLaneElts = NumElts / NumLanes; 11577 11578 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 11579 Imm = (Imm & 0xff) * 0x01010101; 11580 11581 uint32_t Indices[16]; 11582 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 11583 for (unsigned i = 0; i != NumLaneElts; ++i) { 11584 unsigned Index = Imm % NumLaneElts; 11585 Imm /= NumLaneElts; 11586 if (i >= (NumLaneElts / 2)) 11587 Index += NumElts; 11588 Indices[l + i] = l + Index; 11589 } 11590 } 11591 11592 return Builder.CreateShuffleVector(Ops[0], Ops[1], 11593 makeArrayRef(Indices, NumElts), 11594 "shufp"); 11595 } 11596 case X86::BI__builtin_ia32_permdi256: 11597 case X86::BI__builtin_ia32_permdf256: 11598 case X86::BI__builtin_ia32_permdi512: 11599 case X86::BI__builtin_ia32_permdf512: { 11600 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 11601 llvm::Type *Ty = Ops[0]->getType(); 11602 unsigned NumElts = Ty->getVectorNumElements(); 11603 11604 // These intrinsics operate on 256-bit lanes of four 64-bit elements. 
11605 uint32_t Indices[8]; 11606 for (unsigned l = 0; l != NumElts; l += 4) 11607 for (unsigned i = 0; i != 4; ++i) 11608 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); 11609 11610 return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), 11611 makeArrayRef(Indices, NumElts), 11612 "perm"); 11613 } 11614 case X86::BI__builtin_ia32_palignr128: 11615 case X86::BI__builtin_ia32_palignr256: 11616 case X86::BI__builtin_ia32_palignr512: { 11617 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; 11618 11619 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 11620 assert(NumElts % 16 == 0); 11621 11622 // If palignr is shifting the pair of vectors more than the size of two 11623 // lanes, emit zero. 11624 if (ShiftVal >= 32) 11625 return llvm::Constant::getNullValue(ConvertType(E->getType())); 11626 11627 // If palignr is shifting the pair of input vectors more than one lane, 11628 // but less than two lanes, convert to shifting in zeroes. 11629 if (ShiftVal > 16) { 11630 ShiftVal -= 16; 11631 Ops[1] = Ops[0]; 11632 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 11633 } 11634 11635 uint32_t Indices[64]; 11636 // 256-bit palignr operates on 128-bit lanes so we need to handle that 11637 for (unsigned l = 0; l != NumElts; l += 16) { 11638 for (unsigned i = 0; i != 16; ++i) { 11639 unsigned Idx = ShiftVal + i; 11640 if (Idx >= 16) 11641 Idx += NumElts - 16; // End of lane, switch operand. 
11642 Indices[l + i] = Idx + l; 11643 } 11644 } 11645 11646 return Builder.CreateShuffleVector(Ops[1], Ops[0], 11647 makeArrayRef(Indices, NumElts), 11648 "palignr"); 11649 } 11650 case X86::BI__builtin_ia32_alignd128: 11651 case X86::BI__builtin_ia32_alignd256: 11652 case X86::BI__builtin_ia32_alignd512: 11653 case X86::BI__builtin_ia32_alignq128: 11654 case X86::BI__builtin_ia32_alignq256: 11655 case X86::BI__builtin_ia32_alignq512: { 11656 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 11657 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; 11658 11659 // Mask the shift amount to width of two vectors. 11660 ShiftVal &= (2 * NumElts) - 1; 11661 11662 uint32_t Indices[16]; 11663 for (unsigned i = 0; i != NumElts; ++i) 11664 Indices[i] = i + ShiftVal; 11665 11666 return Builder.CreateShuffleVector(Ops[1], Ops[0], 11667 makeArrayRef(Indices, NumElts), 11668 "valign"); 11669 } 11670 case X86::BI__builtin_ia32_shuf_f32x4_256: 11671 case X86::BI__builtin_ia32_shuf_f64x2_256: 11672 case X86::BI__builtin_ia32_shuf_i32x4_256: 11673 case X86::BI__builtin_ia32_shuf_i64x2_256: 11674 case X86::BI__builtin_ia32_shuf_f32x4: 11675 case X86::BI__builtin_ia32_shuf_f64x2: 11676 case X86::BI__builtin_ia32_shuf_i32x4: 11677 case X86::BI__builtin_ia32_shuf_i64x2: { 11678 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 11679 llvm::Type *Ty = Ops[0]->getType(); 11680 unsigned NumElts = Ty->getVectorNumElements(); 11681 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2; 11682 unsigned NumLaneElts = NumElts / NumLanes; 11683 11684 uint32_t Indices[16]; 11685 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 11686 unsigned Index = (Imm % NumLanes) * NumLaneElts; 11687 Imm /= NumLanes; // Discard the bits we just used. 11688 if (l >= (NumElts / 2)) 11689 Index += NumElts; // Switch to other source. 
11690 for (unsigned i = 0; i != NumLaneElts; ++i) { 11691 Indices[l + i] = Index + i; 11692 } 11693 } 11694 11695 return Builder.CreateShuffleVector(Ops[0], Ops[1], 11696 makeArrayRef(Indices, NumElts), 11697 "shuf"); 11698 } 11699 11700 case X86::BI__builtin_ia32_vperm2f128_pd256: 11701 case X86::BI__builtin_ia32_vperm2f128_ps256: 11702 case X86::BI__builtin_ia32_vperm2f128_si256: 11703 case X86::BI__builtin_ia32_permti256: { 11704 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 11705 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 11706 11707 // This takes a very simple approach since there are two lanes and a 11708 // shuffle can have 2 inputs. So we reserve the first input for the first 11709 // lane and the second input for the second lane. This may result in 11710 // duplicate sources, but this can be dealt with in the backend. 11711 11712 Value *OutOps[2]; 11713 uint32_t Indices[8]; 11714 for (unsigned l = 0; l != 2; ++l) { 11715 // Determine the source for this lane. 11716 if (Imm & (1 << ((l * 4) + 3))) 11717 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); 11718 else if (Imm & (1 << ((l * 4) + 1))) 11719 OutOps[l] = Ops[1]; 11720 else 11721 OutOps[l] = Ops[0]; 11722 11723 for (unsigned i = 0; i != NumElts/2; ++i) { 11724 // Start with ith element of the source for this lane. 11725 unsigned Idx = (l * NumElts) + i; 11726 // If bit 0 of the immediate half is set, switch to the high half of 11727 // the source. 
11728 if (Imm & (1 << (l * 4))) 11729 Idx += NumElts/2; 11730 Indices[(l * (NumElts/2)) + i] = Idx; 11731 } 11732 } 11733 11734 return Builder.CreateShuffleVector(OutOps[0], OutOps[1], 11735 makeArrayRef(Indices, NumElts), 11736 "vperm"); 11737 } 11738 11739 case X86::BI__builtin_ia32_pslldqi128_byteshift: 11740 case X86::BI__builtin_ia32_pslldqi256_byteshift: 11741 case X86::BI__builtin_ia32_pslldqi512_byteshift: { 11742 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 11743 llvm::Type *ResultType = Ops[0]->getType(); 11744 // Builtin type is vXi64 so multiply by 8 to get bytes. 11745 unsigned NumElts = ResultType->getVectorNumElements() * 8; 11746 11747 // If pslldq is shifting the vector more than 15 bytes, emit zero. 11748 if (ShiftVal >= 16) 11749 return llvm::Constant::getNullValue(ResultType); 11750 11751 uint32_t Indices[64]; 11752 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that 11753 for (unsigned l = 0; l != NumElts; l += 16) { 11754 for (unsigned i = 0; i != 16; ++i) { 11755 unsigned Idx = NumElts + i - ShiftVal; 11756 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand. 11757 Indices[l + i] = Idx + l; 11758 } 11759 } 11760 11761 llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts); 11762 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 11763 Value *Zero = llvm::Constant::getNullValue(VecTy); 11764 Value *SV = Builder.CreateShuffleVector(Zero, Cast, 11765 makeArrayRef(Indices, NumElts), 11766 "pslldq"); 11767 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast"); 11768 } 11769 case X86::BI__builtin_ia32_psrldqi128_byteshift: 11770 case X86::BI__builtin_ia32_psrldqi256_byteshift: 11771 case X86::BI__builtin_ia32_psrldqi512_byteshift: { 11772 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 11773 llvm::Type *ResultType = Ops[0]->getType(); 11774 // Builtin type is vXi64 so multiply by 8 to get bytes. 
11775 unsigned NumElts = ResultType->getVectorNumElements() * 8; 11776 11777 // If psrldq is shifting the vector more than 15 bytes, emit zero. 11778 if (ShiftVal >= 16) 11779 return llvm::Constant::getNullValue(ResultType); 11780 11781 uint32_t Indices[64]; 11782 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that 11783 for (unsigned l = 0; l != NumElts; l += 16) { 11784 for (unsigned i = 0; i != 16; ++i) { 11785 unsigned Idx = i + ShiftVal; 11786 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand. 11787 Indices[l + i] = Idx + l; 11788 } 11789 } 11790 11791 llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts); 11792 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 11793 Value *Zero = llvm::Constant::getNullValue(VecTy); 11794 Value *SV = Builder.CreateShuffleVector(Cast, Zero, 11795 makeArrayRef(Indices, NumElts), 11796 "psrldq"); 11797 return Builder.CreateBitCast(SV, ResultType, "cast"); 11798 } 11799 case X86::BI__builtin_ia32_kshiftliqi: 11800 case X86::BI__builtin_ia32_kshiftlihi: 11801 case X86::BI__builtin_ia32_kshiftlisi: 11802 case X86::BI__builtin_ia32_kshiftlidi: { 11803 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 11804 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 11805 11806 if (ShiftVal >= NumElts) 11807 return llvm::Constant::getNullValue(Ops[0]->getType()); 11808 11809 Value *In = getMaskVecValue(*this, Ops[0], NumElts); 11810 11811 uint32_t Indices[64]; 11812 for (unsigned i = 0; i != NumElts; ++i) 11813 Indices[i] = NumElts + i - ShiftVal; 11814 11815 Value *Zero = llvm::Constant::getNullValue(In->getType()); 11816 Value *SV = Builder.CreateShuffleVector(Zero, In, 11817 makeArrayRef(Indices, NumElts), 11818 "kshiftl"); 11819 return Builder.CreateBitCast(SV, Ops[0]->getType()); 11820 } 11821 case X86::BI__builtin_ia32_kshiftriqi: 11822 case X86::BI__builtin_ia32_kshiftrihi: 11823 case X86::BI__builtin_ia32_kshiftrisi: 11824 case 
X86::BI__builtin_ia32_kshiftridi: { 11825 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 11826 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 11827 11828 if (ShiftVal >= NumElts) 11829 return llvm::Constant::getNullValue(Ops[0]->getType()); 11830 11831 Value *In = getMaskVecValue(*this, Ops[0], NumElts); 11832 11833 uint32_t Indices[64]; 11834 for (unsigned i = 0; i != NumElts; ++i) 11835 Indices[i] = i + ShiftVal; 11836 11837 Value *Zero = llvm::Constant::getNullValue(In->getType()); 11838 Value *SV = Builder.CreateShuffleVector(In, Zero, 11839 makeArrayRef(Indices, NumElts), 11840 "kshiftr"); 11841 return Builder.CreateBitCast(SV, Ops[0]->getType()); 11842 } 11843 case X86::BI__builtin_ia32_movnti: 11844 case X86::BI__builtin_ia32_movnti64: 11845 case X86::BI__builtin_ia32_movntsd: 11846 case X86::BI__builtin_ia32_movntss: { 11847 llvm::MDNode *Node = llvm::MDNode::get( 11848 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 11849 11850 Value *Ptr = Ops[0]; 11851 Value *Src = Ops[1]; 11852 11853 // Extract the 0'th element of the source vector. 11854 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 11855 BuiltinID == X86::BI__builtin_ia32_movntss) 11856 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 11857 11858 // Convert the type of the pointer to a pointer to the stored type. 11859 Value *BC = Builder.CreateBitCast( 11860 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); 11861 11862 // Unaligned nontemporal store of the scalar value. 11863 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); 11864 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 11865 SI->setAlignment(llvm::Align(1)); 11866 return SI; 11867 } 11868 // Rotate is a special case of funnel shift - 1st 2 args are the same. 
11869 case X86::BI__builtin_ia32_vprotb: 11870 case X86::BI__builtin_ia32_vprotw: 11871 case X86::BI__builtin_ia32_vprotd: 11872 case X86::BI__builtin_ia32_vprotq: 11873 case X86::BI__builtin_ia32_vprotbi: 11874 case X86::BI__builtin_ia32_vprotwi: 11875 case X86::BI__builtin_ia32_vprotdi: 11876 case X86::BI__builtin_ia32_vprotqi: 11877 case X86::BI__builtin_ia32_prold128: 11878 case X86::BI__builtin_ia32_prold256: 11879 case X86::BI__builtin_ia32_prold512: 11880 case X86::BI__builtin_ia32_prolq128: 11881 case X86::BI__builtin_ia32_prolq256: 11882 case X86::BI__builtin_ia32_prolq512: 11883 case X86::BI__builtin_ia32_prolvd128: 11884 case X86::BI__builtin_ia32_prolvd256: 11885 case X86::BI__builtin_ia32_prolvd512: 11886 case X86::BI__builtin_ia32_prolvq128: 11887 case X86::BI__builtin_ia32_prolvq256: 11888 case X86::BI__builtin_ia32_prolvq512: 11889 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false); 11890 case X86::BI__builtin_ia32_prord128: 11891 case X86::BI__builtin_ia32_prord256: 11892 case X86::BI__builtin_ia32_prord512: 11893 case X86::BI__builtin_ia32_prorq128: 11894 case X86::BI__builtin_ia32_prorq256: 11895 case X86::BI__builtin_ia32_prorq512: 11896 case X86::BI__builtin_ia32_prorvd128: 11897 case X86::BI__builtin_ia32_prorvd256: 11898 case X86::BI__builtin_ia32_prorvd512: 11899 case X86::BI__builtin_ia32_prorvq128: 11900 case X86::BI__builtin_ia32_prorvq256: 11901 case X86::BI__builtin_ia32_prorvq512: 11902 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true); 11903 case X86::BI__builtin_ia32_selectb_128: 11904 case X86::BI__builtin_ia32_selectb_256: 11905 case X86::BI__builtin_ia32_selectb_512: 11906 case X86::BI__builtin_ia32_selectw_128: 11907 case X86::BI__builtin_ia32_selectw_256: 11908 case X86::BI__builtin_ia32_selectw_512: 11909 case X86::BI__builtin_ia32_selectd_128: 11910 case X86::BI__builtin_ia32_selectd_256: 11911 case X86::BI__builtin_ia32_selectd_512: 11912 case X86::BI__builtin_ia32_selectq_128: 11913 case 
X86::BI__builtin_ia32_selectq_256: 11914 case X86::BI__builtin_ia32_selectq_512: 11915 case X86::BI__builtin_ia32_selectps_128: 11916 case X86::BI__builtin_ia32_selectps_256: 11917 case X86::BI__builtin_ia32_selectps_512: 11918 case X86::BI__builtin_ia32_selectpd_128: 11919 case X86::BI__builtin_ia32_selectpd_256: 11920 case X86::BI__builtin_ia32_selectpd_512: 11921 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 11922 case X86::BI__builtin_ia32_selectss_128: 11923 case X86::BI__builtin_ia32_selectsd_128: { 11924 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 11925 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 11926 A = EmitX86ScalarSelect(*this, Ops[0], A, B); 11927 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0); 11928 } 11929 case X86::BI__builtin_ia32_cmpb128_mask: 11930 case X86::BI__builtin_ia32_cmpb256_mask: 11931 case X86::BI__builtin_ia32_cmpb512_mask: 11932 case X86::BI__builtin_ia32_cmpw128_mask: 11933 case X86::BI__builtin_ia32_cmpw256_mask: 11934 case X86::BI__builtin_ia32_cmpw512_mask: 11935 case X86::BI__builtin_ia32_cmpd128_mask: 11936 case X86::BI__builtin_ia32_cmpd256_mask: 11937 case X86::BI__builtin_ia32_cmpd512_mask: 11938 case X86::BI__builtin_ia32_cmpq128_mask: 11939 case X86::BI__builtin_ia32_cmpq256_mask: 11940 case X86::BI__builtin_ia32_cmpq512_mask: { 11941 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 11942 return EmitX86MaskedCompare(*this, CC, true, Ops); 11943 } 11944 case X86::BI__builtin_ia32_ucmpb128_mask: 11945 case X86::BI__builtin_ia32_ucmpb256_mask: 11946 case X86::BI__builtin_ia32_ucmpb512_mask: 11947 case X86::BI__builtin_ia32_ucmpw128_mask: 11948 case X86::BI__builtin_ia32_ucmpw256_mask: 11949 case X86::BI__builtin_ia32_ucmpw512_mask: 11950 case X86::BI__builtin_ia32_ucmpd128_mask: 11951 case X86::BI__builtin_ia32_ucmpd256_mask: 11952 case X86::BI__builtin_ia32_ucmpd512_mask: 11953 case X86::BI__builtin_ia32_ucmpq128_mask: 11954 case 
X86::BI__builtin_ia32_ucmpq256_mask: 11955 case X86::BI__builtin_ia32_ucmpq512_mask: { 11956 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 11957 return EmitX86MaskedCompare(*this, CC, false, Ops); 11958 } 11959 case X86::BI__builtin_ia32_vpcomb: 11960 case X86::BI__builtin_ia32_vpcomw: 11961 case X86::BI__builtin_ia32_vpcomd: 11962 case X86::BI__builtin_ia32_vpcomq: 11963 return EmitX86vpcom(*this, Ops, true); 11964 case X86::BI__builtin_ia32_vpcomub: 11965 case X86::BI__builtin_ia32_vpcomuw: 11966 case X86::BI__builtin_ia32_vpcomud: 11967 case X86::BI__builtin_ia32_vpcomuq: 11968 return EmitX86vpcom(*this, Ops, false); 11969 11970 case X86::BI__builtin_ia32_kortestcqi: 11971 case X86::BI__builtin_ia32_kortestchi: 11972 case X86::BI__builtin_ia32_kortestcsi: 11973 case X86::BI__builtin_ia32_kortestcdi: { 11974 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); 11975 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType()); 11976 Value *Cmp = Builder.CreateICmpEQ(Or, C); 11977 return Builder.CreateZExt(Cmp, ConvertType(E->getType())); 11978 } 11979 case X86::BI__builtin_ia32_kortestzqi: 11980 case X86::BI__builtin_ia32_kortestzhi: 11981 case X86::BI__builtin_ia32_kortestzsi: 11982 case X86::BI__builtin_ia32_kortestzdi: { 11983 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); 11984 Value *C = llvm::Constant::getNullValue(Ops[0]->getType()); 11985 Value *Cmp = Builder.CreateICmpEQ(Or, C); 11986 return Builder.CreateZExt(Cmp, ConvertType(E->getType())); 11987 } 11988 11989 case X86::BI__builtin_ia32_ktestcqi: 11990 case X86::BI__builtin_ia32_ktestzqi: 11991 case X86::BI__builtin_ia32_ktestchi: 11992 case X86::BI__builtin_ia32_ktestzhi: 11993 case X86::BI__builtin_ia32_ktestcsi: 11994 case X86::BI__builtin_ia32_ktestzsi: 11995 case X86::BI__builtin_ia32_ktestcdi: 11996 case X86::BI__builtin_ia32_ktestzdi: { 11997 Intrinsic::ID IID; 11998 switch (BuiltinID) { 11999 default: llvm_unreachable("Unsupported intrinsic!"); 12000 
case X86::BI__builtin_ia32_ktestcqi: 12001 IID = Intrinsic::x86_avx512_ktestc_b; 12002 break; 12003 case X86::BI__builtin_ia32_ktestzqi: 12004 IID = Intrinsic::x86_avx512_ktestz_b; 12005 break; 12006 case X86::BI__builtin_ia32_ktestchi: 12007 IID = Intrinsic::x86_avx512_ktestc_w; 12008 break; 12009 case X86::BI__builtin_ia32_ktestzhi: 12010 IID = Intrinsic::x86_avx512_ktestz_w; 12011 break; 12012 case X86::BI__builtin_ia32_ktestcsi: 12013 IID = Intrinsic::x86_avx512_ktestc_d; 12014 break; 12015 case X86::BI__builtin_ia32_ktestzsi: 12016 IID = Intrinsic::x86_avx512_ktestz_d; 12017 break; 12018 case X86::BI__builtin_ia32_ktestcdi: 12019 IID = Intrinsic::x86_avx512_ktestc_q; 12020 break; 12021 case X86::BI__builtin_ia32_ktestzdi: 12022 IID = Intrinsic::x86_avx512_ktestz_q; 12023 break; 12024 } 12025 12026 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 12027 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); 12028 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); 12029 Function *Intr = CGM.getIntrinsic(IID); 12030 return Builder.CreateCall(Intr, {LHS, RHS}); 12031 } 12032 12033 case X86::BI__builtin_ia32_kaddqi: 12034 case X86::BI__builtin_ia32_kaddhi: 12035 case X86::BI__builtin_ia32_kaddsi: 12036 case X86::BI__builtin_ia32_kadddi: { 12037 Intrinsic::ID IID; 12038 switch (BuiltinID) { 12039 default: llvm_unreachable("Unsupported intrinsic!"); 12040 case X86::BI__builtin_ia32_kaddqi: 12041 IID = Intrinsic::x86_avx512_kadd_b; 12042 break; 12043 case X86::BI__builtin_ia32_kaddhi: 12044 IID = Intrinsic::x86_avx512_kadd_w; 12045 break; 12046 case X86::BI__builtin_ia32_kaddsi: 12047 IID = Intrinsic::x86_avx512_kadd_d; 12048 break; 12049 case X86::BI__builtin_ia32_kadddi: 12050 IID = Intrinsic::x86_avx512_kadd_q; 12051 break; 12052 } 12053 12054 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 12055 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); 12056 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); 12057 Function *Intr = 
CGM.getIntrinsic(IID); 12058 Value *Res = Builder.CreateCall(Intr, {LHS, RHS}); 12059 return Builder.CreateBitCast(Res, Ops[0]->getType()); 12060 } 12061 case X86::BI__builtin_ia32_kandqi: 12062 case X86::BI__builtin_ia32_kandhi: 12063 case X86::BI__builtin_ia32_kandsi: 12064 case X86::BI__builtin_ia32_kanddi: 12065 return EmitX86MaskLogic(*this, Instruction::And, Ops); 12066 case X86::BI__builtin_ia32_kandnqi: 12067 case X86::BI__builtin_ia32_kandnhi: 12068 case X86::BI__builtin_ia32_kandnsi: 12069 case X86::BI__builtin_ia32_kandndi: 12070 return EmitX86MaskLogic(*this, Instruction::And, Ops, true); 12071 case X86::BI__builtin_ia32_korqi: 12072 case X86::BI__builtin_ia32_korhi: 12073 case X86::BI__builtin_ia32_korsi: 12074 case X86::BI__builtin_ia32_kordi: 12075 return EmitX86MaskLogic(*this, Instruction::Or, Ops); 12076 case X86::BI__builtin_ia32_kxnorqi: 12077 case X86::BI__builtin_ia32_kxnorhi: 12078 case X86::BI__builtin_ia32_kxnorsi: 12079 case X86::BI__builtin_ia32_kxnordi: 12080 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true); 12081 case X86::BI__builtin_ia32_kxorqi: 12082 case X86::BI__builtin_ia32_kxorhi: 12083 case X86::BI__builtin_ia32_kxorsi: 12084 case X86::BI__builtin_ia32_kxordi: 12085 return EmitX86MaskLogic(*this, Instruction::Xor, Ops); 12086 case X86::BI__builtin_ia32_knotqi: 12087 case X86::BI__builtin_ia32_knothi: 12088 case X86::BI__builtin_ia32_knotsi: 12089 case X86::BI__builtin_ia32_knotdi: { 12090 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 12091 Value *Res = getMaskVecValue(*this, Ops[0], NumElts); 12092 return Builder.CreateBitCast(Builder.CreateNot(Res), 12093 Ops[0]->getType()); 12094 } 12095 case X86::BI__builtin_ia32_kmovb: 12096 case X86::BI__builtin_ia32_kmovw: 12097 case X86::BI__builtin_ia32_kmovd: 12098 case X86::BI__builtin_ia32_kmovq: { 12099 // Bitcast to vXi1 type and then back to integer. 
This gets the mask 12100 // register type into the IR, but might be optimized out depending on 12101 // what's around it. 12102 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 12103 Value *Res = getMaskVecValue(*this, Ops[0], NumElts); 12104 return Builder.CreateBitCast(Res, Ops[0]->getType()); 12105 } 12106 12107 case X86::BI__builtin_ia32_kunpckdi: 12108 case X86::BI__builtin_ia32_kunpcksi: 12109 case X86::BI__builtin_ia32_kunpckhi: { 12110 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 12111 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); 12112 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); 12113 uint32_t Indices[64]; 12114 for (unsigned i = 0; i != NumElts; ++i) 12115 Indices[i] = i; 12116 12117 // First extract half of each vector. This gives better codegen than 12118 // doing it in a single shuffle. 12119 LHS = Builder.CreateShuffleVector(LHS, LHS, 12120 makeArrayRef(Indices, NumElts / 2)); 12121 RHS = Builder.CreateShuffleVector(RHS, RHS, 12122 makeArrayRef(Indices, NumElts / 2)); 12123 // Concat the vectors. 12124 // NOTE: Operands are swapped to match the intrinsic definition. 
12125 Value *Res = Builder.CreateShuffleVector(RHS, LHS, 12126 makeArrayRef(Indices, NumElts)); 12127 return Builder.CreateBitCast(Res, Ops[0]->getType()); 12128 } 12129 12130 case X86::BI__builtin_ia32_vplzcntd_128: 12131 case X86::BI__builtin_ia32_vplzcntd_256: 12132 case X86::BI__builtin_ia32_vplzcntd_512: 12133 case X86::BI__builtin_ia32_vplzcntq_128: 12134 case X86::BI__builtin_ia32_vplzcntq_256: 12135 case X86::BI__builtin_ia32_vplzcntq_512: { 12136 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 12137 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}); 12138 } 12139 case X86::BI__builtin_ia32_sqrtss: 12140 case X86::BI__builtin_ia32_sqrtsd: { 12141 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 12142 Function *F; 12143 if (Builder.getIsFPConstrained()) { 12144 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, 12145 A->getType()); 12146 A = Builder.CreateConstrainedFPCall(F, {A}); 12147 } else { 12148 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); 12149 A = Builder.CreateCall(F, {A}); 12150 } 12151 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); 12152 } 12153 case X86::BI__builtin_ia32_sqrtsd_round_mask: 12154 case X86::BI__builtin_ia32_sqrtss_round_mask: { 12155 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); 12156 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), 12157 // otherwise keep the intrinsic. 12158 if (CC != 4) { 12159 Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ? 
12160 Intrinsic::x86_avx512_mask_sqrt_sd : 12161 Intrinsic::x86_avx512_mask_sqrt_ss; 12162 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 12163 } 12164 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 12165 Function *F; 12166 if (Builder.getIsFPConstrained()) { 12167 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, 12168 A->getType()); 12169 A = Builder.CreateConstrainedFPCall(F, A); 12170 } else { 12171 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); 12172 A = Builder.CreateCall(F, A); 12173 } 12174 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 12175 A = EmitX86ScalarSelect(*this, Ops[3], A, Src); 12176 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); 12177 } 12178 case X86::BI__builtin_ia32_sqrtpd256: 12179 case X86::BI__builtin_ia32_sqrtpd: 12180 case X86::BI__builtin_ia32_sqrtps256: 12181 case X86::BI__builtin_ia32_sqrtps: 12182 case X86::BI__builtin_ia32_sqrtps512: 12183 case X86::BI__builtin_ia32_sqrtpd512: { 12184 if (Ops.size() == 2) { 12185 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 12186 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), 12187 // otherwise keep the intrinsic. 12188 if (CC != 4) { 12189 Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ? 
12190 Intrinsic::x86_avx512_sqrt_ps_512 : 12191 Intrinsic::x86_avx512_sqrt_pd_512; 12192 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 12193 } 12194 } 12195 if (Builder.getIsFPConstrained()) { 12196 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, 12197 Ops[0]->getType()); 12198 return Builder.CreateConstrainedFPCall(F, Ops[0]); 12199 } else { 12200 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); 12201 return Builder.CreateCall(F, Ops[0]); 12202 } 12203 } 12204 case X86::BI__builtin_ia32_pabsb128: 12205 case X86::BI__builtin_ia32_pabsw128: 12206 case X86::BI__builtin_ia32_pabsd128: 12207 case X86::BI__builtin_ia32_pabsb256: 12208 case X86::BI__builtin_ia32_pabsw256: 12209 case X86::BI__builtin_ia32_pabsd256: 12210 case X86::BI__builtin_ia32_pabsq128: 12211 case X86::BI__builtin_ia32_pabsq256: 12212 case X86::BI__builtin_ia32_pabsb512: 12213 case X86::BI__builtin_ia32_pabsw512: 12214 case X86::BI__builtin_ia32_pabsd512: 12215 case X86::BI__builtin_ia32_pabsq512: 12216 return EmitX86Abs(*this, Ops); 12217 12218 case X86::BI__builtin_ia32_pmaxsb128: 12219 case X86::BI__builtin_ia32_pmaxsw128: 12220 case X86::BI__builtin_ia32_pmaxsd128: 12221 case X86::BI__builtin_ia32_pmaxsq128: 12222 case X86::BI__builtin_ia32_pmaxsb256: 12223 case X86::BI__builtin_ia32_pmaxsw256: 12224 case X86::BI__builtin_ia32_pmaxsd256: 12225 case X86::BI__builtin_ia32_pmaxsq256: 12226 case X86::BI__builtin_ia32_pmaxsb512: 12227 case X86::BI__builtin_ia32_pmaxsw512: 12228 case X86::BI__builtin_ia32_pmaxsd512: 12229 case X86::BI__builtin_ia32_pmaxsq512: 12230 return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); 12231 case X86::BI__builtin_ia32_pmaxub128: 12232 case X86::BI__builtin_ia32_pmaxuw128: 12233 case X86::BI__builtin_ia32_pmaxud128: 12234 case X86::BI__builtin_ia32_pmaxuq128: 12235 case X86::BI__builtin_ia32_pmaxub256: 12236 case X86::BI__builtin_ia32_pmaxuw256: 12237 case X86::BI__builtin_ia32_pmaxud256: 12238 case 
X86::BI__builtin_ia32_pmaxuq256: 12239 case X86::BI__builtin_ia32_pmaxub512: 12240 case X86::BI__builtin_ia32_pmaxuw512: 12241 case X86::BI__builtin_ia32_pmaxud512: 12242 case X86::BI__builtin_ia32_pmaxuq512: 12243 return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); 12244 case X86::BI__builtin_ia32_pminsb128: 12245 case X86::BI__builtin_ia32_pminsw128: 12246 case X86::BI__builtin_ia32_pminsd128: 12247 case X86::BI__builtin_ia32_pminsq128: 12248 case X86::BI__builtin_ia32_pminsb256: 12249 case X86::BI__builtin_ia32_pminsw256: 12250 case X86::BI__builtin_ia32_pminsd256: 12251 case X86::BI__builtin_ia32_pminsq256: 12252 case X86::BI__builtin_ia32_pminsb512: 12253 case X86::BI__builtin_ia32_pminsw512: 12254 case X86::BI__builtin_ia32_pminsd512: 12255 case X86::BI__builtin_ia32_pminsq512: 12256 return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); 12257 case X86::BI__builtin_ia32_pminub128: 12258 case X86::BI__builtin_ia32_pminuw128: 12259 case X86::BI__builtin_ia32_pminud128: 12260 case X86::BI__builtin_ia32_pminuq128: 12261 case X86::BI__builtin_ia32_pminub256: 12262 case X86::BI__builtin_ia32_pminuw256: 12263 case X86::BI__builtin_ia32_pminud256: 12264 case X86::BI__builtin_ia32_pminuq256: 12265 case X86::BI__builtin_ia32_pminub512: 12266 case X86::BI__builtin_ia32_pminuw512: 12267 case X86::BI__builtin_ia32_pminud512: 12268 case X86::BI__builtin_ia32_pminuq512: 12269 return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); 12270 12271 case X86::BI__builtin_ia32_pmuludq128: 12272 case X86::BI__builtin_ia32_pmuludq256: 12273 case X86::BI__builtin_ia32_pmuludq512: 12274 return EmitX86Muldq(*this, /*IsSigned*/false, Ops); 12275 12276 case X86::BI__builtin_ia32_pmuldq128: 12277 case X86::BI__builtin_ia32_pmuldq256: 12278 case X86::BI__builtin_ia32_pmuldq512: 12279 return EmitX86Muldq(*this, /*IsSigned*/true, Ops); 12280 12281 case X86::BI__builtin_ia32_pternlogd512_mask: 12282 case X86::BI__builtin_ia32_pternlogq512_mask: 12283 case 
X86::BI__builtin_ia32_pternlogd128_mask: 12284 case X86::BI__builtin_ia32_pternlogd256_mask: 12285 case X86::BI__builtin_ia32_pternlogq128_mask: 12286 case X86::BI__builtin_ia32_pternlogq256_mask: 12287 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops); 12288 12289 case X86::BI__builtin_ia32_pternlogd512_maskz: 12290 case X86::BI__builtin_ia32_pternlogq512_maskz: 12291 case X86::BI__builtin_ia32_pternlogd128_maskz: 12292 case X86::BI__builtin_ia32_pternlogd256_maskz: 12293 case X86::BI__builtin_ia32_pternlogq128_maskz: 12294 case X86::BI__builtin_ia32_pternlogq256_maskz: 12295 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); 12296 12297 case X86::BI__builtin_ia32_vpshldd128: 12298 case X86::BI__builtin_ia32_vpshldd256: 12299 case X86::BI__builtin_ia32_vpshldd512: 12300 case X86::BI__builtin_ia32_vpshldq128: 12301 case X86::BI__builtin_ia32_vpshldq256: 12302 case X86::BI__builtin_ia32_vpshldq512: 12303 case X86::BI__builtin_ia32_vpshldw128: 12304 case X86::BI__builtin_ia32_vpshldw256: 12305 case X86::BI__builtin_ia32_vpshldw512: 12306 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); 12307 12308 case X86::BI__builtin_ia32_vpshrdd128: 12309 case X86::BI__builtin_ia32_vpshrdd256: 12310 case X86::BI__builtin_ia32_vpshrdd512: 12311 case X86::BI__builtin_ia32_vpshrdq128: 12312 case X86::BI__builtin_ia32_vpshrdq256: 12313 case X86::BI__builtin_ia32_vpshrdq512: 12314 case X86::BI__builtin_ia32_vpshrdw128: 12315 case X86::BI__builtin_ia32_vpshrdw256: 12316 case X86::BI__builtin_ia32_vpshrdw512: 12317 // Ops 0 and 1 are swapped. 
12318 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); 12319 12320 case X86::BI__builtin_ia32_vpshldvd128: 12321 case X86::BI__builtin_ia32_vpshldvd256: 12322 case X86::BI__builtin_ia32_vpshldvd512: 12323 case X86::BI__builtin_ia32_vpshldvq128: 12324 case X86::BI__builtin_ia32_vpshldvq256: 12325 case X86::BI__builtin_ia32_vpshldvq512: 12326 case X86::BI__builtin_ia32_vpshldvw128: 12327 case X86::BI__builtin_ia32_vpshldvw256: 12328 case X86::BI__builtin_ia32_vpshldvw512: 12329 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); 12330 12331 case X86::BI__builtin_ia32_vpshrdvd128: 12332 case X86::BI__builtin_ia32_vpshrdvd256: 12333 case X86::BI__builtin_ia32_vpshrdvd512: 12334 case X86::BI__builtin_ia32_vpshrdvq128: 12335 case X86::BI__builtin_ia32_vpshrdvq256: 12336 case X86::BI__builtin_ia32_vpshrdvq512: 12337 case X86::BI__builtin_ia32_vpshrdvw128: 12338 case X86::BI__builtin_ia32_vpshrdvw256: 12339 case X86::BI__builtin_ia32_vpshrdvw512: 12340 // Ops 0 and 1 are swapped. 12341 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); 12342 12343 // 3DNow! 
12344 case X86::BI__builtin_ia32_pswapdsf: 12345 case X86::BI__builtin_ia32_pswapdsi: { 12346 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 12347 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 12348 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 12349 return Builder.CreateCall(F, Ops, "pswapd"); 12350 } 12351 case X86::BI__builtin_ia32_rdrand16_step: 12352 case X86::BI__builtin_ia32_rdrand32_step: 12353 case X86::BI__builtin_ia32_rdrand64_step: 12354 case X86::BI__builtin_ia32_rdseed16_step: 12355 case X86::BI__builtin_ia32_rdseed32_step: 12356 case X86::BI__builtin_ia32_rdseed64_step: { 12357 Intrinsic::ID ID; 12358 switch (BuiltinID) { 12359 default: llvm_unreachable("Unsupported intrinsic!"); 12360 case X86::BI__builtin_ia32_rdrand16_step: 12361 ID = Intrinsic::x86_rdrand_16; 12362 break; 12363 case X86::BI__builtin_ia32_rdrand32_step: 12364 ID = Intrinsic::x86_rdrand_32; 12365 break; 12366 case X86::BI__builtin_ia32_rdrand64_step: 12367 ID = Intrinsic::x86_rdrand_64; 12368 break; 12369 case X86::BI__builtin_ia32_rdseed16_step: 12370 ID = Intrinsic::x86_rdseed_16; 12371 break; 12372 case X86::BI__builtin_ia32_rdseed32_step: 12373 ID = Intrinsic::x86_rdseed_32; 12374 break; 12375 case X86::BI__builtin_ia32_rdseed64_step: 12376 ID = Intrinsic::x86_rdseed_64; 12377 break; 12378 } 12379 12380 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 12381 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 12382 Ops[0]); 12383 return Builder.CreateExtractValue(Call, 1); 12384 } 12385 case X86::BI__builtin_ia32_addcarryx_u32: 12386 case X86::BI__builtin_ia32_addcarryx_u64: 12387 case X86::BI__builtin_ia32_subborrow_u32: 12388 case X86::BI__builtin_ia32_subborrow_u64: { 12389 Intrinsic::ID IID; 12390 switch (BuiltinID) { 12391 default: llvm_unreachable("Unsupported intrinsic!"); 12392 case X86::BI__builtin_ia32_addcarryx_u32: 12393 IID = Intrinsic::x86_addcarry_32; 12394 break; 12395 case 
X86::BI__builtin_ia32_addcarryx_u64: 12396 IID = Intrinsic::x86_addcarry_64; 12397 break; 12398 case X86::BI__builtin_ia32_subborrow_u32: 12399 IID = Intrinsic::x86_subborrow_32; 12400 break; 12401 case X86::BI__builtin_ia32_subborrow_u64: 12402 IID = Intrinsic::x86_subborrow_64; 12403 break; 12404 } 12405 12406 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), 12407 { Ops[0], Ops[1], Ops[2] }); 12408 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), 12409 Ops[3]); 12410 return Builder.CreateExtractValue(Call, 0); 12411 } 12412 12413 case X86::BI__builtin_ia32_fpclassps128_mask: 12414 case X86::BI__builtin_ia32_fpclassps256_mask: 12415 case X86::BI__builtin_ia32_fpclassps512_mask: 12416 case X86::BI__builtin_ia32_fpclasspd128_mask: 12417 case X86::BI__builtin_ia32_fpclasspd256_mask: 12418 case X86::BI__builtin_ia32_fpclasspd512_mask: { 12419 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 12420 Value *MaskIn = Ops[2]; 12421 Ops.erase(&Ops[2]); 12422 12423 Intrinsic::ID ID; 12424 switch (BuiltinID) { 12425 default: llvm_unreachable("Unsupported intrinsic!"); 12426 case X86::BI__builtin_ia32_fpclassps128_mask: 12427 ID = Intrinsic::x86_avx512_fpclass_ps_128; 12428 break; 12429 case X86::BI__builtin_ia32_fpclassps256_mask: 12430 ID = Intrinsic::x86_avx512_fpclass_ps_256; 12431 break; 12432 case X86::BI__builtin_ia32_fpclassps512_mask: 12433 ID = Intrinsic::x86_avx512_fpclass_ps_512; 12434 break; 12435 case X86::BI__builtin_ia32_fpclasspd128_mask: 12436 ID = Intrinsic::x86_avx512_fpclass_pd_128; 12437 break; 12438 case X86::BI__builtin_ia32_fpclasspd256_mask: 12439 ID = Intrinsic::x86_avx512_fpclass_pd_256; 12440 break; 12441 case X86::BI__builtin_ia32_fpclasspd512_mask: 12442 ID = Intrinsic::x86_avx512_fpclass_pd_512; 12443 break; 12444 } 12445 12446 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 12447 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); 12448 } 12449 12450 case 
X86::BI__builtin_ia32_vp2intersect_q_512: 12451 case X86::BI__builtin_ia32_vp2intersect_q_256: 12452 case X86::BI__builtin_ia32_vp2intersect_q_128: 12453 case X86::BI__builtin_ia32_vp2intersect_d_512: 12454 case X86::BI__builtin_ia32_vp2intersect_d_256: 12455 case X86::BI__builtin_ia32_vp2intersect_d_128: { 12456 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 12457 Intrinsic::ID ID; 12458 12459 switch (BuiltinID) { 12460 default: llvm_unreachable("Unsupported intrinsic!"); 12461 case X86::BI__builtin_ia32_vp2intersect_q_512: 12462 ID = Intrinsic::x86_avx512_vp2intersect_q_512; 12463 break; 12464 case X86::BI__builtin_ia32_vp2intersect_q_256: 12465 ID = Intrinsic::x86_avx512_vp2intersect_q_256; 12466 break; 12467 case X86::BI__builtin_ia32_vp2intersect_q_128: 12468 ID = Intrinsic::x86_avx512_vp2intersect_q_128; 12469 break; 12470 case X86::BI__builtin_ia32_vp2intersect_d_512: 12471 ID = Intrinsic::x86_avx512_vp2intersect_d_512; 12472 break; 12473 case X86::BI__builtin_ia32_vp2intersect_d_256: 12474 ID = Intrinsic::x86_avx512_vp2intersect_d_256; 12475 break; 12476 case X86::BI__builtin_ia32_vp2intersect_d_128: 12477 ID = Intrinsic::x86_avx512_vp2intersect_d_128; 12478 break; 12479 } 12480 12481 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); 12482 Value *Result = Builder.CreateExtractValue(Call, 0); 12483 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); 12484 Builder.CreateDefaultAlignedStore(Result, Ops[2]); 12485 12486 Result = Builder.CreateExtractValue(Call, 1); 12487 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); 12488 return Builder.CreateDefaultAlignedStore(Result, Ops[3]); 12489 } 12490 12491 case X86::BI__builtin_ia32_vpmultishiftqb128: 12492 case X86::BI__builtin_ia32_vpmultishiftqb256: 12493 case X86::BI__builtin_ia32_vpmultishiftqb512: { 12494 Intrinsic::ID ID; 12495 switch (BuiltinID) { 12496 default: llvm_unreachable("Unsupported intrinsic!"); 12497 case 
X86::BI__builtin_ia32_vpmultishiftqb128: 12498 ID = Intrinsic::x86_avx512_pmultishift_qb_128; 12499 break; 12500 case X86::BI__builtin_ia32_vpmultishiftqb256: 12501 ID = Intrinsic::x86_avx512_pmultishift_qb_256; 12502 break; 12503 case X86::BI__builtin_ia32_vpmultishiftqb512: 12504 ID = Intrinsic::x86_avx512_pmultishift_qb_512; 12505 break; 12506 } 12507 12508 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 12509 } 12510 12511 case X86::BI__builtin_ia32_vpshufbitqmb128_mask: 12512 case X86::BI__builtin_ia32_vpshufbitqmb256_mask: 12513 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { 12514 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 12515 Value *MaskIn = Ops[2]; 12516 Ops.erase(&Ops[2]); 12517 12518 Intrinsic::ID ID; 12519 switch (BuiltinID) { 12520 default: llvm_unreachable("Unsupported intrinsic!"); 12521 case X86::BI__builtin_ia32_vpshufbitqmb128_mask: 12522 ID = Intrinsic::x86_avx512_vpshufbitqmb_128; 12523 break; 12524 case X86::BI__builtin_ia32_vpshufbitqmb256_mask: 12525 ID = Intrinsic::x86_avx512_vpshufbitqmb_256; 12526 break; 12527 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: 12528 ID = Intrinsic::x86_avx512_vpshufbitqmb_512; 12529 break; 12530 } 12531 12532 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 12533 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn); 12534 } 12535 12536 // packed comparison intrinsics 12537 case X86::BI__builtin_ia32_cmpeqps: 12538 case X86::BI__builtin_ia32_cmpeqpd: 12539 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false); 12540 case X86::BI__builtin_ia32_cmpltps: 12541 case X86::BI__builtin_ia32_cmpltpd: 12542 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true); 12543 case X86::BI__builtin_ia32_cmpleps: 12544 case X86::BI__builtin_ia32_cmplepd: 12545 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true); 12546 case X86::BI__builtin_ia32_cmpunordps: 12547 case X86::BI__builtin_ia32_cmpunordpd: 12548 return 
getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false); 12549 case X86::BI__builtin_ia32_cmpneqps: 12550 case X86::BI__builtin_ia32_cmpneqpd: 12551 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false); 12552 case X86::BI__builtin_ia32_cmpnltps: 12553 case X86::BI__builtin_ia32_cmpnltpd: 12554 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true); 12555 case X86::BI__builtin_ia32_cmpnleps: 12556 case X86::BI__builtin_ia32_cmpnlepd: 12557 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true); 12558 case X86::BI__builtin_ia32_cmpordps: 12559 case X86::BI__builtin_ia32_cmpordpd: 12560 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); 12561 case X86::BI__builtin_ia32_cmpps: 12562 case X86::BI__builtin_ia32_cmpps256: 12563 case X86::BI__builtin_ia32_cmppd: 12564 case X86::BI__builtin_ia32_cmppd256: 12565 case X86::BI__builtin_ia32_cmpps128_mask: 12566 case X86::BI__builtin_ia32_cmpps256_mask: 12567 case X86::BI__builtin_ia32_cmpps512_mask: 12568 case X86::BI__builtin_ia32_cmppd128_mask: 12569 case X86::BI__builtin_ia32_cmppd256_mask: 12570 case X86::BI__builtin_ia32_cmppd512_mask: { 12571 // Lowering vector comparisons to fcmp instructions, while 12572 // ignoring signalling behaviour requested 12573 // ignoring rounding mode requested 12574 // This is only possible as long as FENV_ACCESS is not implemented. 12575 // See also: https://reviews.llvm.org/D45616 12576 12577 // The third argument is the comparison condition, an integer in the 12578 // range [0, 31] 12579 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f; 12580 12581 // Lowering to IR fcmp instruction. 12582 // Ignoring requested signaling behaviour, 12583 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT. 12584 FCmpInst::Predicate Pred; 12585 bool IsSignaling; 12586 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling 12587 // behavior is inverted. We'll handle that after the switch. 
12588 switch (CC & 0xf) { 12589 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break; 12590 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break; 12591 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break; 12592 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break; 12593 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break; 12594 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break; 12595 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break; 12596 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break; 12597 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break; 12598 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break; 12599 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break; 12600 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break; 12601 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break; 12602 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break; 12603 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break; 12604 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break; 12605 default: llvm_unreachable("Unhandled CC"); 12606 } 12607 12608 // Invert the signalling behavior for 16-31. 12609 if (CC & 0x10) 12610 IsSignaling = !IsSignaling; 12611 12612 // If the predicate is true or false and we're using constrained intrinsics, 12613 // we don't have a compare intrinsic we can use. Just use the legacy X86 12614 // specific intrinsic. 
12615 if ((Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE) && 12616 Builder.getIsFPConstrained()) { 12617 12618 Intrinsic::ID IID; 12619 switch (BuiltinID) { 12620 default: llvm_unreachable("Unexpected builtin"); 12621 case X86::BI__builtin_ia32_cmpps: 12622 IID = Intrinsic::x86_sse_cmp_ps; 12623 break; 12624 case X86::BI__builtin_ia32_cmpps256: 12625 IID = Intrinsic::x86_avx_cmp_ps_256; 12626 break; 12627 case X86::BI__builtin_ia32_cmppd: 12628 IID = Intrinsic::x86_sse2_cmp_pd; 12629 break; 12630 case X86::BI__builtin_ia32_cmppd256: 12631 IID = Intrinsic::x86_avx_cmp_pd_256; 12632 break; 12633 case X86::BI__builtin_ia32_cmpps512_mask: 12634 IID = Intrinsic::x86_avx512_cmp_ps_512; 12635 break; 12636 case X86::BI__builtin_ia32_cmppd512_mask: 12637 IID = Intrinsic::x86_avx512_cmp_pd_512; 12638 break; 12639 case X86::BI__builtin_ia32_cmpps128_mask: 12640 IID = Intrinsic::x86_avx512_cmp_ps_128; 12641 break; 12642 case X86::BI__builtin_ia32_cmpps256_mask: 12643 IID = Intrinsic::x86_avx512_cmp_ps_256; 12644 break; 12645 case X86::BI__builtin_ia32_cmppd128_mask: 12646 IID = Intrinsic::x86_avx512_cmp_pd_128; 12647 break; 12648 case X86::BI__builtin_ia32_cmppd256_mask: 12649 IID = Intrinsic::x86_avx512_cmp_pd_256; 12650 break; 12651 } 12652 12653 Function *Intr = CGM.getIntrinsic(IID); 12654 if (Intr->getReturnType()->getVectorElementType()->isIntegerTy(1)) { 12655 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 12656 Value *MaskIn = Ops[3]; 12657 Ops.erase(&Ops[3]); 12658 12659 Value *Cmp = Builder.CreateCall(Intr, Ops); 12660 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn); 12661 } 12662 12663 return Builder.CreateCall(Intr, Ops); 12664 } 12665 12666 // Builtins without the _mask suffix return a vector of integers 12667 // of the same width as the input vectors 12668 switch (BuiltinID) { 12669 case X86::BI__builtin_ia32_cmpps512_mask: 12670 case X86::BI__builtin_ia32_cmppd512_mask: 12671 case X86::BI__builtin_ia32_cmpps128_mask: 
12672 case X86::BI__builtin_ia32_cmpps256_mask: 12673 case X86::BI__builtin_ia32_cmppd128_mask: 12674 case X86::BI__builtin_ia32_cmppd256_mask: { 12675 // FIXME: Support SAE. 12676 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 12677 Value *Cmp; 12678 if (IsSignaling) 12679 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); 12680 else 12681 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 12682 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); 12683 } 12684 default: 12685 return getVectorFCmpIR(Pred, IsSignaling); 12686 } 12687 } 12688 12689 // SSE scalar comparison intrinsics 12690 case X86::BI__builtin_ia32_cmpeqss: 12691 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 12692 case X86::BI__builtin_ia32_cmpltss: 12693 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 12694 case X86::BI__builtin_ia32_cmpless: 12695 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 12696 case X86::BI__builtin_ia32_cmpunordss: 12697 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 12698 case X86::BI__builtin_ia32_cmpneqss: 12699 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 12700 case X86::BI__builtin_ia32_cmpnltss: 12701 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 12702 case X86::BI__builtin_ia32_cmpnless: 12703 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 12704 case X86::BI__builtin_ia32_cmpordss: 12705 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 12706 case X86::BI__builtin_ia32_cmpeqsd: 12707 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 12708 case X86::BI__builtin_ia32_cmpltsd: 12709 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 12710 case X86::BI__builtin_ia32_cmplesd: 12711 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 12712 case X86::BI__builtin_ia32_cmpunordsd: 12713 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 12714 case X86::BI__builtin_ia32_cmpneqsd: 12715 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 
4); 12716 case X86::BI__builtin_ia32_cmpnltsd: 12717 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 12718 case X86::BI__builtin_ia32_cmpnlesd: 12719 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 12720 case X86::BI__builtin_ia32_cmpordsd: 12721 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 12722 12723 // f16c half2float intrinsics 12724 case X86::BI__builtin_ia32_vcvtph2ps: 12725 case X86::BI__builtin_ia32_vcvtph2ps256: 12726 case X86::BI__builtin_ia32_vcvtph2ps_mask: 12727 case X86::BI__builtin_ia32_vcvtph2ps256_mask: 12728 case X86::BI__builtin_ia32_vcvtph2ps512_mask: 12729 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType())); 12730 12731 // AVX512 bf16 intrinsics 12732 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { 12733 Ops[2] = getMaskVecValue(*this, Ops[2], 12734 Ops[0]->getType()->getVectorNumElements()); 12735 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; 12736 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 12737 } 12738 case X86::BI__builtin_ia32_cvtsbf162ss_32: 12739 return EmitX86CvtBF16ToFloatExpr(*this, E, Ops); 12740 12741 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: 12742 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { 12743 Intrinsic::ID IID; 12744 switch (BuiltinID) { 12745 default: llvm_unreachable("Unsupported intrinsic!"); 12746 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: 12747 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256; 12748 break; 12749 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: 12750 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512; 12751 break; 12752 } 12753 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]); 12754 return EmitX86Select(*this, Ops[2], Res, Ops[1]); 12755 } 12756 12757 case X86::BI__emul: 12758 case X86::BI__emulu: { 12759 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 12760 bool isSigned = (BuiltinID == X86::BI__emul); 12761 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, 
isSigned); 12762 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 12763 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 12764 } 12765 case X86::BI__mulh: 12766 case X86::BI__umulh: 12767 case X86::BI_mul128: 12768 case X86::BI_umul128: { 12769 llvm::Type *ResType = ConvertType(E->getType()); 12770 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 12771 12772 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 12773 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 12774 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 12775 12776 Value *MulResult, *HigherBits; 12777 if (IsSigned) { 12778 MulResult = Builder.CreateNSWMul(LHS, RHS); 12779 HigherBits = Builder.CreateAShr(MulResult, 64); 12780 } else { 12781 MulResult = Builder.CreateNUWMul(LHS, RHS); 12782 HigherBits = Builder.CreateLShr(MulResult, 64); 12783 } 12784 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 12785 12786 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 12787 return HigherBits; 12788 12789 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 12790 Builder.CreateStore(HigherBits, HighBitsAddress); 12791 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 12792 } 12793 12794 case X86::BI__faststorefence: { 12795 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 12796 llvm::SyncScope::System); 12797 } 12798 case X86::BI__shiftleft128: 12799 case X86::BI__shiftright128: { 12800 // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this: 12801 // llvm::Function *F = CGM.getIntrinsic( 12802 // BuiltinID == X86::BI__shiftleft128 ? 
Intrinsic::fshl : Intrinsic::fshr, 12803 // Int64Ty); 12804 // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 12805 // return Builder.CreateCall(F, Ops); 12806 llvm::Type *Int128Ty = Builder.getInt128Ty(); 12807 Value *HighPart128 = 12808 Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64); 12809 Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty); 12810 Value *Val = Builder.CreateOr(HighPart128, LowPart128); 12811 Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty), 12812 llvm::ConstantInt::get(Int128Ty, 0x3f)); 12813 Value *Res; 12814 if (BuiltinID == X86::BI__shiftleft128) 12815 Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64); 12816 else 12817 Res = Builder.CreateLShr(Val, Amt); 12818 return Builder.CreateTrunc(Res, Int64Ty); 12819 } 12820 case X86::BI_ReadWriteBarrier: 12821 case X86::BI_ReadBarrier: 12822 case X86::BI_WriteBarrier: { 12823 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 12824 llvm::SyncScope::SingleThread); 12825 } 12826 case X86::BI_BitScanForward: 12827 case X86::BI_BitScanForward64: 12828 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 12829 case X86::BI_BitScanReverse: 12830 case X86::BI_BitScanReverse64: 12831 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 12832 12833 case X86::BI_InterlockedAnd64: 12834 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 12835 case X86::BI_InterlockedExchange64: 12836 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 12837 case X86::BI_InterlockedExchangeAdd64: 12838 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 12839 case X86::BI_InterlockedExchangeSub64: 12840 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 12841 case X86::BI_InterlockedOr64: 12842 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 12843 case X86::BI_InterlockedXor64: 12844 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 12845 case X86::BI_InterlockedDecrement64: 
12846 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 12847 case X86::BI_InterlockedIncrement64: 12848 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 12849 case X86::BI_InterlockedCompareExchange128: { 12850 // InterlockedCompareExchange128 doesn't directly refer to 128bit ints, 12851 // instead it takes pointers to 64bit ints for Destination and 12852 // ComparandResult, and exchange is taken as two 64bit ints (high & low). 12853 // The previous value is written to ComparandResult, and success is 12854 // returned. 12855 12856 llvm::Type *Int128Ty = Builder.getInt128Ty(); 12857 llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); 12858 12859 Value *Destination = 12860 Builder.CreateBitCast(Ops[0], Int128PtrTy); 12861 Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty); 12862 Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty); 12863 Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy), 12864 getContext().toCharUnitsFromBits(128)); 12865 12866 Value *Exchange = Builder.CreateOr( 12867 Builder.CreateShl(ExchangeHigh128, 64, "", false, false), 12868 ExchangeLow128); 12869 12870 Value *Comparand = Builder.CreateLoad(ComparandResult); 12871 12872 AtomicCmpXchgInst *CXI = 12873 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 12874 AtomicOrdering::SequentiallyConsistent, 12875 AtomicOrdering::SequentiallyConsistent); 12876 CXI->setVolatile(true); 12877 12878 // Write the result back to the inout pointer. 12879 Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult); 12880 12881 // Get the success boolean and zero extend it to i8. 
12882 Value *Success = Builder.CreateExtractValue(CXI, 1); 12883 return Builder.CreateZExt(Success, ConvertType(E->getType())); 12884 } 12885 12886 case X86::BI_AddressOfReturnAddress: { 12887 Function *F = 12888 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); 12889 return Builder.CreateCall(F); 12890 } 12891 case X86::BI__stosb: { 12892 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 12893 // instruction, but it will create a memset that won't be optimized away. 12894 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true); 12895 } 12896 case X86::BI__ud2: 12897 // llvm.trap makes a ud2a instruction on x86. 12898 return EmitTrapCall(Intrinsic::trap); 12899 case X86::BI__int2c: { 12900 // This syscall signals a driver assertion failure in x86 NT kernels. 12901 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 12902 llvm::InlineAsm *IA = 12903 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true); 12904 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 12905 getLLVMContext(), llvm::AttributeList::FunctionIndex, 12906 llvm::Attribute::NoReturn); 12907 llvm::CallInst *CI = Builder.CreateCall(IA); 12908 CI->setAttributes(NoReturnAttr); 12909 return CI; 12910 } 12911 case X86::BI__readfsbyte: 12912 case X86::BI__readfsword: 12913 case X86::BI__readfsdword: 12914 case X86::BI__readfsqword: { 12915 llvm::Type *IntTy = ConvertType(E->getType()); 12916 Value *Ptr = 12917 Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257)); 12918 LoadInst *Load = Builder.CreateAlignedLoad( 12919 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 12920 Load->setVolatile(true); 12921 return Load; 12922 } 12923 case X86::BI__readgsbyte: 12924 case X86::BI__readgsword: 12925 case X86::BI__readgsdword: 12926 case X86::BI__readgsqword: { 12927 llvm::Type *IntTy = ConvertType(E->getType()); 12928 Value *Ptr = 12929 Builder.CreateIntToPtr(Ops[0], 
llvm::PointerType::get(IntTy, 256)); 12930 LoadInst *Load = Builder.CreateAlignedLoad( 12931 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 12932 Load->setVolatile(true); 12933 return Load; 12934 } 12935 case X86::BI__builtin_ia32_paddsb512: 12936 case X86::BI__builtin_ia32_paddsw512: 12937 case X86::BI__builtin_ia32_paddsb256: 12938 case X86::BI__builtin_ia32_paddsw256: 12939 case X86::BI__builtin_ia32_paddsb128: 12940 case X86::BI__builtin_ia32_paddsw128: 12941 return EmitX86AddSubSatExpr(*this, Ops, true, true); 12942 case X86::BI__builtin_ia32_paddusb512: 12943 case X86::BI__builtin_ia32_paddusw512: 12944 case X86::BI__builtin_ia32_paddusb256: 12945 case X86::BI__builtin_ia32_paddusw256: 12946 case X86::BI__builtin_ia32_paddusb128: 12947 case X86::BI__builtin_ia32_paddusw128: 12948 return EmitX86AddSubSatExpr(*this, Ops, false, true); 12949 case X86::BI__builtin_ia32_psubsb512: 12950 case X86::BI__builtin_ia32_psubsw512: 12951 case X86::BI__builtin_ia32_psubsb256: 12952 case X86::BI__builtin_ia32_psubsw256: 12953 case X86::BI__builtin_ia32_psubsb128: 12954 case X86::BI__builtin_ia32_psubsw128: 12955 return EmitX86AddSubSatExpr(*this, Ops, true, false); 12956 case X86::BI__builtin_ia32_psubusb512: 12957 case X86::BI__builtin_ia32_psubusw512: 12958 case X86::BI__builtin_ia32_psubusb256: 12959 case X86::BI__builtin_ia32_psubusw256: 12960 case X86::BI__builtin_ia32_psubusb128: 12961 case X86::BI__builtin_ia32_psubusw128: 12962 return EmitX86AddSubSatExpr(*this, Ops, false, false); 12963 } 12964 } 12965 12966 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 12967 const CallExpr *E) { 12968 SmallVector<Value*, 4> Ops; 12969 12970 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 12971 Ops.push_back(EmitScalarExpr(E->getArg(i))); 12972 12973 Intrinsic::ID ID = Intrinsic::not_intrinsic; 12974 12975 switch (BuiltinID) { 12976 default: return nullptr; 12977 12978 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what 
we 12979 // call __builtin_readcyclecounter. 12980 case PPC::BI__builtin_ppc_get_timebase: 12981 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 12982 12983 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 12984 case PPC::BI__builtin_altivec_lvx: 12985 case PPC::BI__builtin_altivec_lvxl: 12986 case PPC::BI__builtin_altivec_lvebx: 12987 case PPC::BI__builtin_altivec_lvehx: 12988 case PPC::BI__builtin_altivec_lvewx: 12989 case PPC::BI__builtin_altivec_lvsl: 12990 case PPC::BI__builtin_altivec_lvsr: 12991 case PPC::BI__builtin_vsx_lxvd2x: 12992 case PPC::BI__builtin_vsx_lxvw4x: 12993 case PPC::BI__builtin_vsx_lxvd2x_be: 12994 case PPC::BI__builtin_vsx_lxvw4x_be: 12995 case PPC::BI__builtin_vsx_lxvl: 12996 case PPC::BI__builtin_vsx_lxvll: 12997 { 12998 if(BuiltinID == PPC::BI__builtin_vsx_lxvl || 12999 BuiltinID == PPC::BI__builtin_vsx_lxvll){ 13000 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); 13001 }else { 13002 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 13003 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 13004 Ops.pop_back(); 13005 } 13006 13007 switch (BuiltinID) { 13008 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 13009 case PPC::BI__builtin_altivec_lvx: 13010 ID = Intrinsic::ppc_altivec_lvx; 13011 break; 13012 case PPC::BI__builtin_altivec_lvxl: 13013 ID = Intrinsic::ppc_altivec_lvxl; 13014 break; 13015 case PPC::BI__builtin_altivec_lvebx: 13016 ID = Intrinsic::ppc_altivec_lvebx; 13017 break; 13018 case PPC::BI__builtin_altivec_lvehx: 13019 ID = Intrinsic::ppc_altivec_lvehx; 13020 break; 13021 case PPC::BI__builtin_altivec_lvewx: 13022 ID = Intrinsic::ppc_altivec_lvewx; 13023 break; 13024 case PPC::BI__builtin_altivec_lvsl: 13025 ID = Intrinsic::ppc_altivec_lvsl; 13026 break; 13027 case PPC::BI__builtin_altivec_lvsr: 13028 ID = Intrinsic::ppc_altivec_lvsr; 13029 break; 13030 case PPC::BI__builtin_vsx_lxvd2x: 13031 ID = Intrinsic::ppc_vsx_lxvd2x; 13032 break; 13033 case PPC::BI__builtin_vsx_lxvw4x: 13034 ID = 
Intrinsic::ppc_vsx_lxvw4x; 13035 break; 13036 case PPC::BI__builtin_vsx_lxvd2x_be: 13037 ID = Intrinsic::ppc_vsx_lxvd2x_be; 13038 break; 13039 case PPC::BI__builtin_vsx_lxvw4x_be: 13040 ID = Intrinsic::ppc_vsx_lxvw4x_be; 13041 break; 13042 case PPC::BI__builtin_vsx_lxvl: 13043 ID = Intrinsic::ppc_vsx_lxvl; 13044 break; 13045 case PPC::BI__builtin_vsx_lxvll: 13046 ID = Intrinsic::ppc_vsx_lxvll; 13047 break; 13048 } 13049 llvm::Function *F = CGM.getIntrinsic(ID); 13050 return Builder.CreateCall(F, Ops, ""); 13051 } 13052 13053 // vec_st, vec_xst_be 13054 case PPC::BI__builtin_altivec_stvx: 13055 case PPC::BI__builtin_altivec_stvxl: 13056 case PPC::BI__builtin_altivec_stvebx: 13057 case PPC::BI__builtin_altivec_stvehx: 13058 case PPC::BI__builtin_altivec_stvewx: 13059 case PPC::BI__builtin_vsx_stxvd2x: 13060 case PPC::BI__builtin_vsx_stxvw4x: 13061 case PPC::BI__builtin_vsx_stxvd2x_be: 13062 case PPC::BI__builtin_vsx_stxvw4x_be: 13063 case PPC::BI__builtin_vsx_stxvl: 13064 case PPC::BI__builtin_vsx_stxvll: 13065 { 13066 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 13067 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 13068 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 13069 }else { 13070 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 13071 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 13072 Ops.pop_back(); 13073 } 13074 13075 switch (BuiltinID) { 13076 default: llvm_unreachable("Unsupported st intrinsic!"); 13077 case PPC::BI__builtin_altivec_stvx: 13078 ID = Intrinsic::ppc_altivec_stvx; 13079 break; 13080 case PPC::BI__builtin_altivec_stvxl: 13081 ID = Intrinsic::ppc_altivec_stvxl; 13082 break; 13083 case PPC::BI__builtin_altivec_stvebx: 13084 ID = Intrinsic::ppc_altivec_stvebx; 13085 break; 13086 case PPC::BI__builtin_altivec_stvehx: 13087 ID = Intrinsic::ppc_altivec_stvehx; 13088 break; 13089 case PPC::BI__builtin_altivec_stvewx: 13090 ID = Intrinsic::ppc_altivec_stvewx; 13091 break; 13092 case PPC::BI__builtin_vsx_stxvd2x: 13093 ID = 
Intrinsic::ppc_vsx_stxvd2x; 13094 break; 13095 case PPC::BI__builtin_vsx_stxvw4x: 13096 ID = Intrinsic::ppc_vsx_stxvw4x; 13097 break; 13098 case PPC::BI__builtin_vsx_stxvd2x_be: 13099 ID = Intrinsic::ppc_vsx_stxvd2x_be; 13100 break; 13101 case PPC::BI__builtin_vsx_stxvw4x_be: 13102 ID = Intrinsic::ppc_vsx_stxvw4x_be; 13103 break; 13104 case PPC::BI__builtin_vsx_stxvl: 13105 ID = Intrinsic::ppc_vsx_stxvl; 13106 break; 13107 case PPC::BI__builtin_vsx_stxvll: 13108 ID = Intrinsic::ppc_vsx_stxvll; 13109 break; 13110 } 13111 llvm::Function *F = CGM.getIntrinsic(ID); 13112 return Builder.CreateCall(F, Ops, ""); 13113 } 13114 // Square root 13115 case PPC::BI__builtin_vsx_xvsqrtsp: 13116 case PPC::BI__builtin_vsx_xvsqrtdp: { 13117 llvm::Type *ResultType = ConvertType(E->getType()); 13118 Value *X = EmitScalarExpr(E->getArg(0)); 13119 ID = Intrinsic::sqrt; 13120 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 13121 return Builder.CreateCall(F, X); 13122 } 13123 // Count leading zeros 13124 case PPC::BI__builtin_altivec_vclzb: 13125 case PPC::BI__builtin_altivec_vclzh: 13126 case PPC::BI__builtin_altivec_vclzw: 13127 case PPC::BI__builtin_altivec_vclzd: { 13128 llvm::Type *ResultType = ConvertType(E->getType()); 13129 Value *X = EmitScalarExpr(E->getArg(0)); 13130 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 13131 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 13132 return Builder.CreateCall(F, {X, Undef}); 13133 } 13134 case PPC::BI__builtin_altivec_vctzb: 13135 case PPC::BI__builtin_altivec_vctzh: 13136 case PPC::BI__builtin_altivec_vctzw: 13137 case PPC::BI__builtin_altivec_vctzd: { 13138 llvm::Type *ResultType = ConvertType(E->getType()); 13139 Value *X = EmitScalarExpr(E->getArg(0)); 13140 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 13141 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 13142 return Builder.CreateCall(F, {X, Undef}); 13143 } 13144 case PPC::BI__builtin_altivec_vpopcntb: 13145 case 
PPC::BI__builtin_altivec_vpopcnth: 13146 case PPC::BI__builtin_altivec_vpopcntw: 13147 case PPC::BI__builtin_altivec_vpopcntd: { 13148 llvm::Type *ResultType = ConvertType(E->getType()); 13149 Value *X = EmitScalarExpr(E->getArg(0)); 13150 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 13151 return Builder.CreateCall(F, X); 13152 } 13153 // Copy sign 13154 case PPC::BI__builtin_vsx_xvcpsgnsp: 13155 case PPC::BI__builtin_vsx_xvcpsgndp: { 13156 llvm::Type *ResultType = ConvertType(E->getType()); 13157 Value *X = EmitScalarExpr(E->getArg(0)); 13158 Value *Y = EmitScalarExpr(E->getArg(1)); 13159 ID = Intrinsic::copysign; 13160 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 13161 return Builder.CreateCall(F, {X, Y}); 13162 } 13163 // Rounding/truncation 13164 case PPC::BI__builtin_vsx_xvrspip: 13165 case PPC::BI__builtin_vsx_xvrdpip: 13166 case PPC::BI__builtin_vsx_xvrdpim: 13167 case PPC::BI__builtin_vsx_xvrspim: 13168 case PPC::BI__builtin_vsx_xvrdpi: 13169 case PPC::BI__builtin_vsx_xvrspi: 13170 case PPC::BI__builtin_vsx_xvrdpic: 13171 case PPC::BI__builtin_vsx_xvrspic: 13172 case PPC::BI__builtin_vsx_xvrdpiz: 13173 case PPC::BI__builtin_vsx_xvrspiz: { 13174 llvm::Type *ResultType = ConvertType(E->getType()); 13175 Value *X = EmitScalarExpr(E->getArg(0)); 13176 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 13177 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 13178 ID = Intrinsic::floor; 13179 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 13180 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 13181 ID = Intrinsic::round; 13182 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 13183 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 13184 ID = Intrinsic::nearbyint; 13185 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 13186 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 13187 ID = Intrinsic::ceil; 13188 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 13189 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 13190 ID = Intrinsic::trunc; 13191 
llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 13192 return Builder.CreateCall(F, X); 13193 } 13194 13195 // Absolute value 13196 case PPC::BI__builtin_vsx_xvabsdp: 13197 case PPC::BI__builtin_vsx_xvabssp: { 13198 llvm::Type *ResultType = ConvertType(E->getType()); 13199 Value *X = EmitScalarExpr(E->getArg(0)); 13200 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 13201 return Builder.CreateCall(F, X); 13202 } 13203 13204 // FMA variations 13205 case PPC::BI__builtin_vsx_xvmaddadp: 13206 case PPC::BI__builtin_vsx_xvmaddasp: 13207 case PPC::BI__builtin_vsx_xvnmaddadp: 13208 case PPC::BI__builtin_vsx_xvnmaddasp: 13209 case PPC::BI__builtin_vsx_xvmsubadp: 13210 case PPC::BI__builtin_vsx_xvmsubasp: 13211 case PPC::BI__builtin_vsx_xvnmsubadp: 13212 case PPC::BI__builtin_vsx_xvnmsubasp: { 13213 llvm::Type *ResultType = ConvertType(E->getType()); 13214 Value *X = EmitScalarExpr(E->getArg(0)); 13215 Value *Y = EmitScalarExpr(E->getArg(1)); 13216 Value *Z = EmitScalarExpr(E->getArg(2)); 13217 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 13218 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 13219 switch (BuiltinID) { 13220 case PPC::BI__builtin_vsx_xvmaddadp: 13221 case PPC::BI__builtin_vsx_xvmaddasp: 13222 return Builder.CreateCall(F, {X, Y, Z}); 13223 case PPC::BI__builtin_vsx_xvnmaddadp: 13224 case PPC::BI__builtin_vsx_xvnmaddasp: 13225 return Builder.CreateFSub(Zero, 13226 Builder.CreateCall(F, {X, Y, Z}), "sub"); 13227 case PPC::BI__builtin_vsx_xvmsubadp: 13228 case PPC::BI__builtin_vsx_xvmsubasp: 13229 return Builder.CreateCall(F, 13230 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 13231 case PPC::BI__builtin_vsx_xvnmsubadp: 13232 case PPC::BI__builtin_vsx_xvnmsubasp: 13233 Value *FsubRes = 13234 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 13235 return Builder.CreateFSub(Zero, FsubRes, "sub"); 13236 } 13237 llvm_unreachable("Unknown FMA operation"); 13238 return nullptr; // 
Suppress no-return warning 13239 } 13240 13241 case PPC::BI__builtin_vsx_insertword: { 13242 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 13243 13244 // Third argument is a compile time constant int. It must be clamped to 13245 // to the range [0, 12]. 13246 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 13247 assert(ArgCI && 13248 "Third arg to xxinsertw intrinsic must be constant integer"); 13249 const int64_t MaxIndex = 12; 13250 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 13251 13252 // The builtin semantics don't exactly match the xxinsertw instructions 13253 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 13254 // word from the first argument, and inserts it in the second argument. The 13255 // instruction extracts the word from its second input register and inserts 13256 // it into its first input register, so swap the first and second arguments. 13257 std::swap(Ops[0], Ops[1]); 13258 13259 // Need to cast the second argument from a vector of unsigned int to a 13260 // vector of long long. 13261 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 13262 13263 if (getTarget().isLittleEndian()) { 13264 // Create a shuffle mask of (1, 0) 13265 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 13266 ConstantInt::get(Int32Ty, 0) 13267 }; 13268 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 13269 13270 // Reverse the double words in the vector we will extract from. 13271 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 13272 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 13273 13274 // Reverse the index. 13275 Index = MaxIndex - Index; 13276 } 13277 13278 // Intrinsic expects the first arg to be a vector of int. 
13279 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 13280 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 13281 return Builder.CreateCall(F, Ops); 13282 } 13283 13284 case PPC::BI__builtin_vsx_extractuword: { 13285 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 13286 13287 // Intrinsic expects the first argument to be a vector of doublewords. 13288 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 13289 13290 // The second argument is a compile time constant int that needs to 13291 // be clamped to the range [0, 12]. 13292 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 13293 assert(ArgCI && 13294 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 13295 const int64_t MaxIndex = 12; 13296 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 13297 13298 if (getTarget().isLittleEndian()) { 13299 // Reverse the index. 13300 Index = MaxIndex - Index; 13301 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 13302 13303 // Emit the call, then reverse the double words of the results vector. 
13304 Value *Call = Builder.CreateCall(F, Ops); 13305 13306 // Create a shuffle mask of (1, 0) 13307 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 13308 ConstantInt::get(Int32Ty, 0) 13309 }; 13310 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 13311 13312 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 13313 return ShuffleCall; 13314 } else { 13315 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 13316 return Builder.CreateCall(F, Ops); 13317 } 13318 } 13319 13320 case PPC::BI__builtin_vsx_xxpermdi: { 13321 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 13322 assert(ArgCI && "Third arg must be constant integer!"); 13323 13324 unsigned Index = ArgCI->getZExtValue(); 13325 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 13326 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 13327 13328 // Account for endianness by treating this as just a shuffle. So we use the 13329 // same indices for both LE and BE in order to produce expected results in 13330 // both cases. 
13331 unsigned ElemIdx0 = (Index & 2) >> 1; 13332 unsigned ElemIdx1 = 2 + (Index & 1); 13333 13334 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 13335 ConstantInt::get(Int32Ty, ElemIdx1)}; 13336 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 13337 13338 Value *ShuffleCall = 13339 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 13340 QualType BIRetType = E->getType(); 13341 auto RetTy = ConvertType(BIRetType); 13342 return Builder.CreateBitCast(ShuffleCall, RetTy); 13343 } 13344 13345 case PPC::BI__builtin_vsx_xxsldwi: { 13346 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 13347 assert(ArgCI && "Third argument must be a compile time constant"); 13348 unsigned Index = ArgCI->getZExtValue() & 0x3; 13349 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 13350 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 13351 13352 // Create a shuffle mask 13353 unsigned ElemIdx0; 13354 unsigned ElemIdx1; 13355 unsigned ElemIdx2; 13356 unsigned ElemIdx3; 13357 if (getTarget().isLittleEndian()) { 13358 // Little endian element N comes from element 8+N-Index of the 13359 // concatenated wide vector (of course, using modulo arithmetic on 13360 // the total number of elements). 
13361 ElemIdx0 = (8 - Index) % 8; 13362 ElemIdx1 = (9 - Index) % 8; 13363 ElemIdx2 = (10 - Index) % 8; 13364 ElemIdx3 = (11 - Index) % 8; 13365 } else { 13366 // Big endian ElemIdx<N> = Index + N 13367 ElemIdx0 = Index; 13368 ElemIdx1 = Index + 1; 13369 ElemIdx2 = Index + 2; 13370 ElemIdx3 = Index + 3; 13371 } 13372 13373 Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), 13374 ConstantInt::get(Int32Ty, ElemIdx1), 13375 ConstantInt::get(Int32Ty, ElemIdx2), 13376 ConstantInt::get(Int32Ty, ElemIdx3)}; 13377 13378 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 13379 Value *ShuffleCall = 13380 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 13381 QualType BIRetType = E->getType(); 13382 auto RetTy = ConvertType(BIRetType); 13383 return Builder.CreateBitCast(ShuffleCall, RetTy); 13384 } 13385 13386 case PPC::BI__builtin_pack_vector_int128: { 13387 bool isLittleEndian = getTarget().isLittleEndian(); 13388 Value *UndefValue = 13389 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2)); 13390 Value *Res = Builder.CreateInsertElement( 13391 UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0)); 13392 Res = Builder.CreateInsertElement(Res, Ops[1], 13393 (uint64_t)(isLittleEndian ? 0 : 1)); 13394 return Builder.CreateBitCast(Res, ConvertType(E->getType())); 13395 } 13396 13397 case PPC::BI__builtin_unpack_vector_int128: { 13398 ConstantInt *Index = cast<ConstantInt>(Ops[1]); 13399 Value *Unpacked = Builder.CreateBitCast( 13400 Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2)); 13401 13402 if (getTarget().isLittleEndian()) 13403 Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); 13404 13405 return Builder.CreateExtractElement(Unpacked, Index); 13406 } 13407 } 13408 } 13409 13410 namespace { 13411 // If \p E is not null pointer, insert address space cast to match return 13412 // type of \p E if necessary. 
13413 Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, 13414 const CallExpr *E = nullptr) { 13415 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); 13416 auto *Call = CGF.Builder.CreateCall(F); 13417 Call->addAttribute( 13418 AttributeList::ReturnIndex, 13419 Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); 13420 Call->addAttribute(AttributeList::ReturnIndex, 13421 Attribute::getWithAlignment(Call->getContext(), Align(4))); 13422 if (!E) 13423 return Call; 13424 QualType BuiltinRetType = E->getType(); 13425 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType)); 13426 if (RetTy == Call->getType()) 13427 return Call; 13428 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy); 13429 } 13430 13431 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. 13432 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { 13433 const unsigned XOffset = 4; 13434 auto *DP = EmitAMDGPUDispatchPtr(CGF); 13435 // Indexing the HSA kernel_dispatch_packet struct. 
13436 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 2); 13437 auto *GEP = CGF.Builder.CreateGEP(DP, Offset); 13438 auto *DstTy = 13439 CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace()); 13440 auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy); 13441 auto *LD = CGF.Builder.CreateLoad(Address(Cast, CharUnits::fromQuantity(2))); 13442 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 13443 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), 13444 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); 13445 LD->setMetadata(llvm::LLVMContext::MD_range, RNode); 13446 LD->setMetadata(llvm::LLVMContext::MD_invariant_load, 13447 llvm::MDNode::get(CGF.getLLVMContext(), None)); 13448 return LD; 13449 } 13450 } // namespace 13451 13452 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 13453 const CallExpr *E) { 13454 switch (BuiltinID) { 13455 case AMDGPU::BI__builtin_amdgcn_div_scale: 13456 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 13457 // Translate from the intrinsics's struct return to the builtin's out 13458 // argument. 
13459 13460 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 13461 13462 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 13463 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 13464 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 13465 13466 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 13467 X->getType()); 13468 13469 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 13470 13471 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 13472 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 13473 13474 llvm::Type *RealFlagType 13475 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 13476 13477 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 13478 Builder.CreateStore(FlagExt, FlagOutPtr); 13479 return Result; 13480 } 13481 case AMDGPU::BI__builtin_amdgcn_div_fmas: 13482 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 13483 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 13484 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 13485 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 13486 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 13487 13488 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 13489 Src0->getType()); 13490 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 13491 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 13492 } 13493 13494 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 13495 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 13496 case AMDGPU::BI__builtin_amdgcn_mov_dpp8: 13497 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); 13498 case AMDGPU::BI__builtin_amdgcn_mov_dpp: 13499 case AMDGPU::BI__builtin_amdgcn_update_dpp: { 13500 llvm::SmallVector<llvm::Value *, 6> Args; 13501 for (unsigned I = 0; I != E->getNumArgs(); ++I) 13502 Args.push_back(EmitScalarExpr(E->getArg(I))); 13503 assert(Args.size() == 5 || Args.size() == 6); 13504 if (Args.size() == 5) 13505 Args.insert(Args.begin(), 
llvm::UndefValue::get(Args[0]->getType())); 13506 Function *F = 13507 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); 13508 return Builder.CreateCall(F, Args); 13509 } 13510 case AMDGPU::BI__builtin_amdgcn_div_fixup: 13511 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 13512 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 13513 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 13514 case AMDGPU::BI__builtin_amdgcn_trig_preop: 13515 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 13516 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 13517 case AMDGPU::BI__builtin_amdgcn_rcp: 13518 case AMDGPU::BI__builtin_amdgcn_rcpf: 13519 case AMDGPU::BI__builtin_amdgcn_rcph: 13520 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 13521 case AMDGPU::BI__builtin_amdgcn_rsq: 13522 case AMDGPU::BI__builtin_amdgcn_rsqf: 13523 case AMDGPU::BI__builtin_amdgcn_rsqh: 13524 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 13525 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 13526 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 13527 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 13528 case AMDGPU::BI__builtin_amdgcn_sinf: 13529 case AMDGPU::BI__builtin_amdgcn_sinh: 13530 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 13531 case AMDGPU::BI__builtin_amdgcn_cosf: 13532 case AMDGPU::BI__builtin_amdgcn_cosh: 13533 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 13534 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: 13535 return EmitAMDGPUDispatchPtr(*this, E); 13536 case AMDGPU::BI__builtin_amdgcn_log_clampf: 13537 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 13538 case AMDGPU::BI__builtin_amdgcn_ldexp: 13539 case AMDGPU::BI__builtin_amdgcn_ldexpf: 13540 case AMDGPU::BI__builtin_amdgcn_ldexph: 13541 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 13542 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 13543 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 13544 case 
AMDGPU::BI__builtin_amdgcn_frexp_manth: 13545 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 13546 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 13547 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 13548 Value *Src0 = EmitScalarExpr(E->getArg(0)); 13549 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 13550 { Builder.getInt32Ty(), Src0->getType() }); 13551 return Builder.CreateCall(F, Src0); 13552 } 13553 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 13554 Value *Src0 = EmitScalarExpr(E->getArg(0)); 13555 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 13556 { Builder.getInt16Ty(), Src0->getType() }); 13557 return Builder.CreateCall(F, Src0); 13558 } 13559 case AMDGPU::BI__builtin_amdgcn_fract: 13560 case AMDGPU::BI__builtin_amdgcn_fractf: 13561 case AMDGPU::BI__builtin_amdgcn_fracth: 13562 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 13563 case AMDGPU::BI__builtin_amdgcn_lerp: 13564 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 13565 case AMDGPU::BI__builtin_amdgcn_ubfe: 13566 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); 13567 case AMDGPU::BI__builtin_amdgcn_sbfe: 13568 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); 13569 case AMDGPU::BI__builtin_amdgcn_uicmp: 13570 case AMDGPU::BI__builtin_amdgcn_uicmpl: 13571 case AMDGPU::BI__builtin_amdgcn_sicmp: 13572 case AMDGPU::BI__builtin_amdgcn_sicmpl: { 13573 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 13574 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 13575 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 13576 13577 // FIXME-GFX10: How should 32 bit mask be handled? 
13578 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, 13579 { Builder.getInt64Ty(), Src0->getType() }); 13580 return Builder.CreateCall(F, { Src0, Src1, Src2 }); 13581 } 13582 case AMDGPU::BI__builtin_amdgcn_fcmp: 13583 case AMDGPU::BI__builtin_amdgcn_fcmpf: { 13584 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 13585 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 13586 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 13587 13588 // FIXME-GFX10: How should 32 bit mask be handled? 13589 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, 13590 { Builder.getInt64Ty(), Src0->getType() }); 13591 return Builder.CreateCall(F, { Src0, Src1, Src2 }); 13592 } 13593 case AMDGPU::BI__builtin_amdgcn_class: 13594 case AMDGPU::BI__builtin_amdgcn_classf: 13595 case AMDGPU::BI__builtin_amdgcn_classh: 13596 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 13597 case AMDGPU::BI__builtin_amdgcn_fmed3f: 13598 case AMDGPU::BI__builtin_amdgcn_fmed3h: 13599 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); 13600 case AMDGPU::BI__builtin_amdgcn_ds_append: 13601 case AMDGPU::BI__builtin_amdgcn_ds_consume: { 13602 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? 13603 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; 13604 Value *Src0 = EmitScalarExpr(E->getArg(0)); 13605 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); 13606 return Builder.CreateCall(F, { Src0, Builder.getFalse() }); 13607 } 13608 case AMDGPU::BI__builtin_amdgcn_read_exec: { 13609 CallInst *CI = cast<CallInst>( 13610 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 13611 CI->setConvergent(); 13612 return CI; 13613 } 13614 case AMDGPU::BI__builtin_amdgcn_read_exec_lo: 13615 case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { 13616 StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? 
13617 "exec_lo" : "exec_hi"; 13618 CallInst *CI = cast<CallInst>( 13619 EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName)); 13620 CI->setConvergent(); 13621 return CI; 13622 } 13623 // amdgcn workitem 13624 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 13625 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 13626 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 13627 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 13628 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 13629 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 13630 13631 // amdgcn workgroup size 13632 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x: 13633 return EmitAMDGPUWorkGroupSize(*this, 0); 13634 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y: 13635 return EmitAMDGPUWorkGroupSize(*this, 1); 13636 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z: 13637 return EmitAMDGPUWorkGroupSize(*this, 2); 13638 13639 // r600 intrinsics 13640 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 13641 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 13642 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 13643 case AMDGPU::BI__builtin_r600_read_tidig_x: 13644 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 13645 case AMDGPU::BI__builtin_r600_read_tidig_y: 13646 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 13647 case AMDGPU::BI__builtin_r600_read_tidig_z: 13648 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 13649 case AMDGPU::BI__builtin_amdgcn_alignbit: { 13650 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 13651 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 13652 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 13653 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); 13654 return Builder.CreateCall(F, { Src0, Src1, Src2 }); 13655 } 13656 default: 13657 return nullptr; 13658 } 13659 } 13660 13661 /// Handle a 
SystemZ function in which the final argument is a pointer 13662 /// to an int that receives the post-instruction CC value. At the LLVM level 13663 /// this is represented as a function that returns a {result, cc} pair. 13664 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 13665 unsigned IntrinsicID, 13666 const CallExpr *E) { 13667 unsigned NumArgs = E->getNumArgs() - 1; 13668 SmallVector<Value *, 8> Args(NumArgs); 13669 for (unsigned I = 0; I < NumArgs; ++I) 13670 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 13671 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 13672 Function *F = CGF.CGM.getIntrinsic(IntrinsicID); 13673 Value *Call = CGF.Builder.CreateCall(F, Args); 13674 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 13675 CGF.Builder.CreateStore(CC, CCPtr); 13676 return CGF.Builder.CreateExtractValue(Call, 0); 13677 } 13678 13679 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 13680 const CallExpr *E) { 13681 switch (BuiltinID) { 13682 case SystemZ::BI__builtin_tbegin: { 13683 Value *TDB = EmitScalarExpr(E->getArg(0)); 13684 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 13685 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 13686 return Builder.CreateCall(F, {TDB, Control}); 13687 } 13688 case SystemZ::BI__builtin_tbegin_nofloat: { 13689 Value *TDB = EmitScalarExpr(E->getArg(0)); 13690 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 13691 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 13692 return Builder.CreateCall(F, {TDB, Control}); 13693 } 13694 case SystemZ::BI__builtin_tbeginc: { 13695 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 13696 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 13697 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 13698 return Builder.CreateCall(F, {TDB, Control}); 13699 } 13700 case SystemZ::BI__builtin_tabort: { 13701 Value *Data = EmitScalarExpr(E->getArg(0)); 13702 Function *F = 
CGM.getIntrinsic(Intrinsic::s390_tabort); 13703 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 13704 } 13705 case SystemZ::BI__builtin_non_tx_store: { 13706 Value *Address = EmitScalarExpr(E->getArg(0)); 13707 Value *Data = EmitScalarExpr(E->getArg(1)); 13708 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); 13709 return Builder.CreateCall(F, {Data, Address}); 13710 } 13711 13712 // Vector builtins. Note that most vector builtins are mapped automatically 13713 // to target-specific LLVM intrinsics. The ones handled specially here can 13714 // be represented via standard LLVM IR, which is preferable to enable common 13715 // LLVM optimizations. 13716 13717 case SystemZ::BI__builtin_s390_vpopctb: 13718 case SystemZ::BI__builtin_s390_vpopcth: 13719 case SystemZ::BI__builtin_s390_vpopctf: 13720 case SystemZ::BI__builtin_s390_vpopctg: { 13721 llvm::Type *ResultType = ConvertType(E->getType()); 13722 Value *X = EmitScalarExpr(E->getArg(0)); 13723 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 13724 return Builder.CreateCall(F, X); 13725 } 13726 13727 case SystemZ::BI__builtin_s390_vclzb: 13728 case SystemZ::BI__builtin_s390_vclzh: 13729 case SystemZ::BI__builtin_s390_vclzf: 13730 case SystemZ::BI__builtin_s390_vclzg: { 13731 llvm::Type *ResultType = ConvertType(E->getType()); 13732 Value *X = EmitScalarExpr(E->getArg(0)); 13733 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 13734 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 13735 return Builder.CreateCall(F, {X, Undef}); 13736 } 13737 13738 case SystemZ::BI__builtin_s390_vctzb: 13739 case SystemZ::BI__builtin_s390_vctzh: 13740 case SystemZ::BI__builtin_s390_vctzf: 13741 case SystemZ::BI__builtin_s390_vctzg: { 13742 llvm::Type *ResultType = ConvertType(E->getType()); 13743 Value *X = EmitScalarExpr(E->getArg(0)); 13744 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 13745 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 
13746 return Builder.CreateCall(F, {X, Undef}); 13747 } 13748 13749 case SystemZ::BI__builtin_s390_vfsqsb: 13750 case SystemZ::BI__builtin_s390_vfsqdb: { 13751 llvm::Type *ResultType = ConvertType(E->getType()); 13752 Value *X = EmitScalarExpr(E->getArg(0)); 13753 if (Builder.getIsFPConstrained()) { 13754 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType); 13755 return Builder.CreateConstrainedFPCall(F, { X }); 13756 } else { 13757 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 13758 return Builder.CreateCall(F, X); 13759 } 13760 } 13761 case SystemZ::BI__builtin_s390_vfmasb: 13762 case SystemZ::BI__builtin_s390_vfmadb: { 13763 llvm::Type *ResultType = ConvertType(E->getType()); 13764 Value *X = EmitScalarExpr(E->getArg(0)); 13765 Value *Y = EmitScalarExpr(E->getArg(1)); 13766 Value *Z = EmitScalarExpr(E->getArg(2)); 13767 if (Builder.getIsFPConstrained()) { 13768 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 13769 return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); 13770 } else { 13771 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 13772 return Builder.CreateCall(F, {X, Y, Z}); 13773 } 13774 } 13775 case SystemZ::BI__builtin_s390_vfmssb: 13776 case SystemZ::BI__builtin_s390_vfmsdb: { 13777 llvm::Type *ResultType = ConvertType(E->getType()); 13778 Value *X = EmitScalarExpr(E->getArg(0)); 13779 Value *Y = EmitScalarExpr(E->getArg(1)); 13780 Value *Z = EmitScalarExpr(E->getArg(2)); 13781 if (Builder.getIsFPConstrained()) { 13782 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 13783 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); 13784 } else { 13785 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 13786 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); 13787 } 13788 } 13789 case SystemZ::BI__builtin_s390_vfnmasb: 13790 case SystemZ::BI__builtin_s390_vfnmadb: { 13791 
llvm::Type *ResultType = ConvertType(E->getType()); 13792 Value *X = EmitScalarExpr(E->getArg(0)); 13793 Value *Y = EmitScalarExpr(E->getArg(1)); 13794 Value *Z = EmitScalarExpr(E->getArg(2)); 13795 if (Builder.getIsFPConstrained()) { 13796 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 13797 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); 13798 } else { 13799 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 13800 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); 13801 } 13802 } 13803 case SystemZ::BI__builtin_s390_vfnmssb: 13804 case SystemZ::BI__builtin_s390_vfnmsdb: { 13805 llvm::Type *ResultType = ConvertType(E->getType()); 13806 Value *X = EmitScalarExpr(E->getArg(0)); 13807 Value *Y = EmitScalarExpr(E->getArg(1)); 13808 Value *Z = EmitScalarExpr(E->getArg(2)); 13809 if (Builder.getIsFPConstrained()) { 13810 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 13811 Value *NegZ = Builder.CreateFNeg(Z, "sub"); 13812 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ})); 13813 } else { 13814 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 13815 Value *NegZ = Builder.CreateFNeg(Z, "neg"); 13816 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ})); 13817 } 13818 } 13819 case SystemZ::BI__builtin_s390_vflpsb: 13820 case SystemZ::BI__builtin_s390_vflpdb: { 13821 llvm::Type *ResultType = ConvertType(E->getType()); 13822 Value *X = EmitScalarExpr(E->getArg(0)); 13823 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 13824 return Builder.CreateCall(F, X); 13825 } 13826 case SystemZ::BI__builtin_s390_vflnsb: 13827 case SystemZ::BI__builtin_s390_vflndb: { 13828 llvm::Type *ResultType = ConvertType(E->getType()); 13829 Value *X = EmitScalarExpr(E->getArg(0)); 13830 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 13831 return Builder.CreateFNeg(Builder.CreateCall(F, X), 
"neg"); 13832 } 13833 case SystemZ::BI__builtin_s390_vfisb: 13834 case SystemZ::BI__builtin_s390_vfidb: { 13835 llvm::Type *ResultType = ConvertType(E->getType()); 13836 Value *X = EmitScalarExpr(E->getArg(0)); 13837 // Constant-fold the M4 and M5 mask arguments. 13838 llvm::APSInt M4, M5; 13839 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 13840 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 13841 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 13842 (void)IsConstM4; (void)IsConstM5; 13843 // Check whether this instance can be represented via a LLVM standard 13844 // intrinsic. We only support some combinations of M4 and M5. 13845 Intrinsic::ID ID = Intrinsic::not_intrinsic; 13846 Intrinsic::ID CI; 13847 switch (M4.getZExtValue()) { 13848 default: break; 13849 case 0: // IEEE-inexact exception allowed 13850 switch (M5.getZExtValue()) { 13851 default: break; 13852 case 0: ID = Intrinsic::rint; 13853 CI = Intrinsic::experimental_constrained_rint; break; 13854 } 13855 break; 13856 case 4: // IEEE-inexact exception suppressed 13857 switch (M5.getZExtValue()) { 13858 default: break; 13859 case 0: ID = Intrinsic::nearbyint; 13860 CI = Intrinsic::experimental_constrained_nearbyint; break; 13861 case 1: ID = Intrinsic::round; 13862 CI = Intrinsic::experimental_constrained_round; break; 13863 case 5: ID = Intrinsic::trunc; 13864 CI = Intrinsic::experimental_constrained_trunc; break; 13865 case 6: ID = Intrinsic::ceil; 13866 CI = Intrinsic::experimental_constrained_ceil; break; 13867 case 7: ID = Intrinsic::floor; 13868 CI = Intrinsic::experimental_constrained_floor; break; 13869 } 13870 break; 13871 } 13872 if (ID != Intrinsic::not_intrinsic) { 13873 if (Builder.getIsFPConstrained()) { 13874 Function *F = CGM.getIntrinsic(CI, ResultType); 13875 return Builder.CreateConstrainedFPCall(F, X); 13876 } else { 13877 Function *F = CGM.getIntrinsic(ID, ResultType); 13878 return Builder.CreateCall(F, X); 
13879 } 13880 } 13881 switch (BuiltinID) { // FIXME: constrained version? 13882 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; 13883 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; 13884 default: llvm_unreachable("Unknown BuiltinID"); 13885 } 13886 Function *F = CGM.getIntrinsic(ID); 13887 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 13888 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 13889 return Builder.CreateCall(F, {X, M4Value, M5Value}); 13890 } 13891 case SystemZ::BI__builtin_s390_vfmaxsb: 13892 case SystemZ::BI__builtin_s390_vfmaxdb: { 13893 llvm::Type *ResultType = ConvertType(E->getType()); 13894 Value *X = EmitScalarExpr(E->getArg(0)); 13895 Value *Y = EmitScalarExpr(E->getArg(1)); 13896 // Constant-fold the M4 mask argument. 13897 llvm::APSInt M4; 13898 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 13899 assert(IsConstM4 && "Constant arg isn't actually constant?"); 13900 (void)IsConstM4; 13901 // Check whether this instance can be represented via a LLVM standard 13902 // intrinsic. We only support some values of M4. 
13903 Intrinsic::ID ID = Intrinsic::not_intrinsic; 13904 Intrinsic::ID CI; 13905 switch (M4.getZExtValue()) { 13906 default: break; 13907 case 4: ID = Intrinsic::maxnum; 13908 CI = Intrinsic::experimental_constrained_maxnum; break; 13909 } 13910 if (ID != Intrinsic::not_intrinsic) { 13911 if (Builder.getIsFPConstrained()) { 13912 Function *F = CGM.getIntrinsic(CI, ResultType); 13913 return Builder.CreateConstrainedFPCall(F, {X, Y}); 13914 } else { 13915 Function *F = CGM.getIntrinsic(ID, ResultType); 13916 return Builder.CreateCall(F, {X, Y}); 13917 } 13918 } 13919 switch (BuiltinID) { 13920 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; 13921 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; 13922 default: llvm_unreachable("Unknown BuiltinID"); 13923 } 13924 Function *F = CGM.getIntrinsic(ID); 13925 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 13926 return Builder.CreateCall(F, {X, Y, M4Value}); 13927 } 13928 case SystemZ::BI__builtin_s390_vfminsb: 13929 case SystemZ::BI__builtin_s390_vfmindb: { 13930 llvm::Type *ResultType = ConvertType(E->getType()); 13931 Value *X = EmitScalarExpr(E->getArg(0)); 13932 Value *Y = EmitScalarExpr(E->getArg(1)); 13933 // Constant-fold the M4 mask argument. 13934 llvm::APSInt M4; 13935 bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); 13936 assert(IsConstM4 && "Constant arg isn't actually constant?"); 13937 (void)IsConstM4; 13938 // Check whether this instance can be represented via a LLVM standard 13939 // intrinsic. We only support some values of M4. 
13940 Intrinsic::ID ID = Intrinsic::not_intrinsic; 13941 Intrinsic::ID CI; 13942 switch (M4.getZExtValue()) { 13943 default: break; 13944 case 4: ID = Intrinsic::minnum; 13945 CI = Intrinsic::experimental_constrained_minnum; break; 13946 } 13947 if (ID != Intrinsic::not_intrinsic) { 13948 if (Builder.getIsFPConstrained()) { 13949 Function *F = CGM.getIntrinsic(CI, ResultType); 13950 return Builder.CreateConstrainedFPCall(F, {X, Y}); 13951 } else { 13952 Function *F = CGM.getIntrinsic(ID, ResultType); 13953 return Builder.CreateCall(F, {X, Y}); 13954 } 13955 } 13956 switch (BuiltinID) { 13957 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; 13958 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; 13959 default: llvm_unreachable("Unknown BuiltinID"); 13960 } 13961 Function *F = CGM.getIntrinsic(ID); 13962 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 13963 return Builder.CreateCall(F, {X, Y, M4Value}); 13964 } 13965 13966 case SystemZ::BI__builtin_s390_vlbrh: 13967 case SystemZ::BI__builtin_s390_vlbrf: 13968 case SystemZ::BI__builtin_s390_vlbrg: { 13969 llvm::Type *ResultType = ConvertType(E->getType()); 13970 Value *X = EmitScalarExpr(E->getArg(0)); 13971 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType); 13972 return Builder.CreateCall(F, X); 13973 } 13974 13975 // Vector intrinsics that output the post-instruction CC value. 
13976 13977 #define INTRINSIC_WITH_CC(NAME) \ 13978 case SystemZ::BI__builtin_##NAME: \ 13979 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 13980 13981 INTRINSIC_WITH_CC(s390_vpkshs); 13982 INTRINSIC_WITH_CC(s390_vpksfs); 13983 INTRINSIC_WITH_CC(s390_vpksgs); 13984 13985 INTRINSIC_WITH_CC(s390_vpklshs); 13986 INTRINSIC_WITH_CC(s390_vpklsfs); 13987 INTRINSIC_WITH_CC(s390_vpklsgs); 13988 13989 INTRINSIC_WITH_CC(s390_vceqbs); 13990 INTRINSIC_WITH_CC(s390_vceqhs); 13991 INTRINSIC_WITH_CC(s390_vceqfs); 13992 INTRINSIC_WITH_CC(s390_vceqgs); 13993 13994 INTRINSIC_WITH_CC(s390_vchbs); 13995 INTRINSIC_WITH_CC(s390_vchhs); 13996 INTRINSIC_WITH_CC(s390_vchfs); 13997 INTRINSIC_WITH_CC(s390_vchgs); 13998 13999 INTRINSIC_WITH_CC(s390_vchlbs); 14000 INTRINSIC_WITH_CC(s390_vchlhs); 14001 INTRINSIC_WITH_CC(s390_vchlfs); 14002 INTRINSIC_WITH_CC(s390_vchlgs); 14003 14004 INTRINSIC_WITH_CC(s390_vfaebs); 14005 INTRINSIC_WITH_CC(s390_vfaehs); 14006 INTRINSIC_WITH_CC(s390_vfaefs); 14007 14008 INTRINSIC_WITH_CC(s390_vfaezbs); 14009 INTRINSIC_WITH_CC(s390_vfaezhs); 14010 INTRINSIC_WITH_CC(s390_vfaezfs); 14011 14012 INTRINSIC_WITH_CC(s390_vfeebs); 14013 INTRINSIC_WITH_CC(s390_vfeehs); 14014 INTRINSIC_WITH_CC(s390_vfeefs); 14015 14016 INTRINSIC_WITH_CC(s390_vfeezbs); 14017 INTRINSIC_WITH_CC(s390_vfeezhs); 14018 INTRINSIC_WITH_CC(s390_vfeezfs); 14019 14020 INTRINSIC_WITH_CC(s390_vfenebs); 14021 INTRINSIC_WITH_CC(s390_vfenehs); 14022 INTRINSIC_WITH_CC(s390_vfenefs); 14023 14024 INTRINSIC_WITH_CC(s390_vfenezbs); 14025 INTRINSIC_WITH_CC(s390_vfenezhs); 14026 INTRINSIC_WITH_CC(s390_vfenezfs); 14027 14028 INTRINSIC_WITH_CC(s390_vistrbs); 14029 INTRINSIC_WITH_CC(s390_vistrhs); 14030 INTRINSIC_WITH_CC(s390_vistrfs); 14031 14032 INTRINSIC_WITH_CC(s390_vstrcbs); 14033 INTRINSIC_WITH_CC(s390_vstrchs); 14034 INTRINSIC_WITH_CC(s390_vstrcfs); 14035 14036 INTRINSIC_WITH_CC(s390_vstrczbs); 14037 INTRINSIC_WITH_CC(s390_vstrczhs); 14038 INTRINSIC_WITH_CC(s390_vstrczfs); 14039 14040 
INTRINSIC_WITH_CC(s390_vfcesbs);
  INTRINSIC_WITH_CC(s390_vfcedbs);
  INTRINSIC_WITH_CC(s390_vfchsbs);
  INTRINSIC_WITH_CC(s390_vfchdbs);
  INTRINSIC_WITH_CC(s390_vfchesbs);
  INTRINSIC_WITH_CC(s390_vfchedbs);

  INTRINSIC_WITH_CC(s390_vftcisb);
  INTRINSIC_WITH_CC(s390_vftcidb);

  INTRINSIC_WITH_CC(s390_vstrsb);
  INTRINSIC_WITH_CC(s390_vstrsh);
  INTRINSIC_WITH_CC(s390_vstrsf);

  INTRINSIC_WITH_CC(s390_vstrszb);
  INTRINSIC_WITH_CC(s390_vstrszh);
  INTRINSIC_WITH_CC(s390_vstrszf);

#undef INTRINSIC_WITH_CC

  default:
    return nullptr;
  }
}

namespace {
// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.

// Describes one MMA fragment load/store builtin: how many elements the
// fragment holds and which intrinsic implements each matrix layout.
struct NVPTXMmaLdstInfo {
  unsigned NumResults;  // Number of elements to load/store
  // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
  unsigned IID_col;
  unsigned IID_row;
};

// Expands to Intrinsic::nvvm_wmma_<geom_op_type>_<layout>_stride.
#define MMA_INTR(geom_op_type, layout) \
  Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
// Expands to an NVPTXMmaLdstInfo initializer {n, <col intrinsic>, <row
// intrinsic>} for fragments that have both layout variants.
#define MMA_LDST(n, geom_op_type) \
  { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }

// Maps an NVPTX MMA load/store builtin ID to its NVPTXMmaLdstInfo.
// Any BuiltinID not listed below reaches llvm_unreachable.
static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
  switch (BuiltinID) {
  // FP MMA loads
  case NVPTX::BI__hmma_m16n16k16_ld_a:
    return MMA_LDST(8, m16n16k16_load_a_f16);
  case NVPTX::BI__hmma_m16n16k16_ld_b:
    return MMA_LDST(8, m16n16k16_load_b_f16);
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
    return MMA_LDST(4, m16n16k16_load_c_f16);
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
    return MMA_LDST(8, m16n16k16_load_c_f32);
  case NVPTX::BI__hmma_m32n8k16_ld_a:
    return MMA_LDST(8, m32n8k16_load_a_f16);
  case NVPTX::BI__hmma_m32n8k16_ld_b:
    return MMA_LDST(8, m32n8k16_load_b_f16);
  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
    return MMA_LDST(4, m32n8k16_load_c_f16);
  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
    return MMA_LDST(8, m32n8k16_load_c_f32);
  case NVPTX::BI__hmma_m8n32k16_ld_a:
    return MMA_LDST(8, m8n32k16_load_a_f16);
  case NVPTX::BI__hmma_m8n32k16_ld_b:
    return MMA_LDST(8, m8n32k16_load_b_f16);
  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
    return MMA_LDST(4, m8n32k16_load_c_f16);
  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
    return MMA_LDST(8, m8n32k16_load_c_f32);

  // Integer MMA loads
  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
    return MMA_LDST(2, m16n16k16_load_a_s8);
  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
    return MMA_LDST(2, m16n16k16_load_a_u8);
  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
    return MMA_LDST(2, m16n16k16_load_b_s8);
  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
    return MMA_LDST(2, m16n16k16_load_b_u8);
  case NVPTX::BI__imma_m16n16k16_ld_c:
    return MMA_LDST(8, m16n16k16_load_c_s32);
  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
    return MMA_LDST(4, m32n8k16_load_a_s8);
  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
    return MMA_LDST(4, m32n8k16_load_a_u8);
  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
    return MMA_LDST(1, m32n8k16_load_b_s8);
  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
    return MMA_LDST(1, m32n8k16_load_b_u8);
  case NVPTX::BI__imma_m32n8k16_ld_c:
    return MMA_LDST(8, m32n8k16_load_c_s32);
  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
    return MMA_LDST(1, m8n32k16_load_a_s8);
  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
    return MMA_LDST(1, m8n32k16_load_a_u8);
  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
    return MMA_LDST(4, m8n32k16_load_b_s8);
  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
    return MMA_LDST(4, m8n32k16_load_b_u8);
  case NVPTX::BI__imma_m8n32k16_ld_c:
    return MMA_LDST(8, m8n32k16_load_c_s32);

  // Sub-integer MMA loads.
  // A fragments only have a row-layout intrinsic and B fragments only a
  // col-layout one; the ID for the other layout is 0 (unsupported).
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
    return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
    return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
    return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
    return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
  case NVPTX::BI__imma_m8n8k32_ld_c:
    return MMA_LDST(2, m8n8k32_load_c_s32);
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
    return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
    return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
  case NVPTX::BI__bmma_m8n8k128_ld_c:
    return MMA_LDST(2, m8n8k128_load_c_s32);

  // NOTE: We need to follow the inconsistent naming scheme used by NVCC.
  // Unlike PTX and LLVM IR where stores always use fragment D, NVCC builtins
  // always use fragment C for both loads and stores.
  // FP MMA stores.
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
    return MMA_LDST(4, m16n16k16_store_d_f16);
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
    return MMA_LDST(8, m16n16k16_store_d_f32);
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
    return MMA_LDST(4, m32n8k16_store_d_f16);
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
    return MMA_LDST(8, m32n8k16_store_d_f32);
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
    return MMA_LDST(4, m8n32k16_store_d_f16);
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
    return MMA_LDST(8, m8n32k16_store_d_f32);

  // Integer and sub-integer MMA stores.
  // Another naming quirk. Unlike other MMA builtins that use PTX types in the
  // name, integer loads/stores use LLVM's i32.
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
    return MMA_LDST(8, m16n16k16_store_d_s32);
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
    return MMA_LDST(8, m32n8k16_store_d_s32);
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
    return MMA_LDST(8, m8n32k16_store_d_s32);
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
    return MMA_LDST(2, m8n8k32_store_d_s32);
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
    return MMA_LDST(2, m8n8k128_store_d_s32);

  default:
    llvm_unreachable("Unknown MMA builtin");
  }
}
#undef MMA_LDST
#undef MMA_INTR


// Describes one MMA (matrix multiply-accumulate) builtin: the NumElts value
// for each of the A, B, C and D fragments, plus the table of intrinsic IDs
// that getMMAIntrinsic() indexes.
struct NVPTXMmaInfo {
  unsigned NumEltsA;
  unsigned NumEltsB;
  unsigned NumEltsC;
  unsigned NumEltsD;
  // Intrinsic IDs indexed by Layout * 2 + Satf; a 0 entry means there is no
  // intrinsic for that combination.
  std::array<unsigned, 8> Variants;

  // Returns an intrinsic that matches Layout and Satf for valid combinations
  // of Layout and Satf, 0 otherwise. An index outside Variants (including a
  // negative Layout, which becomes a large value once converted to unsigned)
  // also yields 0.
  unsigned getMMAIntrinsic(int Layout, bool Satf) {
    unsigned Index = Layout * 2 + Satf;
    if (Index >= Variants.size())
      return 0;
    return Variants[Index];
  }
};

// Maps an NVPTX MMA builtin ID to its NVPTXMmaInfo (fragment element counts
// and the table of intrinsic variants). Any BuiltinID not listed below
// reaches llvm_unreachable.
static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
  // The variant tables below are ordered to match getMMAIntrinsic():
  // row_row, row_col, col_row, col_col, each followed by its _satfinite form.
  // clang-format off
#define MMA_VARIANTS(geom, type) {{                                 \
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite  \
    }}
// Sub-integer MMA only supports row.col layout.
#define MMA_VARIANTS_I4(geom, type) {{ \
      0, \
      0, \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
      0, \
      0, \
      0, \
      0  \
    }}
// b1 MMA does not support .satfinite.
#define MMA_VARIANTS_B1(geom, type) {{ \
      0, \
      0, \
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
      0, \
      0, \
      0, \
      0, \
      0  \
    }}
  // clang-format on
  switch (BuiltinID) {
  // FP MMA
  // Note that 'type' argument of MMA_VARIANTS uses D_C notation, while
  // NumEltsN of return value are ordered as A,B,C,D.
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
    return {8, 8, 4, 4, MMA_VARIANTS(m16n16k16, f16_f16)};
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
    return {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)};
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
    return {8, 8, 8, 4, MMA_VARIANTS(m16n16k16, f16_f32)};
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
    return {8, 8, 8, 8, MMA_VARIANTS(m16n16k16, f32_f32)};
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
    return {8, 8, 4, 4, MMA_VARIANTS(m32n8k16, f16_f16)};
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
    return {8, 8, 4, 8, MMA_VARIANTS(m32n8k16, f32_f16)};
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
    return {8, 8, 8, 4, MMA_VARIANTS(m32n8k16, f16_f32)};
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
    return {8, 8, 8, 8, MMA_VARIANTS(m32n8k16, f32_f32)};
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
    return {8, 8, 4, 4, MMA_VARIANTS(m8n32k16, f16_f16)};
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
    return {8, 8, 4, 8, MMA_VARIANTS(m8n32k16, f32_f16)};
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
    return {8, 8, 8, 4, MMA_VARIANTS(m8n32k16, f16_f32)};
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
    return {8, 8, 8, 8, MMA_VARIANTS(m8n32k16, f32_f32)};

  // Integer MMA
  case NVPTX::BI__imma_m16n16k16_mma_s8:
    return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, s8)};
  case NVPTX::BI__imma_m16n16k16_mma_u8:
    return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, u8)};
  case NVPTX::BI__imma_m32n8k16_mma_s8:
    return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, s8)};
  case NVPTX::BI__imma_m32n8k16_mma_u8:
    return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, u8)};
  case NVPTX::BI__imma_m8n32k16_mma_s8:
    return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, s8)};
  case NVPTX::BI__imma_m8n32k16_mma_u8:
    return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, u8)};

  // Sub-integer MMA
  case NVPTX::BI__imma_m8n8k32_mma_s4:
    return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, s4)};
  case NVPTX::BI__imma_m8n8k32_mma_u4:
    return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, u4)};
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
    return {1, 1, 2, 2, MMA_VARIANTS_B1(m8n8k128, b1)};
  default:
    llvm_unreachable("Unexpected builtin ID.");
  }
#undef MMA_VARIANTS
#undef MMA_VARIANTS_I4
#undef MMA_VARIANTS_B1
}

} // namespace

Value *
CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
  auto MakeLdg = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    clang::CharUnits Align =
        getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  };
  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1))});
  };
  switch (BuiltinID) {
case NVPTX::BI__nvvm_atom_add_gen_i: 14329 case NVPTX::BI__nvvm_atom_add_gen_l: 14330 case NVPTX::BI__nvvm_atom_add_gen_ll: 14331 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 14332 14333 case NVPTX::BI__nvvm_atom_sub_gen_i: 14334 case NVPTX::BI__nvvm_atom_sub_gen_l: 14335 case NVPTX::BI__nvvm_atom_sub_gen_ll: 14336 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 14337 14338 case NVPTX::BI__nvvm_atom_and_gen_i: 14339 case NVPTX::BI__nvvm_atom_and_gen_l: 14340 case NVPTX::BI__nvvm_atom_and_gen_ll: 14341 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 14342 14343 case NVPTX::BI__nvvm_atom_or_gen_i: 14344 case NVPTX::BI__nvvm_atom_or_gen_l: 14345 case NVPTX::BI__nvvm_atom_or_gen_ll: 14346 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 14347 14348 case NVPTX::BI__nvvm_atom_xor_gen_i: 14349 case NVPTX::BI__nvvm_atom_xor_gen_l: 14350 case NVPTX::BI__nvvm_atom_xor_gen_ll: 14351 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 14352 14353 case NVPTX::BI__nvvm_atom_xchg_gen_i: 14354 case NVPTX::BI__nvvm_atom_xchg_gen_l: 14355 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 14356 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 14357 14358 case NVPTX::BI__nvvm_atom_max_gen_i: 14359 case NVPTX::BI__nvvm_atom_max_gen_l: 14360 case NVPTX::BI__nvvm_atom_max_gen_ll: 14361 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 14362 14363 case NVPTX::BI__nvvm_atom_max_gen_ui: 14364 case NVPTX::BI__nvvm_atom_max_gen_ul: 14365 case NVPTX::BI__nvvm_atom_max_gen_ull: 14366 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 14367 14368 case NVPTX::BI__nvvm_atom_min_gen_i: 14369 case NVPTX::BI__nvvm_atom_min_gen_l: 14370 case NVPTX::BI__nvvm_atom_min_gen_ll: 14371 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 14372 14373 case NVPTX::BI__nvvm_atom_min_gen_ui: 14374 case NVPTX::BI__nvvm_atom_min_gen_ul: 14375 case 
NVPTX::BI__nvvm_atom_min_gen_ull: 14376 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 14377 14378 case NVPTX::BI__nvvm_atom_cas_gen_i: 14379 case NVPTX::BI__nvvm_atom_cas_gen_l: 14380 case NVPTX::BI__nvvm_atom_cas_gen_ll: 14381 // __nvvm_atom_cas_gen_* should return the old value rather than the 14382 // success flag. 14383 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 14384 14385 case NVPTX::BI__nvvm_atom_add_gen_f: 14386 case NVPTX::BI__nvvm_atom_add_gen_d: { 14387 Value *Ptr = EmitScalarExpr(E->getArg(0)); 14388 Value *Val = EmitScalarExpr(E->getArg(1)); 14389 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val, 14390 AtomicOrdering::SequentiallyConsistent); 14391 } 14392 14393 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 14394 Value *Ptr = EmitScalarExpr(E->getArg(0)); 14395 Value *Val = EmitScalarExpr(E->getArg(1)); 14396 Function *FnALI32 = 14397 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 14398 return Builder.CreateCall(FnALI32, {Ptr, Val}); 14399 } 14400 14401 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 14402 Value *Ptr = EmitScalarExpr(E->getArg(0)); 14403 Value *Val = EmitScalarExpr(E->getArg(1)); 14404 Function *FnALD32 = 14405 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 14406 return Builder.CreateCall(FnALD32, {Ptr, Val}); 14407 } 14408 14409 case NVPTX::BI__nvvm_ldg_c: 14410 case NVPTX::BI__nvvm_ldg_c2: 14411 case NVPTX::BI__nvvm_ldg_c4: 14412 case NVPTX::BI__nvvm_ldg_s: 14413 case NVPTX::BI__nvvm_ldg_s2: 14414 case NVPTX::BI__nvvm_ldg_s4: 14415 case NVPTX::BI__nvvm_ldg_i: 14416 case NVPTX::BI__nvvm_ldg_i2: 14417 case NVPTX::BI__nvvm_ldg_i4: 14418 case NVPTX::BI__nvvm_ldg_l: 14419 case NVPTX::BI__nvvm_ldg_ll: 14420 case NVPTX::BI__nvvm_ldg_ll2: 14421 case NVPTX::BI__nvvm_ldg_uc: 14422 case NVPTX::BI__nvvm_ldg_uc2: 14423 case NVPTX::BI__nvvm_ldg_uc4: 14424 case NVPTX::BI__nvvm_ldg_us: 14425 case NVPTX::BI__nvvm_ldg_us2: 14426 case NVPTX::BI__nvvm_ldg_us4: 
14427 case NVPTX::BI__nvvm_ldg_ui: 14428 case NVPTX::BI__nvvm_ldg_ui2: 14429 case NVPTX::BI__nvvm_ldg_ui4: 14430 case NVPTX::BI__nvvm_ldg_ul: 14431 case NVPTX::BI__nvvm_ldg_ull: 14432 case NVPTX::BI__nvvm_ldg_ull2: 14433 // PTX Interoperability section 2.2: "For a vector with an even number of 14434 // elements, its alignment is set to number of elements times the alignment 14435 // of its member: n*alignof(t)." 14436 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 14437 case NVPTX::BI__nvvm_ldg_f: 14438 case NVPTX::BI__nvvm_ldg_f2: 14439 case NVPTX::BI__nvvm_ldg_f4: 14440 case NVPTX::BI__nvvm_ldg_d: 14441 case NVPTX::BI__nvvm_ldg_d2: 14442 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 14443 14444 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 14445 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 14446 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 14447 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 14448 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 14449 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 14450 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 14451 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 14452 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 14453 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 14454 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 14455 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 14456 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 14457 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 14458 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 14459 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 14460 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 14461 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 14462 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 14463 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 14464 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 14465 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 14466 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 14467 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 14468 case 
NVPTX::BI__nvvm_atom_cta_max_gen_l: 14469 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 14470 case NVPTX::BI__nvvm_atom_cta_max_gen_ll: 14471 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 14472 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 14473 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 14474 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 14475 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 14476 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 14477 case NVPTX::BI__nvvm_atom_sys_max_gen_ll: 14478 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 14479 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 14480 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 14481 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 14482 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 14483 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 14484 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 14485 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 14486 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 14487 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 14488 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 14489 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 14490 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 14491 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 14492 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 14493 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 14494 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 14495 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 14496 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 14497 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 14498 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 14499 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 14500 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 14501 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 14502 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 14503 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 14504 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 14505 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 
14506 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 14507 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 14508 case NVPTX::BI__nvvm_atom_sys_and_gen_ll: 14509 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 14510 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 14511 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 14512 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 14513 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 14514 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 14515 case NVPTX::BI__nvvm_atom_sys_or_gen_l: 14516 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 14517 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 14518 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 14519 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 14520 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 14521 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 14522 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 14523 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 14524 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 14525 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 14526 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 14527 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 14528 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 14529 Value *Ptr = EmitScalarExpr(E->getArg(0)); 14530 return Builder.CreateCall( 14531 CGM.getIntrinsic( 14532 Intrinsic::nvvm_atomic_cas_gen_i_cta, 14533 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 14534 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 14535 } 14536 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 14537 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 14538 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 14539 Value *Ptr = EmitScalarExpr(E->getArg(0)); 14540 return Builder.CreateCall( 14541 CGM.getIntrinsic( 14542 Intrinsic::nvvm_atomic_cas_gen_i_sys, 14543 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 14544 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 14545 } 14546 case NVPTX::BI__nvvm_match_all_sync_i32p: 14547 case 
NVPTX::BI__nvvm_match_all_sync_i64p: { 14548 Value *Mask = EmitScalarExpr(E->getArg(0)); 14549 Value *Val = EmitScalarExpr(E->getArg(1)); 14550 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); 14551 Value *ResultPair = Builder.CreateCall( 14552 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p 14553 ? Intrinsic::nvvm_match_all_sync_i32p 14554 : Intrinsic::nvvm_match_all_sync_i64p), 14555 {Mask, Val}); 14556 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), 14557 PredOutPtr.getElementType()); 14558 Builder.CreateStore(Pred, PredOutPtr); 14559 return Builder.CreateExtractValue(ResultPair, 0); 14560 } 14561 14562 // FP MMA loads 14563 case NVPTX::BI__hmma_m16n16k16_ld_a: 14564 case NVPTX::BI__hmma_m16n16k16_ld_b: 14565 case NVPTX::BI__hmma_m16n16k16_ld_c_f16: 14566 case NVPTX::BI__hmma_m16n16k16_ld_c_f32: 14567 case NVPTX::BI__hmma_m32n8k16_ld_a: 14568 case NVPTX::BI__hmma_m32n8k16_ld_b: 14569 case NVPTX::BI__hmma_m32n8k16_ld_c_f16: 14570 case NVPTX::BI__hmma_m32n8k16_ld_c_f32: 14571 case NVPTX::BI__hmma_m8n32k16_ld_a: 14572 case NVPTX::BI__hmma_m8n32k16_ld_b: 14573 case NVPTX::BI__hmma_m8n32k16_ld_c_f16: 14574 case NVPTX::BI__hmma_m8n32k16_ld_c_f32: 14575 // Integer MMA loads. 14576 case NVPTX::BI__imma_m16n16k16_ld_a_s8: 14577 case NVPTX::BI__imma_m16n16k16_ld_a_u8: 14578 case NVPTX::BI__imma_m16n16k16_ld_b_s8: 14579 case NVPTX::BI__imma_m16n16k16_ld_b_u8: 14580 case NVPTX::BI__imma_m16n16k16_ld_c: 14581 case NVPTX::BI__imma_m32n8k16_ld_a_s8: 14582 case NVPTX::BI__imma_m32n8k16_ld_a_u8: 14583 case NVPTX::BI__imma_m32n8k16_ld_b_s8: 14584 case NVPTX::BI__imma_m32n8k16_ld_b_u8: 14585 case NVPTX::BI__imma_m32n8k16_ld_c: 14586 case NVPTX::BI__imma_m8n32k16_ld_a_s8: 14587 case NVPTX::BI__imma_m8n32k16_ld_a_u8: 14588 case NVPTX::BI__imma_m8n32k16_ld_b_s8: 14589 case NVPTX::BI__imma_m8n32k16_ld_b_u8: 14590 case NVPTX::BI__imma_m8n32k16_ld_c: 14591 // Sub-integer MMA loads. 
14592 case NVPTX::BI__imma_m8n8k32_ld_a_s4: 14593 case NVPTX::BI__imma_m8n8k32_ld_a_u4: 14594 case NVPTX::BI__imma_m8n8k32_ld_b_s4: 14595 case NVPTX::BI__imma_m8n8k32_ld_b_u4: 14596 case NVPTX::BI__imma_m8n8k32_ld_c: 14597 case NVPTX::BI__bmma_m8n8k128_ld_a_b1: 14598 case NVPTX::BI__bmma_m8n8k128_ld_b_b1: 14599 case NVPTX::BI__bmma_m8n8k128_ld_c: 14600 { 14601 Address Dst = EmitPointerWithAlignment(E->getArg(0)); 14602 Value *Src = EmitScalarExpr(E->getArg(1)); 14603 Value *Ldm = EmitScalarExpr(E->getArg(2)); 14604 llvm::APSInt isColMajorArg; 14605 if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) 14606 return nullptr; 14607 bool isColMajor = isColMajorArg.getSExtValue(); 14608 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); 14609 unsigned IID = isColMajor ? II.IID_col : II.IID_row; 14610 if (IID == 0) 14611 return nullptr; 14612 14613 Value *Result = 14614 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); 14615 14616 // Save returned values. 
14617 assert(II.NumResults); 14618 if (II.NumResults == 1) { 14619 Builder.CreateAlignedStore(Result, Dst.getPointer(), 14620 CharUnits::fromQuantity(4)); 14621 } else { 14622 for (unsigned i = 0; i < II.NumResults; ++i) { 14623 Builder.CreateAlignedStore( 14624 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), 14625 Dst.getElementType()), 14626 Builder.CreateGEP(Dst.getPointer(), 14627 llvm::ConstantInt::get(IntTy, i)), 14628 CharUnits::fromQuantity(4)); 14629 } 14630 } 14631 return Result; 14632 } 14633 14634 case NVPTX::BI__hmma_m16n16k16_st_c_f16: 14635 case NVPTX::BI__hmma_m16n16k16_st_c_f32: 14636 case NVPTX::BI__hmma_m32n8k16_st_c_f16: 14637 case NVPTX::BI__hmma_m32n8k16_st_c_f32: 14638 case NVPTX::BI__hmma_m8n32k16_st_c_f16: 14639 case NVPTX::BI__hmma_m8n32k16_st_c_f32: 14640 case NVPTX::BI__imma_m16n16k16_st_c_i32: 14641 case NVPTX::BI__imma_m32n8k16_st_c_i32: 14642 case NVPTX::BI__imma_m8n32k16_st_c_i32: 14643 case NVPTX::BI__imma_m8n8k32_st_c_i32: 14644 case NVPTX::BI__bmma_m8n8k128_st_c_i32: { 14645 Value *Dst = EmitScalarExpr(E->getArg(0)); 14646 Address Src = EmitPointerWithAlignment(E->getArg(1)); 14647 Value *Ldm = EmitScalarExpr(E->getArg(2)); 14648 llvm::APSInt isColMajorArg; 14649 if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) 14650 return nullptr; 14651 bool isColMajor = isColMajorArg.getSExtValue(); 14652 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); 14653 unsigned IID = isColMajor ? 
II.IID_col : II.IID_row; 14654 if (IID == 0) 14655 return nullptr; 14656 Function *Intrinsic = 14657 CGM.getIntrinsic(IID, Dst->getType()); 14658 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); 14659 SmallVector<Value *, 10> Values = {Dst}; 14660 for (unsigned i = 0; i < II.NumResults; ++i) { 14661 Value *V = Builder.CreateAlignedLoad( 14662 Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), 14663 CharUnits::fromQuantity(4)); 14664 Values.push_back(Builder.CreateBitCast(V, ParamType)); 14665 } 14666 Values.push_back(Ldm); 14667 Value *Result = Builder.CreateCall(Intrinsic, Values); 14668 return Result; 14669 } 14670 14671 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) --> 14672 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf> 14673 case NVPTX::BI__hmma_m16n16k16_mma_f16f16: 14674 case NVPTX::BI__hmma_m16n16k16_mma_f32f16: 14675 case NVPTX::BI__hmma_m16n16k16_mma_f32f32: 14676 case NVPTX::BI__hmma_m16n16k16_mma_f16f32: 14677 case NVPTX::BI__hmma_m32n8k16_mma_f16f16: 14678 case NVPTX::BI__hmma_m32n8k16_mma_f32f16: 14679 case NVPTX::BI__hmma_m32n8k16_mma_f32f32: 14680 case NVPTX::BI__hmma_m32n8k16_mma_f16f32: 14681 case NVPTX::BI__hmma_m8n32k16_mma_f16f16: 14682 case NVPTX::BI__hmma_m8n32k16_mma_f32f16: 14683 case NVPTX::BI__hmma_m8n32k16_mma_f32f32: 14684 case NVPTX::BI__hmma_m8n32k16_mma_f16f32: 14685 case NVPTX::BI__imma_m16n16k16_mma_s8: 14686 case NVPTX::BI__imma_m16n16k16_mma_u8: 14687 case NVPTX::BI__imma_m32n8k16_mma_s8: 14688 case NVPTX::BI__imma_m32n8k16_mma_u8: 14689 case NVPTX::BI__imma_m8n32k16_mma_s8: 14690 case NVPTX::BI__imma_m8n32k16_mma_u8: 14691 case NVPTX::BI__imma_m8n8k32_mma_s4: 14692 case NVPTX::BI__imma_m8n8k32_mma_u4: 14693 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: { 14694 Address Dst = EmitPointerWithAlignment(E->getArg(0)); 14695 Address SrcA = EmitPointerWithAlignment(E->getArg(1)); 14696 Address SrcB = EmitPointerWithAlignment(E->getArg(2)); 14697 
Address SrcC = EmitPointerWithAlignment(E->getArg(3)); 14698 llvm::APSInt LayoutArg; 14699 if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) 14700 return nullptr; 14701 int Layout = LayoutArg.getSExtValue(); 14702 if (Layout < 0 || Layout > 3) 14703 return nullptr; 14704 llvm::APSInt SatfArg; 14705 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) 14706 SatfArg = 0; // .b1 does not have satf argument. 14707 else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) 14708 return nullptr; 14709 bool Satf = SatfArg.getSExtValue(); 14710 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); 14711 unsigned IID = MI.getMMAIntrinsic(Layout, Satf); 14712 if (IID == 0) // Unsupported combination of Layout/Satf. 14713 return nullptr; 14714 14715 SmallVector<Value *, 24> Values; 14716 Function *Intrinsic = CGM.getIntrinsic(IID); 14717 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0); 14718 // Load A 14719 for (unsigned i = 0; i < MI.NumEltsA; ++i) { 14720 Value *V = Builder.CreateAlignedLoad( 14721 Builder.CreateGEP(SrcA.getPointer(), 14722 llvm::ConstantInt::get(IntTy, i)), 14723 CharUnits::fromQuantity(4)); 14724 Values.push_back(Builder.CreateBitCast(V, AType)); 14725 } 14726 // Load B 14727 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA); 14728 for (unsigned i = 0; i < MI.NumEltsB; ++i) { 14729 Value *V = Builder.CreateAlignedLoad( 14730 Builder.CreateGEP(SrcB.getPointer(), 14731 llvm::ConstantInt::get(IntTy, i)), 14732 CharUnits::fromQuantity(4)); 14733 Values.push_back(Builder.CreateBitCast(V, BType)); 14734 } 14735 // Load C 14736 llvm::Type *CType = 14737 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB); 14738 for (unsigned i = 0; i < MI.NumEltsC; ++i) { 14739 Value *V = Builder.CreateAlignedLoad( 14740 Builder.CreateGEP(SrcC.getPointer(), 14741 llvm::ConstantInt::get(IntTy, i)), 14742 CharUnits::fromQuantity(4)); 14743 Values.push_back(Builder.CreateBitCast(V, CType)); 
}
    Value *Result = Builder.CreateCall(Intrinsic, Values);
    llvm::Type *DType = Dst.getElementType();
    for (unsigned i = 0; i < MI.NumEltsD; ++i)
      Builder.CreateAlignedStore(
          Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
          Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
    return Result;
  }
  default:
    return nullptr;
  }
}

namespace {
/// Operands shared by the alignment builtins lowered below
/// (EmitBuiltinIsAligned / EmitBuiltinAlignTo).
///
/// The constructor emits code for both call arguments and derives:
///  - Src / SrcType: argument 0 and its LLVM type; an array-typed argument is
///    first decayed to a pointer.
///  - IntType: the integer type all arithmetic is done in. For a pointer
///    source this is an integer as wide as the data layout's index type for
///    that pointer type; otherwise it is the source's own integer type.
///  - Alignment: argument 1, zero-extended or truncated to IntType.
///  - Mask: Alignment - 1.
///
/// NOTE(review): nothing here checks that the alignment is a power of two;
/// presumably that is validated before codegen -- confirm against Sema.
struct BuiltinAlignArgs {
  llvm::Value *Src = nullptr;
  llvm::Type *SrcType = nullptr;
  llvm::Value *Alignment = nullptr;
  llvm::Value *Mask = nullptr;
  llvm::IntegerType *IntType = nullptr;

  BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
    QualType AstType = E->getArg(0)->getType();
    // Arrays are not scalars: decay them to a pointer to the first element.
    if (AstType->isArrayType())
      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
    else
      Src = CGF.EmitScalarExpr(E->getArg(0));
    SrcType = Src->getType();
    if (SrcType->isPointerTy()) {
      // Use the index width of this pointer type for address arithmetic.
      IntType = IntegerType::get(
          CGF.getLLVMContext(),
          CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
    } else {
      assert(SrcType->isIntegerTy());
      IntType = cast<llvm::IntegerType>(SrcType);
    }
    Alignment = CGF.EmitScalarExpr(E->getArg(1));
    Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
    auto *One = llvm::ConstantInt::get(IntType, 1);
    Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
  }
};
} // namespace

/// Generate (x & (y-1)) == 0.
///
/// A pointer operand is first converted to Args.IntType; the result is the
/// i1 comparison of the masked-off low bits against zero.
RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
  BuiltinAlignArgs Args(E, *this);
  llvm::Value *SrcAddress = Args.Src;
  if (Args.SrcType->isPointerTy())
    SrcAddress =
        Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
  return RValue::get(Builder.CreateICmpEQ(
      Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
      llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
}

/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
/// TODO: actually use ptrmask once most optimization passes know about it.
///
/// For integer operands the masked value is returned directly. For pointer
/// operands the aligned address is computed as an integer, and the result is
/// formed by a GEP from the original pointer by the (aligned - original)
/// byte difference, cast back to the source pointer type.
RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
  BuiltinAlignArgs Args(E, *this);
  llvm::Value *SrcAddr = Args.Src;
  if (Args.Src->getType()->isPointerTy())
    SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
  llvm::Value *SrcForMask = SrcAddr;
  if (AlignUp) {
    // When aligning up we have to first add the mask to ensure we go over the
    // next alignment value and then align down to the next valid multiple.
    // By adding the mask, we ensure that align_up on an already aligned
    // value will not change the value.
    SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
  }
  // Invert the mask to only clear the lower bits.
  llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
  llvm::Value *Result =
      Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
  if (Args.Src->getType()->isPointerTy()) {
    /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
    // Result = Builder.CreateIntrinsic(
    //   Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
    //   {SrcForMask, InvertedMask}, nullptr, "aligned_result");
    Result->setName("aligned_intptr");
    // Byte offset from the original address to the aligned address.
    llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
    // The result must point to the same underlying allocation. This means we
    // can use an inbounds GEP to enable better optimization.
    Value *Base = EmitCastToVoidPtr(Args.Src);
    // With defined signed overflow a plain GEP is emitted; otherwise the
    // checked inbounds variant is used (subtraction when aligning down).
    if (getLangOpts().isSignedOverflowDefined())
      Result = Builder.CreateGEP(Base, Difference, "aligned_result");
    else
      Result = EmitCheckedInBoundsGEP(Base, Difference,
                                      /*SignedIndices=*/true,
                                      /*isSubtraction=*/!AlignUp,
                                      E->getExprLoc(), "aligned_result");
    Result = Builder.CreatePointerCast(Result, Args.SrcType);
    // Emit an alignment assumption to ensure that the new alignment is
    // propagated to loads/stores, etc.
    emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
  }
  assert(Result->getType() == Args.SrcType);
  return RValue::get(Result);
}

Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
  switch (BuiltinID) {
  case WebAssembly::BI__builtin_wasm_memory_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *I = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
    return Builder.CreateCall(Callee, I);
  }
  case WebAssembly::BI__builtin_wasm_memory_grow: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Args[] = {
      EmitScalarExpr(E->getArg(0)),
      EmitScalarExpr(E->getArg(1))
    };
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
    return Builder.CreateCall(Callee, Args);
  }
  case WebAssembly::BI__builtin_wasm_memory_init: {
// memory_init: the segment index (arg 0) and memory index (arg 1) must be
// integer constant expressions; a non-zero memory index is reported via
// ErrorUnsupported. Args 2-4 are emitted as ordinary scalars.
    llvm::APSInt SegConst;
    if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
      llvm_unreachable("Constant arg isn't actually constant?");
    llvm::APSInt MemConst;
    if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext()))
      llvm_unreachable("Constant arg isn't actually constant?");
    if (!MemConst.isNullValue())
      ErrorUnsupported(E, "non-zero memory index");
    Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst),
                     llvm::ConstantInt::get(getLLVMContext(), MemConst),
                     EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
                     EmitScalarExpr(E->getArg(4))};
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init);
    return Builder.CreateCall(Callee, Args);
  }
  // data_drop: the segment index must be an integer constant expression.
  case WebAssembly::BI__builtin_wasm_data_drop: {
    llvm::APSInt SegConst;
    if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
      llvm_unreachable("Constant arg isn't actually constant?");
    Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst);
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop);
    return Builder.CreateCall(Callee, {Arg});
  }
  // TLS queries: argument-less calls; size/align are overloaded on the
  // builtin's result type.
  case WebAssembly::BI__builtin_wasm_tls_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_tls_align: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_tls_base: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
    return Builder.CreateCall(Callee);
  }
  // Exception handling: throw takes (tag, object); rethrow takes nothing.
  case WebAssembly::BI__builtin_wasm_throw: {
    Value *Tag = EmitScalarExpr(E->getArg(0));
    Value *Obj = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
    return Builder.CreateCall(Callee, {Tag, Obj});
  }
  case WebAssembly::BI__builtin_wasm_rethrow_in_catch: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch);
    return Builder.CreateCall(Callee);
  }
  // Atomic wait/notify: arguments are forwarded unchanged to the intrinsic.
  case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Expected = EmitScalarExpr(E->getArg(1));
    Value *Timeout = EmitScalarExpr(E->getArg(2));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  }
  case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Expected = EmitScalarExpr(E->getArg(1));
    Value *Timeout = EmitScalarExpr(E->getArg(2));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  }
  case WebAssembly::BI__builtin_wasm_atomic_notify: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Count = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
    return Builder.CreateCall(Callee, {Addr, Count});
  }
  // Float-to-int truncations: each intrinsic is overloaded on
  // {result type, source type}.
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
                                        {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
                                     {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
                                     {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  // Floating-point min/max lower to the generic llvm.minimum / llvm.maximum
  // intrinsics, overloaded on the result type.
  case WebAssembly::BI__builtin_wasm_min_f32:
  case WebAssembly::BI__builtin_wasm_min_f64:
  case WebAssembly::BI__builtin_wasm_min_f32x4:
  case WebAssembly::BI__builtin_wasm_min_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::minimum,
                                     ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_max_f32:
  case WebAssembly::BI__builtin_wasm_max_f64:
  case WebAssembly::BI__builtin_wasm_max_f32x4:
  case WebAssembly::BI__builtin_wasm_max_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::maximum,
                                     ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    Value *Indices = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
    return Builder.CreateCall(Callee, {Src, Indices});
  }
  // extract_lane: the lane index must be an integer constant expression. The
  // element is read with extractelement; the i8x16/i16x8 forms then sign- or
  // zero-extend it to the builtin's result type.
  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
    llvm::APSInt LaneConst;
    if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
      llvm_unreachable("Constant arg isn't actually constant?");
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
    Value *Extract = Builder.CreateExtractElement(Vec, Lane);
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
    case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
      return Builder.CreateSExt(Extract, ConvertType(E->getType()));
    case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
    case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
      return Builder.CreateZExt(Extract, ConvertType(E->getType()));
    case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
    case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
    case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
    case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
      return Extract;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
  }
  // replace_lane: the lane index must be an integer constant expression. For
  // i8x16/i16x8 the replacement value is truncated to the vector element type
  // before insertelement.
  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
    llvm::APSInt LaneConst;
    if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
      llvm_unreachable("Constant arg isn't actually constant?");
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
    Value *Val = EmitScalarExpr(E->getArg(2));
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
    case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
      llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
      Value *Trunc = Builder.CreateTrunc(Val, ElemType);
      return Builder.CreateInsertElement(Vec, Trunc, Lane);
    }
    case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
    case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
    case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
    case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
      return Builder.CreateInsertElement(Vec, Val, Lane);
    default:
      llvm_unreachable("unexpected builtin ID");
    }
  }
  // Saturating add uses the generic sadd_sat/uadd_sat intrinsics; saturating
  // subtract uses the wasm-specific intrinsics.
  case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
  case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
  case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
  case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
    case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
      IntNo = Intrinsic::sadd_sat;
      break;
    case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
    case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
      IntNo = Intrinsic::uadd_sat;
      break;
    case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
    case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
      IntNo = Intrinsic::wasm_sub_saturate_signed;
      break;
    case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
    case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
      IntNo = Intrinsic::wasm_sub_saturate_unsigned;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  // Integer abs is emitted as plain IR: select(v < 0, -v, v).
  case WebAssembly::BI__builtin_wasm_abs_i8x16:
  case WebAssembly::BI__builtin_wasm_abs_i16x8:
  case WebAssembly::BI__builtin_wasm_abs_i32x4: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Value *Neg = Builder.CreateNeg(Vec, "neg");
    Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
    Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
    return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
  }
  // Integer min/max are emitted as compare + select; only the predicate
  // (signed/unsigned, less/greater) differs between builtins.
  case WebAssembly::BI__builtin_wasm_min_s_i8x16:
  case WebAssembly::BI__builtin_wasm_min_u_i8x16:
  case WebAssembly::BI__builtin_wasm_max_s_i8x16:
  case WebAssembly::BI__builtin_wasm_max_u_i8x16:
  case WebAssembly::BI__builtin_wasm_min_s_i16x8:
  case WebAssembly::BI__builtin_wasm_min_u_i16x8:
  case WebAssembly::BI__builtin_wasm_max_s_i16x8:
  case WebAssembly::BI__builtin_wasm_max_u_i16x8:
  case WebAssembly::BI__builtin_wasm_min_s_i32x4:
  case WebAssembly::BI__builtin_wasm_min_u_i32x4:
  case WebAssembly::BI__builtin_wasm_max_s_i32x4:
  case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Value *ICmp;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_min_s_i8x16:
    case WebAssembly::BI__builtin_wasm_min_s_i16x8:
    case WebAssembly::BI__builtin_wasm_min_s_i32x4:
      ICmp = Builder.CreateICmpSLT(LHS, RHS);
      break;
    case WebAssembly::BI__builtin_wasm_min_u_i8x16:
    case WebAssembly::BI__builtin_wasm_min_u_i16x8:
    case WebAssembly::BI__builtin_wasm_min_u_i32x4:
      ICmp = Builder.CreateICmpULT(LHS, RHS);
      break;
    case WebAssembly::BI__builtin_wasm_max_s_i8x16:
    case WebAssembly::BI__builtin_wasm_max_s_i16x8:
    case WebAssembly::BI__builtin_wasm_max_s_i32x4:
      ICmp = Builder.CreateICmpSGT(LHS, RHS);
      break;
    case WebAssembly::BI__builtin_wasm_max_u_i8x16:
    case WebAssembly::BI__builtin_wasm_max_u_i16x8:
    case WebAssembly::BI__builtin_wasm_max_u_i32x4:
      ICmp = Builder.CreateICmpUGT(LHS, RHS);
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    return Builder.CreateSelect(ICmp, LHS, RHS);
  }
  case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
  case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
                                        ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_bitselect: {
    Value *V1 = EmitScalarExpr(E->getArg(0));
    Value *V2 = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
                                     ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {V1, V2, C});
  }
  case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  // any_true / all_true: intrinsic overloaded on the operand vector type.
  case WebAssembly::BI__builtin_wasm_any_true_i8x16:
  case WebAssembly::BI__builtin_wasm_any_true_i16x8:
  case WebAssembly::BI__builtin_wasm_any_true_i32x4:
  case WebAssembly::BI__builtin_wasm_any_true_i64x2:
  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_any_true_i8x16:
    case WebAssembly::BI__builtin_wasm_any_true_i16x8:
    case WebAssembly::BI__builtin_wasm_any_true_i32x4:
    case WebAssembly::BI__builtin_wasm_any_true_i64x2:
      IntNo = Intrinsic::wasm_anytrue;
      break;
    case WebAssembly::BI__builtin_wasm_all_true_i8x16:
    case WebAssembly::BI__builtin_wasm_all_true_i16x8:
    case WebAssembly::BI__builtin_wasm_all_true_i32x4:
    case WebAssembly::BI__builtin_wasm_all_true_i64x2:
      IntNo = Intrinsic::wasm_alltrue;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
  case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
  case WebAssembly::BI__builtin_wasm_bitmask_i32x4: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  // Vector fabs / sqrt lower to the generic LLVM intrinsics.
  case WebAssembly::BI__builtin_wasm_abs_f32x4:
  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
    return Builder.CreateCall(Callee, {Vec});
  }
  // qfma / qfms: three-operand intrinsics overloaded on the operand type.
  case WebAssembly::BI__builtin_wasm_qfma_f32x4:
  case WebAssembly::BI__builtin_wasm_qfms_f32x4:
  case WebAssembly::BI__builtin_wasm_qfma_f64x2:
  case WebAssembly::BI__builtin_wasm_qfms_f64x2: {
    Value *A = EmitScalarExpr(E->getArg(0));
    Value *B = EmitScalarExpr(E->getArg(1));
    Value *C = EmitScalarExpr(E->getArg(2));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_qfma_f32x4:
    case WebAssembly::BI__builtin_wasm_qfma_f64x2:
      IntNo = Intrinsic::wasm_qfma;
      break;
    case WebAssembly::BI__builtin_wasm_qfms_f32x4:
    case WebAssembly::BI__builtin_wasm_qfms_f64x2:
      IntNo = Intrinsic::wasm_qfms;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
    return Builder.CreateCall(Callee, {A, B, C});
  }
  // narrow: intrinsic overloaded on {result vector type, operand vector type}.
  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
    Value *Low = EmitScalarExpr(E->getArg(0));
    Value *High = EmitScalarExpr(E->getArg(1));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
    case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
      IntNo = Intrinsic::wasm_narrow_signed;
      break;
    case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
    case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
      IntNo = Intrinsic::wasm_narrow_unsigned;
      break;
    default:
      llvm_unreachable("unexpected builtin ID");
    }
    Function *Callee =
        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
    return Builder.CreateCall(Callee, {Low, High});
  }
  // widen: pick the low/high, signed/unsigned intrinsic from the builtin ID.
  case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
  case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
  case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
  case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
  case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
  case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
  case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
  case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: {
    Value *Vec = EmitScalarExpr(E->getArg(0));
    unsigned IntNo;
    switch (BuiltinID) {
    case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
    case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
      IntNo = Intrinsic::wasm_widen_low_signed;
      break;
    case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
    case
WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8: 15281 IntNo = Intrinsic::wasm_widen_high_signed; 15282 break; 15283 case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16: 15284 case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8: 15285 IntNo = Intrinsic::wasm_widen_low_unsigned; 15286 break; 15287 case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16: 15288 case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: 15289 IntNo = Intrinsic::wasm_widen_high_unsigned; 15290 break; 15291 default: 15292 llvm_unreachable("unexpected builtin ID"); 15293 } 15294 Function *Callee = 15295 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()}); 15296 return Builder.CreateCall(Callee, Vec); 15297 } 15298 default: 15299 return nullptr; 15300 } 15301 } 15302 15303 static std::pair<Intrinsic::ID, unsigned> 15304 getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) { 15305 struct Info { 15306 unsigned BuiltinID; 15307 Intrinsic::ID IntrinsicID; 15308 unsigned VecLen; 15309 }; 15310 Info Infos[] = { 15311 #define CUSTOM_BUILTIN_MAPPING(x,s) \ 15312 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s }, 15313 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0) 15314 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0) 15315 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0) 15316 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0) 15317 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0) 15318 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0) 15319 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0) 15320 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0) 15321 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0) 15322 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0) 15323 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0) 15324 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0) 15325 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0) 15326 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0) 15327 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0) 15328 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0) 15329 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0) 15330 
CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0) 15331 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0) 15332 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0) 15333 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0) 15334 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0) 15335 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64) 15336 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64) 15337 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64) 15338 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64) 15339 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128) 15340 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128) 15341 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128) 15342 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128) 15343 #include "clang/Basic/BuiltinsHexagonMapCustomDep.def" 15344 #undef CUSTOM_BUILTIN_MAPPING 15345 }; 15346 15347 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; }; 15348 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true); 15349 (void)SortOnce; 15350 15351 const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos), 15352 Info{BuiltinID, 0, 0}, CmpInfo); 15353 if (F == std::end(Infos) || F->BuiltinID != BuiltinID) 15354 return {Intrinsic::not_intrinsic, 0}; 15355 15356 return {F->IntrinsicID, F->VecLen}; 15357 } 15358 15359 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, 15360 const CallExpr *E) { 15361 Intrinsic::ID ID; 15362 unsigned VecLen; 15363 std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID); 15364 15365 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) { 15366 // The base pointer is passed by address, so it needs to be loaded. 15367 Address A = EmitPointerWithAlignment(E->getArg(0)); 15368 Address BP = Address( 15369 Builder.CreateBitCast(A.getPointer(), Int8PtrPtrTy), A.getAlignment()); 15370 llvm::Value *Base = Builder.CreateLoad(BP); 15371 // The treatment of both loads and stores is the same: the arguments for 15372 // the builtin are the same as the arguments for the intrinsic. 
15373 // Load: 15374 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start) 15375 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start) 15376 // Store: 15377 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start) 15378 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start) 15379 SmallVector<llvm::Value*,5> Ops = { Base }; 15380 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i) 15381 Ops.push_back(EmitScalarExpr(E->getArg(i))); 15382 15383 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); 15384 // The load intrinsics generate two results (Value, NewBase), stores 15385 // generate one (NewBase). The new base address needs to be stored. 15386 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1) 15387 : Result; 15388 llvm::Value *LV = Builder.CreateBitCast( 15389 EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo()); 15390 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 15391 llvm::Value *RetVal = 15392 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); 15393 if (IsLoad) 15394 RetVal = Builder.CreateExtractValue(Result, 0); 15395 return RetVal; 15396 }; 15397 15398 // Handle the conversion of bit-reverse load intrinsics to bit code. 15399 // The intrinsic call after this function only reads from memory and the 15400 // write to memory is dealt by the store instruction. 15401 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) { 15402 // The intrinsic generates one result, which is the new value for the base 15403 // pointer. It needs to be returned. The result of the load instruction is 15404 // passed to intrinsic by address, so the value needs to be stored. 15405 llvm::Value *BaseAddress = 15406 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); 15407 15408 // Expressions like &(*pt++) will be incremented per evaluation. 15409 // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression 15410 // per call. 
15411 Address DestAddr = EmitPointerWithAlignment(E->getArg(1)); 15412 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy), 15413 DestAddr.getAlignment()); 15414 llvm::Value *DestAddress = DestAddr.getPointer(); 15415 15416 // Operands are Base, Dest, Modifier. 15417 // The intrinsic format in LLVM IR is defined as 15418 // { ValueType, i8* } (i8*, i32). 15419 llvm::Value *Result = Builder.CreateCall( 15420 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))}); 15421 15422 // The value needs to be stored as the variable is passed by reference. 15423 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0); 15424 15425 // The store needs to be truncated to fit the destination type. 15426 // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs 15427 // to be handled with stores of respective destination type. 15428 DestVal = Builder.CreateTrunc(DestVal, DestTy); 15429 15430 llvm::Value *DestForStore = 15431 Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo()); 15432 Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment()); 15433 // The updated value of the base pointer is returned. 15434 return Builder.CreateExtractValue(Result, 1); 15435 }; 15436 15437 auto V2Q = [this, VecLen] (llvm::Value *Vec) { 15438 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B 15439 : Intrinsic::hexagon_V6_vandvrt; 15440 return Builder.CreateCall(CGM.getIntrinsic(ID), 15441 {Vec, Builder.getInt32(-1)}); 15442 }; 15443 auto Q2V = [this, VecLen] (llvm::Value *Pred) { 15444 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B 15445 : Intrinsic::hexagon_V6_vandqrt; 15446 return Builder.CreateCall(CGM.getIntrinsic(ID), 15447 {Pred, Builder.getInt32(-1)}); 15448 }; 15449 15450 switch (BuiltinID) { 15451 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR, 15452 // and the corresponding C/C++ builtins use loads/stores to update 15453 // the predicate. 
15454 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: 15455 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: 15456 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry: 15457 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { 15458 // Get the type from the 0-th argument. 15459 llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); 15460 Address PredAddr = Builder.CreateBitCast( 15461 EmitPointerWithAlignment(E->getArg(2)), VecType->getPointerTo(0)); 15462 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr)); 15463 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), 15464 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn}); 15465 15466 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1); 15467 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(), 15468 PredAddr.getAlignment()); 15469 return Builder.CreateExtractValue(Result, 0); 15470 } 15471 15472 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: 15473 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: 15474 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: 15475 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci: 15476 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci: 15477 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci: 15478 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr: 15479 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr: 15480 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr: 15481 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr: 15482 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr: 15483 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr: 15484 return MakeCircOp(ID, /*IsLoad=*/true); 15485 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci: 15486 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci: 15487 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci: 15488 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci: 15489 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci: 15490 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr: 15491 case 
Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr: 15492 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr: 15493 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr: 15494 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr: 15495 return MakeCircOp(ID, /*IsLoad=*/false); 15496 case Hexagon::BI__builtin_brev_ldub: 15497 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty); 15498 case Hexagon::BI__builtin_brev_ldb: 15499 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty); 15500 case Hexagon::BI__builtin_brev_lduh: 15501 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty); 15502 case Hexagon::BI__builtin_brev_ldh: 15503 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty); 15504 case Hexagon::BI__builtin_brev_ldw: 15505 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); 15506 case Hexagon::BI__builtin_brev_ldd: 15507 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); 15508 15509 default: { 15510 if (ID == Intrinsic::not_intrinsic) 15511 return nullptr; 15512 15513 auto IsVectorPredTy = [] (llvm::Type *T) { 15514 return T->isVectorTy() && T->getVectorElementType()->isIntegerTy(1); 15515 }; 15516 15517 llvm::Function *IntrFn = CGM.getIntrinsic(ID); 15518 llvm::FunctionType *IntrTy = IntrFn->getFunctionType(); 15519 SmallVector<llvm::Value*,4> Ops; 15520 for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) { 15521 llvm::Type *T = IntrTy->getParamType(i); 15522 const Expr *A = E->getArg(i); 15523 if (IsVectorPredTy(T)) { 15524 // There will be an implicit cast to a boolean vector. Strip it. 
15525 if (auto *Cast = dyn_cast<ImplicitCastExpr>(A)) { 15526 if (Cast->getCastKind() == CK_BitCast) 15527 A = Cast->getSubExpr(); 15528 } 15529 Ops.push_back(V2Q(EmitScalarExpr(A))); 15530 } else { 15531 Ops.push_back(EmitScalarExpr(A)); 15532 } 15533 } 15534 15535 llvm::Value *Call = Builder.CreateCall(IntrFn, Ops); 15536 if (IsVectorPredTy(IntrTy->getReturnType())) 15537 Call = Q2V(Call); 15538 15539 return Call; 15540 } // default 15541 } // switch 15542 15543 return nullptr; 15544 } 15545