//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeAArch64TTIPass(PassRegistry &);
}

namespace {

class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
  const AArch64TargetMachine *TM;
  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

public:
  AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
    llvm_unreachable("This pass cannot be directly constructed");
  }

  AArch64TTI(const AArch64TargetMachine *TM)
      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
        TLI(TM->getSubtargetImpl()->getTargetLowering()) {
    initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
  }

  void initializePass() override { pushTTIStack(this); }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    TargetTransformInfo::getAnalysisUsage(AU);
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
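  /// With multiple inheritance the ImmutablePass and TargetTransformInfo
  /// subobjects live at different offsets inside AArch64TTI, so 'this' must
  /// be explicitly adjusted before it is handed out as a TargetTransformInfo
  /// pointer.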
  void *getAdjustedAnalysisPointer(const void *ID) override {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo *)this;
    return this;
  }

  /// \name Scalar TTI Implementations
  /// @{
  unsigned getIntImmCost(int64_t Val) const;
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;
  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;
  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) const override {
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  unsigned getRegisterBitWidth(bool Vector) const override {
    if (Vector) {
      if (ST->hasNEON())
        return 128;
      return 0;
    }
    return 64;
  }

  unsigned getMaxInterleaveFactor() const override;

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
                            Type *Src) const override;

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const override;

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None) const override;

  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                              Type *CondTy) const override;

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) const override;

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;

  /// @}
};

} // end anonymous namespace

INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti",
                   "AArch64 Target Transform Info", true, true, false)
char AArch64TTI::ID = 0;

ImmutablePass *
llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
  return new AArch64TTI(TM);
}

/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  unsigned LZ = countLeadingZeros((uint64_t)Val);
  return (64 - LZ + 15) / 16;
}

/// \brief Calculate the cost of materializing the given constant.
unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
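  // Illustrative example: an i128 constant is costed as two 64-bit chunks,
  // each charged one unit per 16-bit piece a MOVZ/MOVK sequence would need;
  // e.g. a chunk equal to 0x12345 takes two pieces (MOVZ + MOVK), cost 2.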
  unsigned Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1U, Cost);
}

unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
                                   const APInt &Imm, Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TCC_Basic;
    return TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    unsigned NumConstants = (BitSize + 63) / 64;
    unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TCC_Basic)
               ? static_cast<unsigned>(TCC_Free) : Cost;
  }
  return AArch64TTI::getIntImmCost(Imm, Ty);
}
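
// As above, but for an immediate appearing as operand Idx of an intrinsic
// call. Stackmap and patchpoint operands that fit in 64 bits are free: they
// are encoded in the stackmap record rather than materialized in code.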
unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                   const APInt &Imm, Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TCC_Free;

  switch (IID) {
  default:
    return TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      unsigned NumConstants = (BitSize + 63) / 64;
      unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TCC_Basic)
                 ? static_cast<unsigned>(TCC_Free) : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TCC_Free;
    break;
  }
  return AArch64TTI::getIntImmCost(Imm, Ty);
}

AArch64TTI::PopcntSupportKind
AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128-bit popcount.
  return PSK_Software;
}

unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
                                      Type *Src) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  EVT SrcTy = TLI->getValueType(Src);
  EVT DstTy = TLI->getValueType(Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8,  4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8,  3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },

    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
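
    // The "complex" FP_TO_INT entries below also charge for the extra
    // lengthening or narrowing step around the conversion itself (e.g. an
    // fcvtl before the fcvtzs, or an xtn after it); these instruction names
    // are illustrative of the typical lowering.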

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8,  MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
  };

  int Idx = ConvertCostTableLookup<MVT>(
      ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
      SrcTy.getSimpleVT());
  if (Idx != -1)
    return ConversionTbl[Idx].Cost;

  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}

unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                        unsigned Index) const {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other insert/extracts cost this much.
  return 2;
}

unsigned AArch64TTI::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo) const {
  // Legalize the type.
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (ISD == ISD::SDIV &&
      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
    // On AArch64, scalar signed division by a power-of-two constant is
    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
    // The OperandValue properties may not be the same as those of the
    // previous operation; conservatively assume OP_None.
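    // For example (illustrative lowering), a signed 32-bit division by 8 is
    // typically emitted as:
    //   add  w8, w0, #7
    //   cmp  w0, #0
    //   csel w8, w8, w0, lt
    //   asr  w0, w8, #3
    // which is why four single-instruction costs are summed below.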
    unsigned Cost =
        getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                               TargetTransformInfo::OP_None,
                               TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                   TargetTransformInfo::OP_None,
                                   TargetTransformInfo::OP_None);
    return Cost;
  }

  switch (ISD) {
  default:
    return TargetTransformInfo::getArithmeticInstrCost(
        Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return 1 * LT.first;
  }
}

unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;

  if (Ty->isVectorTy() && IsComplex)
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                        Type *CondTy) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower vector selects well when they are wider than the register
  // width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    unsigned AmortizationCost = 20;
    static const TypeConversionCostTblEntry<MVT::SimpleValueType>
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i32,  8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(CondTy);
    EVT SelValTy = TLI->getValueType(ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      int Idx =
          ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
                                 SelValTy.getSimpleVT());
      if (Idx != -1)
        return VectorSelectTbl[Idx].Cost;
    }
  }
  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
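
// Cost of a single load or store. Two cases get special treatment below:
// unaligned v2i64 stores, which are kept whole but made expensive, and short
// i8 vectors, which are scalarized because there is no v.4b register.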
unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     unsigned Alignment,
                                     unsigned AddressSpace) const {
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);

  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isIntegerTy(64)) {
    // Unaligned stores are extremely inefficient. We don't split unaligned
    // v2i64 stores because of the negative impact that has been observed in
    // practice on inlined memcpy code.
    // We make v2i64 stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    unsigned AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
      Src->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is no v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Src->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  return LT.first;
}

unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
  unsigned Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTI::getMaxInterleaveFactor() const {
  if (ST->isCortexA57())
    return 4;
  return 2;
}