//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                         const GCNTargetMachine &TM) {
  using namespace TargetOpcode;

  auto GetAddrSpacePtr = [&TM](unsigned AS) {
    return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
  };

  const LLT S1 = LLT::scalar(1);
  const LLT S8 = LLT::scalar(8);
  const LLT S16 = LLT::scalar(16);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT S128 = LLT::scalar(128);
  const LLT S256 = LLT::scalar(256);
  const LLT S512 = LLT::scalar(512);

  const LLT V2S16 = LLT::vector(2, 16);
  const LLT V4S16 = LLT::vector(4, 16);
  const LLT V8S16 = LLT::vector(8, 16);

  const LLT V2S32 = LLT::vector(2, 32);
  const LLT V3S32 = LLT::vector(3, 32);
  const LLT V4S32 = LLT::vector(4, 32);
  const LLT V5S32 = LLT::vector(5, 32);
  const LLT V6S32 = LLT::vector(6, 32);
  const LLT V7S32 = LLT::vector(7, 32);
  const LLT V8S32 = LLT::vector(8, 32);
  const LLT V9S32 = LLT::vector(9, 32);
  const LLT V10S32 = LLT::vector(10, 32);
  const LLT V11S32 = LLT::vector(11, 32);
  const LLT V12S32 = LLT::vector(12, 32);
  const LLT V13S32 = LLT::vector(13, 32);
  const LLT V14S32 = LLT::vector(14, 32);
  const LLT V15S32 = LLT::vector(15, 32);
  const LLT V16S32 = LLT::vector(16, 32);

  const LLT V2S64 = LLT::vector(2, 64);
  const LLT V3S64 = LLT::vector(3, 64);
  const LLT V4S64 = LLT::vector(4, 64);
  const LLT V5S64 = LLT::vector(5, 64);
  const LLT V6S64 = LLT::vector(6, 64);
  const LLT V7S64 = LLT::vector(7, 64);
  const LLT V8S64 = LLT::vector(8, 64);

  std::initializer_list<LLT> AllS32Vectors =
    {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
     V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
  std::initializer_list<LLT> AllS64Vectors =
    {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};

  const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
  const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
  const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);

  const LLT CodePtr = FlatPtr;

  const LLT AddrSpaces[] = {
    GlobalPtr,
    ConstantPtr,
    LocalPtr,
    FlatPtr,
    PrivatePtr
  };

  setAction({G_BRCOND, S1}, Legal);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_UMULH, G_SMULH})
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);
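  // For example (illustrative, not from the original comments): a 64-bit add
  // such as
  //   %2:_(s64) = G_ADD %0:_(s64), %1:_(s64)
  // is clamped to s32 here; the generic LegalizerHelper then (roughly)
  // expands it into a carry chain over 32-bit pieces using G_UADDE-style
  // operations.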
  // Report legal for any types we can handle anywhere. For the cases only
  // legal on the SALU, RegBankSelect will be able to re-legalize.
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
    .legalFor({S32, S1, S64, V2S32, V2S16, V4S16})
    .clampScalar(0, S32, S64)
    .scalarize(0);

  getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
                               G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
    .legalFor({{S32, S1}})
    .clampScalar(0, S32, S32);

  getActionDefinitionsBuilder(G_BITCAST)
    .legalForCartesianProduct({S32, V2S16})
    .legalForCartesianProduct({S64, V2S32, V4S16})
    .legalForCartesianProduct({V2S64, V4S32})
    // Don't worry about the size constraint.
    .legalIf(all(isPointer(0), isPointer(1)));

  getActionDefinitionsBuilder(G_FCONSTANT)
    .legalFor({S32, S64, S16});

  // G_IMPLICIT_DEF is a no-op so we can make it legal for any value type that
  // can fit in a register.
  // FIXME: We need to legalize several more operations before we can add
  // a test case for size > 512.
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
    .legalIf([=](const LegalityQuery &Query) {
      return Query.Types[0].getSizeInBits() <= 512;
    })
    .clampScalar(0, S1, S512);

  // FIXME: i1 operands to intrinsics should always be legal, but other i1
  // values may not be legal. We need to figure out how to distinguish
  // between these two scenarios.
  getActionDefinitionsBuilder(G_CONSTANT)
    .legalFor({S1, S32, S64, GlobalPtr,
               LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .clampScalar(0, S32, S64)
    .widenScalarToNextPow2(0)
    .legalIf(isPointer(0));

  setAction({G_FRAME_INDEX, PrivatePtr}, Legal);

  getActionDefinitionsBuilder({G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
    .legalFor({S32, S64})
    .scalarize(0)
    .clampScalar(0, S32, S64);

  getActionDefinitionsBuilder(G_FPTRUNC)
    .legalFor({{S32, S64}, {S16, S32}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
    .legalFor({{S64, S32}, {S32, S16}})
    .lowerFor({{S64, S16}}) // FIXME: Implement
    .scalarize(0);

  getActionDefinitionsBuilder(G_FSUB)
    // Use actual fsub instruction
    .legalFor({S32})
    // Must use fadd + fneg
    .lowerFor({S64, S16, V2S16})
    .scalarize(0)
    .clampScalar(0, S32, S64);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
    .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
               {S32, S1}, {S64, S1}, {S16, S1},
               // FIXME: Hack
               {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
    .legalFor({{S32, S32}, {S64, S32}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
    .legalFor({{S32, S32}, {S32, S64}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
    .legalFor({S32, S64})
    .scalarize(0);

  for (LLT PtrTy : AddrSpaces) {
    LLT IdxTy = LLT::scalar(PtrTy.getSizeInBits());
    setAction({G_GEP, PtrTy}, Legal);
    setAction({G_GEP, 1, IdxTy}, Legal);
  }
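  // For example (illustrative): the loop above makes G_GEP legal exactly when
  // the index width matches the pointer width for that address space. With a
  // 64-bit global pointer this accepts
  //   %2:_(p1) = G_GEP %0:_(p1), %1:_(s64)
  // while a 32-bit local (LDS) pointer pairs with an s32 index.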
  // FIXME: When RegBankSelect inserts copies, it will only create new
  // registers with scalar types. This means we can end up with
  // G_LOAD/G_STORE/G_GEP instructions with scalar types for their pointer
  // operands. In assert builds, the instruction selector will assert if it
  // sees a generic instruction which isn't legal, so we need to tell it that
  // scalar types are legal for pointer operands.
  setAction({G_GEP, S64}, Legal);

  setAction({G_BLOCK_ADDR, CodePtr}, Legal);

  getActionDefinitionsBuilder(G_ICMP)
    .legalForCartesianProduct(
      {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .legalFor({{S1, S32}, {S1, S64}})
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .legalIf(all(typeIs(0, S1), isPointer(1)));

  getActionDefinitionsBuilder(G_FCMP)
    .legalFor({{S1, S32}, {S1, S64}})
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0);

  // FIXME: fexp, flog2, flog10 need to be custom lowered.
  getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
                               G_FLOG, G_FLOG2, G_FLOG10})
    .legalFor({S32})
    .scalarize(0);

  // The 64-bit versions produce 32-bit results, but only on the SALU.
  getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
                               G_CTTZ, G_CTTZ_ZERO_UNDEF,
                               G_CTPOP})
    .legalFor({{S32, S32}, {S32, S64}})
    .clampScalar(0, S32, S32)
    .clampScalar(1, S32, S64);
  // TODO: Scalarize

  // TODO: Expand for > s32
  getActionDefinitionsBuilder(G_BSWAP)
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() <
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() >
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  getActionDefinitionsBuilder(G_INTTOPTR)
    // List the common cases
    .legalForCartesianProduct({GlobalPtr, ConstantPtr, FlatPtr}, {S64})
    .legalForCartesianProduct({LocalPtr, PrivatePtr}, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      })
    .narrowScalarIf(greaterThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      });

  getActionDefinitionsBuilder(G_PTRTOINT)
    // List the common cases
    .legalForCartesianProduct({GlobalPtr, ConstantPtr, FlatPtr}, {S64})
    .legalForCartesianProduct({LocalPtr, PrivatePtr}, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      })
    .narrowScalarIf(greaterThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      });
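  // For example (illustrative): the two builders above keep the integer side
  // of a pointer cast the same width as the pointer. A cast to a 32-bit
  // private pointer,
  //   %1:_(p5) = G_INTTOPTR %0:_(s16)
  // first has its s16 source widened to s32 so the sizes match; oversized
  // sources are narrowed symmetrically.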
  getActionDefinitionsBuilder({G_LOAD, G_STORE})
    .narrowScalarIf([](const LegalityQuery &Query) {
        unsigned Size = Query.Types[0].getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (Size > 32 && MemSize < Size);
      },
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(32));
      })
    .fewerElementsIf([=, &ST](const LegalityQuery &Query) {
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (MemSize == 96) &&
               Query.Types[0].isVector() &&
               ST.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS;
      },
      [=](const LegalityQuery &Query) {
        return std::make_pair(0, V2S32);
      })
    .legalIf([=, &ST](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];

        unsigned Size = Ty0.getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        if (Size < 32 || (Size > 32 && MemSize < Size))
          return false;

        if (Ty0.isVector() && Size != MemSize)
          return false;

        // TODO: Decompose private loads into 4-byte components.
        // TODO: Illegal flat loads on SI
        switch (MemSize) {
        case 8:
        case 16:
          return Size == 32;
        case 32:
        case 64:
        case 128:
          return true;

        case 96:
          // XXX hasLoadX3
          return (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS);

        case 256:
        case 512:
          // TODO: constant loads
        default:
          return false;
        }
      })
    .clampScalar(0, S32, S64);

  auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
                     .legalForTypesWithMemSize({
                         {S32, GlobalPtr, 8},
                         {S32, GlobalPtr, 16},
                         {S32, LocalPtr, 8},
                         {S32, LocalPtr, 16},
                         {S32, PrivatePtr, 8},
                         {S32, PrivatePtr, 16}});
  if (ST.hasFlatAddressSpace()) {
    ExtLoads.legalForTypesWithMemSize({{S32, FlatPtr, 8},
                                       {S32, FlatPtr, 16}});
  }

  ExtLoads.clampScalar(0, S32, S32)
          .widenScalarToNextPow2(0)
          .unsupportedIfMemSizeNotPow2()
          .lower();

  auto &Atomics = getActionDefinitionsBuilder(
    {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
     G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
     G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
     G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
    .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
               {S64, GlobalPtr}, {S64, LocalPtr}});
  if (ST.hasFlatAddressSpace()) {
    Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
  }

  // TODO: Pointer types, any 32-bit or 64-bit vector
  getActionDefinitionsBuilder(G_SELECT)
    .legalForCartesianProduct({S32, S64, V2S32, V2S16, V4S16,
                               GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
                               LLT::vector(2, LocalPtr),
                               LLT::vector(2, PrivatePtr)}, {S1})
    .clampScalar(0, S32, S64)
    .fewerElementsIf(
      [=](const LegalityQuery &Query) {
        if (Query.Types[1].isVector())
          return true;

        LLT Ty = Query.Types[0];

        // FIXME: Hack until odd splits handled
        return Ty.isVector() &&
               (Ty.getScalarSizeInBits() > 32 || Ty.getNumElements() % 2 != 0);
      },
      scalarize(0))
    // FIXME: Handle 16-bit vectors better
    .fewerElementsIf(
      [=](const LegalityQuery &Query) {
        return Query.Types[0].isVector() &&
               Query.Types[0].getElementType().getSizeInBits() < 32;
      },
      scalarize(0))
    .scalarize(1)
    .clampMaxNumElements(0, S32, 2)
    .clampMaxNumElements(0, LocalPtr, 2)
    .clampMaxNumElements(0, PrivatePtr, 2)
    .legalIf(all(isPointer(0), typeIs(1, S1)));
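  // For example (illustrative): only scalar s1 conditions survive here, so a
  // select with a vector condition such as
  //   %4:_(<2 x s32>) = G_SELECT %cond:_(<2 x s1>), %a:_(<2 x s32>), %b:_(<2 x s32>)
  // is broken into per-element selects with s1 conditions by the first
  // fewerElementsIf rule above.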
  // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
  // be more flexible with the shift amount type.
  auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
                   .legalFor({{S32, S32}, {S64, S32}});
  if (ST.has16BitInsts()) {
    Shifts.legalFor({{S16, S32}, {S16, S16}});
    Shifts.clampScalar(0, S16, S64);
  } else
    Shifts.clampScalar(0, S32, S64);
  Shifts.clampScalar(1, S32, S32);

  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
    unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
    unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
    unsigned IdxTypeIdx = 2;

    getActionDefinitionsBuilder(Op)
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &VecTy = Query.Types[VecTypeIdx];
          const LLT &IdxTy = Query.Types[IdxTypeIdx];
          return VecTy.getSizeInBits() % 32 == 0 &&
                 VecTy.getSizeInBits() <= 512 &&
                 IdxTy.getSizeInBits() == 32;
        })
      .clampScalar(EltTypeIdx, S32, S64)
      .clampScalar(VecTypeIdx, S32, S64)
      .clampScalar(IdxTypeIdx, S32, S32);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
    .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      });

  // FIXME: Doesn't handle extract of illegal sizes.
  getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
    .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        return (Ty0.getSizeInBits() % 16 == 0) &&
               (Ty1.getSizeInBits() % 16 == 0);
      })
    .widenScalarIf(
      [=](const LegalityQuery &Query) {
        const LLT &Ty1 = Query.Types[1];
        return (Ty1.getScalarSizeInBits() < 16);
      },
      // TODO: Use a generic LegalizeMutation
      [](const LegalityQuery &Query) {
        LLT Ty1 = Query.Types[1];
        unsigned NewEltSizeInBits =
          std::max(1 << Log2_32_Ceil(Ty1.getScalarSizeInBits()), 16);
        if (Ty1.isVector()) {
          return std::make_pair(1, LLT::vector(Ty1.getNumElements(),
                                               NewEltSizeInBits));
        }

        return std::make_pair(1, LLT::scalar(NewEltSizeInBits));
      });

  // TODO: vectors of pointers
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
    .legalForCartesianProduct(AllS32Vectors, {S32})
    .legalForCartesianProduct(AllS64Vectors, {S64})
    .clampNumElements(0, V16S32, V16S32)
    .clampNumElements(0, V2S64, V8S64)
    .minScalarSameAs(1, 0)
    // FIXME: Sort of a hack to make progress on other legalizations.
    .legalIf([=](const LegalityQuery &Query) {
      return Query.Types[0].getScalarSizeInBits() <= 32 ||
             Query.Types[0].getScalarSizeInBits() == 64;
    });

  // TODO: Support any combination of v2s32
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
    .legalFor({{V4S32, V2S32},
               {V8S32, V2S32},
               {V8S32, V4S32},
               {V4S64, V2S64},
               {V4S16, V2S16},
               {V8S16, V2S16},
               {V8S16, V4S16},
               {LLT::vector(4, LocalPtr), LLT::vector(2, LocalPtr)},
               {LLT::vector(4, PrivatePtr), LLT::vector(2, PrivatePtr)}});
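  // For example (illustrative): the {V4S32, V2S32} entry makes
  //   %2:_(<4 x s32>) = G_CONCAT_VECTORS %0:_(<2 x s32>), %1:_(<2 x s32>)
  // legal; combinations not listed are not yet supported (see the TODO
  // above).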
  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    getActionDefinitionsBuilder(Op)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
      // Clamp the little scalar to s16-s256 and make it a power of 2. It's
      // not worth considering the multiples of 64 since 2*192 and 2*384 are
      // not valid.
      .clampScalar(LitTyIdx, S16, S256)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)

      // Break up vectors with weird elements into scalars
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
        scalarize(0))
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
        scalarize(1))
      .clampScalar(BigTyIdx, S32, S512)
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT &Ty = Query.Types[BigTyIdx];
          return !isPowerOf2_32(Ty.getSizeInBits()) &&
                 Ty.getSizeInBits() % 16 != 0;
        },
        [=](const LegalityQuery &Query) {
          // Pick the next power of 2, or a multiple of 64 over 128,
          // whichever is smaller.
          const LLT &Ty = Query.Types[BigTyIdx];
          unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
          if (NewSizeInBits >= 256) {
            unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
            if (RoundedTo < NewSizeInBits)
              NewSizeInBits = RoundedTo;
          }
          return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
        })
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];

          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;

          return BigTy.getSizeInBits() % 16 == 0 &&
                 LitTy.getSizeInBits() % 16 == 0 &&
                 BigTy.getSizeInBits() <= 512;
        })
      // Any vectors left are the wrong size. Scalarize them.
      .scalarize(0)
      .scalarize(1);
  }

  computeTables();
  verify(*ST.getInstrInfo());
}