1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file implements the LegalizerHelper class to legalize 11 /// individual instructions and the LegalizeMachineIR wrapper pass for the 12 /// primary legalization. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 17 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/CodeGen/TargetInstrInfo.h" 21 #include "llvm/CodeGen/TargetLowering.h" 22 #include "llvm/CodeGen/TargetSubtargetInfo.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 #define DEBUG_TYPE "legalizer" 28 29 using namespace llvm; 30 using namespace LegalizeActions; 31 32 LegalizerHelper::LegalizerHelper(MachineFunction &MF) 33 : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) { 34 MIRBuilder.setMF(MF); 35 } 36 37 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI) 38 : MRI(MF.getRegInfo()), LI(LI) { 39 MIRBuilder.setMF(MF); 40 } 41 LegalizerHelper::LegalizeResult 42 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { 43 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); 44 45 auto Step = LI.getAction(MI, MRI); 46 switch (Step.Action) { 47 case Legal: 48 LLVM_DEBUG(dbgs() << ".. Already legal\n"); 49 return AlreadyLegal; 50 case Libcall: 51 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); 52 return libcall(MI); 53 case NarrowScalar: 54 LLVM_DEBUG(dbgs() << ".. 
Narrow scalar\n"); 55 return narrowScalar(MI, Step.TypeIdx, Step.NewType); 56 case WidenScalar: 57 LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); 58 return widenScalar(MI, Step.TypeIdx, Step.NewType); 59 case Lower: 60 LLVM_DEBUG(dbgs() << ".. Lower\n"); 61 return lower(MI, Step.TypeIdx, Step.NewType); 62 case FewerElements: 63 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); 64 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); 65 case Custom: 66 LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); 67 return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized 68 : UnableToLegalize; 69 default: 70 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); 71 return UnableToLegalize; 72 } 73 } 74 75 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, 76 SmallVectorImpl<unsigned> &VRegs) { 77 for (int i = 0; i < NumParts; ++i) 78 VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); 79 MIRBuilder.buildUnmerge(VRegs, Reg); 80 } 81 82 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { 83 switch (Opcode) { 84 case TargetOpcode::G_SDIV: 85 assert(Size == 32 && "Unsupported size"); 86 return RTLIB::SDIV_I32; 87 case TargetOpcode::G_UDIV: 88 assert(Size == 32 && "Unsupported size"); 89 return RTLIB::UDIV_I32; 90 case TargetOpcode::G_SREM: 91 assert(Size == 32 && "Unsupported size"); 92 return RTLIB::SREM_I32; 93 case TargetOpcode::G_UREM: 94 assert(Size == 32 && "Unsupported size"); 95 return RTLIB::UREM_I32; 96 case TargetOpcode::G_FADD: 97 assert((Size == 32 || Size == 64) && "Unsupported size"); 98 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; 99 case TargetOpcode::G_FSUB: 100 assert((Size == 32 || Size == 64) && "Unsupported size"); 101 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; 102 case TargetOpcode::G_FMUL: 103 assert((Size == 32 || Size == 64) && "Unsupported size"); 104 return Size == 64 ? 
RTLIB::MUL_F64 : RTLIB::MUL_F32; 105 case TargetOpcode::G_FDIV: 106 assert((Size == 32 || Size == 64) && "Unsupported size"); 107 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; 108 case TargetOpcode::G_FREM: 109 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; 110 case TargetOpcode::G_FPOW: 111 return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; 112 case TargetOpcode::G_FMA: 113 assert((Size == 32 || Size == 64) && "Unsupported size"); 114 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; 115 } 116 llvm_unreachable("Unknown libcall function"); 117 } 118 119 LegalizerHelper::LegalizeResult 120 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, 121 const CallLowering::ArgInfo &Result, 122 ArrayRef<CallLowering::ArgInfo> Args) { 123 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); 124 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); 125 const char *Name = TLI.getLibcallName(Libcall); 126 127 MIRBuilder.getMF().getFrameInfo().setHasCalls(true); 128 if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), 129 MachineOperand::CreateES(Name), Result, Args)) 130 return LegalizerHelper::UnableToLegalize; 131 132 return LegalizerHelper::Legalized; 133 } 134 135 // Useful for libcalls where all operands have the same type. 
136 static LegalizerHelper::LegalizeResult 137 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, 138 Type *OpType) { 139 auto Libcall = getRTLibDesc(MI.getOpcode(), Size); 140 141 SmallVector<CallLowering::ArgInfo, 3> Args; 142 for (unsigned i = 1; i < MI.getNumOperands(); i++) 143 Args.push_back({MI.getOperand(i).getReg(), OpType}); 144 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, 145 Args); 146 } 147 148 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, 149 Type *FromType) { 150 auto ToMVT = MVT::getVT(ToType); 151 auto FromMVT = MVT::getVT(FromType); 152 153 switch (Opcode) { 154 case TargetOpcode::G_FPEXT: 155 return RTLIB::getFPEXT(FromMVT, ToMVT); 156 case TargetOpcode::G_FPTRUNC: 157 return RTLIB::getFPROUND(FromMVT, ToMVT); 158 case TargetOpcode::G_FPTOSI: 159 return RTLIB::getFPTOSINT(FromMVT, ToMVT); 160 case TargetOpcode::G_FPTOUI: 161 return RTLIB::getFPTOUINT(FromMVT, ToMVT); 162 case TargetOpcode::G_SITOFP: 163 return RTLIB::getSINTTOFP(FromMVT, ToMVT); 164 case TargetOpcode::G_UITOFP: 165 return RTLIB::getUINTTOFP(FromMVT, ToMVT); 166 } 167 llvm_unreachable("Unsupported libcall function"); 168 } 169 170 static LegalizerHelper::LegalizeResult 171 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, 172 Type *FromType) { 173 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); 174 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, 175 {{MI.getOperand(1).getReg(), FromType}}); 176 } 177 178 LegalizerHelper::LegalizeResult 179 LegalizerHelper::libcall(MachineInstr &MI) { 180 LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); 181 unsigned Size = LLTy.getSizeInBits(); 182 auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); 183 184 MIRBuilder.setInstr(MI); 185 186 switch (MI.getOpcode()) { 187 default: 188 return UnableToLegalize; 189 case TargetOpcode::G_SDIV: 190 case TargetOpcode::G_UDIV: 
191 case TargetOpcode::G_SREM: 192 case TargetOpcode::G_UREM: { 193 Type *HLTy = Type::getInt32Ty(Ctx); 194 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 195 if (Status != Legalized) 196 return Status; 197 break; 198 } 199 case TargetOpcode::G_FADD: 200 case TargetOpcode::G_FSUB: 201 case TargetOpcode::G_FMUL: 202 case TargetOpcode::G_FDIV: 203 case TargetOpcode::G_FMA: 204 case TargetOpcode::G_FPOW: 205 case TargetOpcode::G_FREM: { 206 Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); 207 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 208 if (Status != Legalized) 209 return Status; 210 break; 211 } 212 case TargetOpcode::G_FPEXT: { 213 // FIXME: Support other floating point types (half, fp128 etc) 214 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 215 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 216 if (ToSize != 64 || FromSize != 32) 217 return UnableToLegalize; 218 LegalizeResult Status = conversionLibcall( 219 MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); 220 if (Status != Legalized) 221 return Status; 222 break; 223 } 224 case TargetOpcode::G_FPTRUNC: { 225 // FIXME: Support other floating point types (half, fp128 etc) 226 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 227 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 228 if (ToSize != 32 || FromSize != 64) 229 return UnableToLegalize; 230 LegalizeResult Status = conversionLibcall( 231 MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); 232 if (Status != Legalized) 233 return Status; 234 break; 235 } 236 case TargetOpcode::G_FPTOSI: 237 case TargetOpcode::G_FPTOUI: { 238 // FIXME: Support other types 239 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 240 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 241 if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) 242 
return UnableToLegalize; 243 LegalizeResult Status = conversionLibcall( 244 MI, MIRBuilder, Type::getInt32Ty(Ctx), 245 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); 246 if (Status != Legalized) 247 return Status; 248 break; 249 } 250 case TargetOpcode::G_SITOFP: 251 case TargetOpcode::G_UITOFP: { 252 // FIXME: Support other types 253 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 254 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 255 if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) 256 return UnableToLegalize; 257 LegalizeResult Status = conversionLibcall( 258 MI, MIRBuilder, 259 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), 260 Type::getInt32Ty(Ctx)); 261 if (Status != Legalized) 262 return Status; 263 break; 264 } 265 } 266 267 MI.eraseFromParent(); 268 return Legalized; 269 } 270 271 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, 272 unsigned TypeIdx, 273 LLT NarrowTy) { 274 // FIXME: Don't know how to handle secondary types yet. 275 if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT) 276 return UnableToLegalize; 277 278 MIRBuilder.setInstr(MI); 279 280 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 281 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 282 283 switch (MI.getOpcode()) { 284 default: 285 return UnableToLegalize; 286 case TargetOpcode::G_IMPLICIT_DEF: { 287 // FIXME: add support for when SizeOp0 isn't an exact multiple of 288 // NarrowSize. 
289 if (SizeOp0 % NarrowSize != 0) 290 return UnableToLegalize; 291 int NumParts = SizeOp0 / NarrowSize; 292 293 SmallVector<unsigned, 2> DstRegs; 294 for (int i = 0; i < NumParts; ++i) 295 DstRegs.push_back( 296 MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); 297 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); 298 MI.eraseFromParent(); 299 return Legalized; 300 } 301 case TargetOpcode::G_ADD: { 302 // FIXME: add support for when SizeOp0 isn't an exact multiple of 303 // NarrowSize. 304 if (SizeOp0 % NarrowSize != 0) 305 return UnableToLegalize; 306 // Expand in terms of carry-setting/consuming G_ADDE instructions. 307 int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 308 309 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 310 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 311 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 312 313 unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); 314 MIRBuilder.buildConstant(CarryIn, 0); 315 316 for (int i = 0; i < NumParts; ++i) { 317 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 318 unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 319 320 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], 321 Src2Regs[i], CarryIn); 322 323 DstRegs.push_back(DstReg); 324 CarryIn = CarryOut; 325 } 326 unsigned DstReg = MI.getOperand(0).getReg(); 327 MIRBuilder.buildMerge(DstReg, DstRegs); 328 MI.eraseFromParent(); 329 return Legalized; 330 } 331 case TargetOpcode::G_EXTRACT: { 332 if (TypeIdx != 1) 333 return UnableToLegalize; 334 335 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 336 // FIXME: add support for when SizeOp1 isn't an exact multiple of 337 // NarrowSize. 
338 if (SizeOp1 % NarrowSize != 0) 339 return UnableToLegalize; 340 int NumParts = SizeOp1 / NarrowSize; 341 342 SmallVector<unsigned, 2> SrcRegs, DstRegs; 343 SmallVector<uint64_t, 2> Indexes; 344 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 345 346 unsigned OpReg = MI.getOperand(0).getReg(); 347 uint64_t OpStart = MI.getOperand(2).getImm(); 348 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 349 for (int i = 0; i < NumParts; ++i) { 350 unsigned SrcStart = i * NarrowSize; 351 352 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { 353 // No part of the extract uses this subregister, ignore it. 354 continue; 355 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 356 // The entire subregister is extracted, forward the value. 357 DstRegs.push_back(SrcRegs[i]); 358 continue; 359 } 360 361 // OpSegStart is where this destination segment would start in OpReg if it 362 // extended infinitely in both directions. 363 int64_t ExtractOffset; 364 uint64_t SegSize; 365 if (OpStart < SrcStart) { 366 ExtractOffset = 0; 367 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); 368 } else { 369 ExtractOffset = OpStart - SrcStart; 370 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); 371 } 372 373 unsigned SegReg = SrcRegs[i]; 374 if (ExtractOffset != 0 || SegSize != NarrowSize) { 375 // A genuine extract is needed. 376 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 377 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); 378 } 379 380 DstRegs.push_back(SegReg); 381 } 382 383 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); 384 MI.eraseFromParent(); 385 return Legalized; 386 } 387 case TargetOpcode::G_INSERT: { 388 // FIXME: add support for when SizeOp0 isn't an exact multiple of 389 // NarrowSize. 
390 if (SizeOp0 % NarrowSize != 0) 391 return UnableToLegalize; 392 393 int NumParts = SizeOp0 / NarrowSize; 394 395 SmallVector<unsigned, 2> SrcRegs, DstRegs; 396 SmallVector<uint64_t, 2> Indexes; 397 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 398 399 unsigned OpReg = MI.getOperand(2).getReg(); 400 uint64_t OpStart = MI.getOperand(3).getImm(); 401 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 402 for (int i = 0; i < NumParts; ++i) { 403 unsigned DstStart = i * NarrowSize; 404 405 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { 406 // No part of the insert affects this subregister, forward the original. 407 DstRegs.push_back(SrcRegs[i]); 408 continue; 409 } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 410 // The entire subregister is defined by this insert, forward the new 411 // value. 412 DstRegs.push_back(OpReg); 413 continue; 414 } 415 416 // OpSegStart is where this destination segment would start in OpReg if it 417 // extended infinitely in both directions. 418 int64_t ExtractOffset, InsertOffset; 419 uint64_t SegSize; 420 if (OpStart < DstStart) { 421 InsertOffset = 0; 422 ExtractOffset = DstStart - OpStart; 423 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); 424 } else { 425 InsertOffset = OpStart - DstStart; 426 ExtractOffset = 0; 427 SegSize = 428 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); 429 } 430 431 unsigned SegReg = OpReg; 432 if (ExtractOffset != 0 || SegSize != OpSize) { 433 // A genuine extract is needed. 
434 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 435 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); 436 } 437 438 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 439 MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); 440 DstRegs.push_back(DstReg); 441 } 442 443 assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); 444 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); 445 MI.eraseFromParent(); 446 return Legalized; 447 } 448 case TargetOpcode::G_LOAD: { 449 // FIXME: add support for when SizeOp0 isn't an exact multiple of 450 // NarrowSize. 451 if (SizeOp0 % NarrowSize != 0) 452 return UnableToLegalize; 453 454 const auto &MMO = **MI.memoperands_begin(); 455 // This implementation doesn't work for atomics. Give up instead of doing 456 // something invalid. 457 if (MMO.getOrdering() != AtomicOrdering::NotAtomic || 458 MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) 459 return UnableToLegalize; 460 461 int NumParts = SizeOp0 / NarrowSize; 462 LLT OffsetTy = LLT::scalar( 463 MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); 464 465 SmallVector<unsigned, 2> DstRegs; 466 for (int i = 0; i < NumParts; ++i) { 467 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 468 unsigned SrcReg = 0; 469 unsigned Adjustment = i * NarrowSize / 8; 470 471 MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( 472 MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), 473 NarrowSize / 8, i == 0 ? 
MMO.getAlignment() : NarrowSize / 8, 474 MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), 475 MMO.getOrdering(), MMO.getFailureOrdering()); 476 477 MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, 478 Adjustment); 479 480 MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO); 481 482 DstRegs.push_back(DstReg); 483 } 484 unsigned DstReg = MI.getOperand(0).getReg(); 485 MIRBuilder.buildMerge(DstReg, DstRegs); 486 MI.eraseFromParent(); 487 return Legalized; 488 } 489 case TargetOpcode::G_STORE: { 490 // FIXME: add support for when SizeOp0 isn't an exact multiple of 491 // NarrowSize. 492 if (SizeOp0 % NarrowSize != 0) 493 return UnableToLegalize; 494 495 const auto &MMO = **MI.memoperands_begin(); 496 // This implementation doesn't work for atomics. Give up instead of doing 497 // something invalid. 498 if (MMO.getOrdering() != AtomicOrdering::NotAtomic || 499 MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) 500 return UnableToLegalize; 501 502 int NumParts = SizeOp0 / NarrowSize; 503 LLT OffsetTy = LLT::scalar( 504 MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); 505 506 SmallVector<unsigned, 2> SrcRegs; 507 extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); 508 509 for (int i = 0; i < NumParts; ++i) { 510 unsigned DstReg = 0; 511 unsigned Adjustment = i * NarrowSize / 8; 512 513 MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( 514 MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), 515 NarrowSize / 8, i == 0 ? 
MMO.getAlignment() : NarrowSize / 8, 516 MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), 517 MMO.getOrdering(), MMO.getFailureOrdering()); 518 519 MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, 520 Adjustment); 521 522 MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO); 523 } 524 MI.eraseFromParent(); 525 return Legalized; 526 } 527 case TargetOpcode::G_CONSTANT: { 528 // FIXME: add support for when SizeOp0 isn't an exact multiple of 529 // NarrowSize. 530 if (SizeOp0 % NarrowSize != 0) 531 return UnableToLegalize; 532 int NumParts = SizeOp0 / NarrowSize; 533 const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); 534 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 535 536 SmallVector<unsigned, 2> DstRegs; 537 for (int i = 0; i < NumParts; ++i) { 538 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 539 ConstantInt *CI = 540 ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize)); 541 MIRBuilder.buildConstant(DstReg, *CI); 542 DstRegs.push_back(DstReg); 543 } 544 unsigned DstReg = MI.getOperand(0).getReg(); 545 MIRBuilder.buildMerge(DstReg, DstRegs); 546 MI.eraseFromParent(); 547 return Legalized; 548 } 549 case TargetOpcode::G_OR: { 550 // Legalize bitwise operation: 551 // A = BinOp<Ty> B, C 552 // into: 553 // B1, ..., BN = G_UNMERGE_VALUES B 554 // C1, ..., CN = G_UNMERGE_VALUES C 555 // A1 = BinOp<Ty/N> B1, C2 556 // ... 557 // AN = BinOp<Ty/N> BN, CN 558 // A = G_MERGE_VALUES A1, ..., AN 559 560 // FIXME: add support for when SizeOp0 isn't an exact multiple of 561 // NarrowSize. 562 if (SizeOp0 % NarrowSize != 0) 563 return UnableToLegalize; 564 int NumParts = SizeOp0 / NarrowSize; 565 566 // List the registers where the destination will be scattered. 567 SmallVector<unsigned, 2> DstRegs; 568 // List the registers where the first argument will be split. 569 SmallVector<unsigned, 2> SrcsReg1; 570 // List the registers where the second argument will be split. 
571 SmallVector<unsigned, 2> SrcsReg2; 572 // Create all the temporary registers. 573 for (int i = 0; i < NumParts; ++i) { 574 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 575 unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); 576 unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); 577 578 DstRegs.push_back(DstReg); 579 SrcsReg1.push_back(SrcReg1); 580 SrcsReg2.push_back(SrcReg2); 581 } 582 // Explode the big arguments into smaller chunks. 583 MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); 584 MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); 585 586 // Do the operation on each small part. 587 for (int i = 0; i < NumParts; ++i) 588 MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]); 589 590 // Gather the destination registers into the final destination. 591 unsigned DstReg = MI.getOperand(0).getReg(); 592 MIRBuilder.buildMerge(DstReg, DstRegs); 593 MI.eraseFromParent(); 594 return Legalized; 595 } 596 } 597 } 598 599 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, 600 unsigned OpIdx, unsigned ExtOpcode) { 601 MachineOperand &MO = MI.getOperand(OpIdx); 602 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg()); 603 MO.setReg(ExtB->getOperand(0).getReg()); 604 } 605 606 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, 607 unsigned OpIdx, unsigned TruncOpcode) { 608 MachineOperand &MO = MI.getOperand(OpIdx); 609 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); 610 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 611 MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt); 612 MO.setReg(DstExt); 613 } 614 615 LegalizerHelper::LegalizeResult 616 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { 617 MIRBuilder.setInstr(MI); 618 619 switch (MI.getOpcode()) { 620 default: 621 return UnableToLegalize; 622 case TargetOpcode::G_CTTZ: 623 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 624 case 
TargetOpcode::G_CTLZ: 625 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 626 case TargetOpcode::G_CTPOP: { 627 // First ZEXT the input. 628 auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg()); 629 LLT CurTy = MRI.getType(MI.getOperand(0).getReg()); 630 if (MI.getOpcode() == TargetOpcode::G_CTTZ) { 631 // The count is the same in the larger type except if the original 632 // value was zero. This can be handled by setting the bit just off 633 // the top of the original type. 634 auto TopBit = 635 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); 636 MIBSrc = MIRBuilder.buildInstr( 637 TargetOpcode::G_OR, WideTy, MIBSrc, 638 MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())); 639 } 640 // Perform the operation at the larger size. 641 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), WideTy, MIBSrc); 642 // This is already the correct result for CTPOP and CTTZs 643 if (MI.getOpcode() == TargetOpcode::G_CTLZ || 644 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { 645 // The correct result is NewOp - (Difference in widety and current ty). 646 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); 647 MIBNewOp = 648 MIRBuilder.buildInstr(TargetOpcode::G_SUB, WideTy, MIBNewOp, 649 MIRBuilder.buildConstant(WideTy, SizeDiff)); 650 } 651 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 652 // Make the original instruction a trunc now, and update it's source. 653 MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); 654 MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg()); 655 MIRBuilder.recordInsertion(&MI); 656 return Legalized; 657 } 658 659 case TargetOpcode::G_ADD: 660 case TargetOpcode::G_AND: 661 case TargetOpcode::G_MUL: 662 case TargetOpcode::G_OR: 663 case TargetOpcode::G_XOR: 664 case TargetOpcode::G_SUB: 665 // Perform operation at larger width (any extension is fine here, high bits 666 // don't affect the result) and then truncate the result back to the 667 // original type. 
668 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 669 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 670 widenScalarDst(MI, WideTy); 671 MIRBuilder.recordInsertion(&MI); 672 return Legalized; 673 674 case TargetOpcode::G_SHL: 675 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 676 // The "number of bits to shift" operand must preserve its value as an 677 // unsigned integer: 678 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 679 widenScalarDst(MI, WideTy); 680 MIRBuilder.recordInsertion(&MI); 681 return Legalized; 682 683 case TargetOpcode::G_SDIV: 684 case TargetOpcode::G_SREM: 685 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 686 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 687 widenScalarDst(MI, WideTy); 688 MIRBuilder.recordInsertion(&MI); 689 return Legalized; 690 691 case TargetOpcode::G_ASHR: 692 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 693 // The "number of bits to shift" operand must preserve its value as an 694 // unsigned integer: 695 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 696 widenScalarDst(MI, WideTy); 697 MIRBuilder.recordInsertion(&MI); 698 return Legalized; 699 700 case TargetOpcode::G_UDIV: 701 case TargetOpcode::G_UREM: 702 case TargetOpcode::G_LSHR: 703 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 704 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 705 widenScalarDst(MI, WideTy); 706 MIRBuilder.recordInsertion(&MI); 707 return Legalized; 708 709 case TargetOpcode::G_SELECT: 710 if (TypeIdx != 0) 711 return UnableToLegalize; 712 // Perform operation at larger width (any extension is fine here, high bits 713 // don't affect the result) and then truncate the result back to the 714 // original type. 
715 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 716 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); 717 widenScalarDst(MI, WideTy); 718 MIRBuilder.recordInsertion(&MI); 719 return Legalized; 720 721 case TargetOpcode::G_FPTOSI: 722 case TargetOpcode::G_FPTOUI: 723 if (TypeIdx != 0) 724 return UnableToLegalize; 725 widenScalarDst(MI, WideTy); 726 MIRBuilder.recordInsertion(&MI); 727 return Legalized; 728 729 case TargetOpcode::G_SITOFP: 730 if (TypeIdx != 1) 731 return UnableToLegalize; 732 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 733 MIRBuilder.recordInsertion(&MI); 734 return Legalized; 735 736 case TargetOpcode::G_UITOFP: 737 if (TypeIdx != 1) 738 return UnableToLegalize; 739 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 740 MIRBuilder.recordInsertion(&MI); 741 return Legalized; 742 743 case TargetOpcode::G_INSERT: 744 if (TypeIdx != 0) 745 return UnableToLegalize; 746 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 747 widenScalarDst(MI, WideTy); 748 MIRBuilder.recordInsertion(&MI); 749 return Legalized; 750 751 case TargetOpcode::G_LOAD: 752 // For some types like i24, we might try to widen to i32. To properly handle 753 // this we should be using a dedicated extending load, until then avoid 754 // trying to legalize. 
755 if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) != 756 WideTy.getSizeInBits()) 757 return UnableToLegalize; 758 LLVM_FALLTHROUGH; 759 case TargetOpcode::G_SEXTLOAD: 760 case TargetOpcode::G_ZEXTLOAD: 761 widenScalarDst(MI, WideTy); 762 MIRBuilder.recordInsertion(&MI); 763 return Legalized; 764 765 case TargetOpcode::G_STORE: { 766 if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) || 767 WideTy != LLT::scalar(8)) 768 return UnableToLegalize; 769 770 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT); 771 MIRBuilder.recordInsertion(&MI); 772 return Legalized; 773 } 774 case TargetOpcode::G_CONSTANT: { 775 MachineOperand &SrcMO = MI.getOperand(1); 776 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 777 const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); 778 SrcMO.setCImm(ConstantInt::get(Ctx, Val)); 779 780 widenScalarDst(MI, WideTy); 781 MIRBuilder.recordInsertion(&MI); 782 return Legalized; 783 } 784 case TargetOpcode::G_FCONSTANT: { 785 MachineOperand &SrcMO = MI.getOperand(1); 786 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 787 APFloat Val = SrcMO.getFPImm()->getValueAPF(); 788 bool LosesInfo; 789 switch (WideTy.getSizeInBits()) { 790 case 32: 791 Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo); 792 break; 793 case 64: 794 Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); 795 break; 796 default: 797 llvm_unreachable("Unhandled fp widen type"); 798 } 799 SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); 800 801 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); 802 MIRBuilder.recordInsertion(&MI); 803 return Legalized; 804 } 805 case TargetOpcode::G_BRCOND: 806 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); 807 MIRBuilder.recordInsertion(&MI); 808 return Legalized; 809 810 case TargetOpcode::G_FCMP: 811 if (TypeIdx == 0) 812 widenScalarDst(MI, WideTy); 813 else { 814 widenScalarSrc(MI, WideTy, 2, 
TargetOpcode::G_FPEXT); 815 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT); 816 } 817 MIRBuilder.recordInsertion(&MI); 818 return Legalized; 819 820 case TargetOpcode::G_ICMP: 821 if (TypeIdx == 0) 822 widenScalarDst(MI, WideTy); 823 else { 824 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>( 825 MI.getOperand(1).getPredicate())) 826 ? TargetOpcode::G_SEXT 827 : TargetOpcode::G_ZEXT; 828 widenScalarSrc(MI, WideTy, 2, ExtOpcode); 829 widenScalarSrc(MI, WideTy, 3, ExtOpcode); 830 } 831 MIRBuilder.recordInsertion(&MI); 832 return Legalized; 833 834 case TargetOpcode::G_GEP: 835 assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); 836 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 837 MIRBuilder.recordInsertion(&MI); 838 return Legalized; 839 840 case TargetOpcode::G_PHI: { 841 assert(TypeIdx == 0 && "Expecting only Idx 0"); 842 843 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { 844 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 845 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 846 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT); 847 } 848 849 MachineBasicBlock &MBB = *MI.getParent(); 850 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); 851 widenScalarDst(MI, WideTy); 852 MIRBuilder.recordInsertion(&MI); 853 return Legalized; 854 } 855 } 856 } 857 858 LegalizerHelper::LegalizeResult 859 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 860 using namespace TargetOpcode; 861 MIRBuilder.setInstr(MI); 862 863 switch(MI.getOpcode()) { 864 default: 865 return UnableToLegalize; 866 case TargetOpcode::G_SREM: 867 case TargetOpcode::G_UREM: { 868 unsigned QuotReg = MRI.createGenericVirtualRegister(Ty); 869 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? 
G_SDIV : G_UDIV) 870 .addDef(QuotReg) 871 .addUse(MI.getOperand(1).getReg()) 872 .addUse(MI.getOperand(2).getReg()); 873 874 unsigned ProdReg = MRI.createGenericVirtualRegister(Ty); 875 MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); 876 MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), 877 ProdReg); 878 MI.eraseFromParent(); 879 return Legalized; 880 } 881 case TargetOpcode::G_SMULO: 882 case TargetOpcode::G_UMULO: { 883 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the 884 // result. 885 unsigned Res = MI.getOperand(0).getReg(); 886 unsigned Overflow = MI.getOperand(1).getReg(); 887 unsigned LHS = MI.getOperand(2).getReg(); 888 unsigned RHS = MI.getOperand(3).getReg(); 889 890 MIRBuilder.buildMul(Res, LHS, RHS); 891 892 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO 893 ? TargetOpcode::G_SMULH 894 : TargetOpcode::G_UMULH; 895 896 unsigned HiPart = MRI.createGenericVirtualRegister(Ty); 897 MIRBuilder.buildInstr(Opcode) 898 .addDef(HiPart) 899 .addUse(LHS) 900 .addUse(RHS); 901 902 unsigned Zero = MRI.createGenericVirtualRegister(Ty); 903 MIRBuilder.buildConstant(Zero, 0); 904 905 // For *signed* multiply, overflow is detected by checking: 906 // (hi != (lo >> bitwidth-1)) 907 if (Opcode == TargetOpcode::G_SMULH) { 908 unsigned Shifted = MRI.createGenericVirtualRegister(Ty); 909 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 910 MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); 911 MIRBuilder.buildInstr(TargetOpcode::G_ASHR) 912 .addDef(Shifted) 913 .addUse(Res) 914 .addUse(ShiftAmt); 915 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted); 916 } else { 917 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); 918 } 919 MI.eraseFromParent(); 920 return Legalized; 921 } 922 case TargetOpcode::G_FNEG: { 923 // TODO: Handle vector types once we are able to 924 // represent them. 
925 if (Ty.isVector()) 926 return UnableToLegalize; 927 unsigned Res = MI.getOperand(0).getReg(); 928 Type *ZeroTy; 929 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 930 switch (Ty.getSizeInBits()) { 931 case 16: 932 ZeroTy = Type::getHalfTy(Ctx); 933 break; 934 case 32: 935 ZeroTy = Type::getFloatTy(Ctx); 936 break; 937 case 64: 938 ZeroTy = Type::getDoubleTy(Ctx); 939 break; 940 case 128: 941 ZeroTy = Type::getFP128Ty(Ctx); 942 break; 943 default: 944 llvm_unreachable("unexpected floating-point type"); 945 } 946 ConstantFP &ZeroForNegation = 947 *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); 948 auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); 949 MIRBuilder.buildInstr(TargetOpcode::G_FSUB) 950 .addDef(Res) 951 .addUse(Zero->getOperand(0).getReg()) 952 .addUse(MI.getOperand(1).getReg()); 953 MI.eraseFromParent(); 954 return Legalized; 955 } 956 case TargetOpcode::G_FSUB: { 957 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). 958 // First, check if G_FNEG is marked as Lower. If so, we may 959 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. 
960 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) 961 return UnableToLegalize; 962 unsigned Res = MI.getOperand(0).getReg(); 963 unsigned LHS = MI.getOperand(1).getReg(); 964 unsigned RHS = MI.getOperand(2).getReg(); 965 unsigned Neg = MRI.createGenericVirtualRegister(Ty); 966 MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); 967 MIRBuilder.buildInstr(TargetOpcode::G_FADD) 968 .addDef(Res) 969 .addUse(LHS) 970 .addUse(Neg); 971 MI.eraseFromParent(); 972 return Legalized; 973 } 974 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { 975 unsigned OldValRes = MI.getOperand(0).getReg(); 976 unsigned SuccessRes = MI.getOperand(1).getReg(); 977 unsigned Addr = MI.getOperand(2).getReg(); 978 unsigned CmpVal = MI.getOperand(3).getReg(); 979 unsigned NewVal = MI.getOperand(4).getReg(); 980 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, 981 **MI.memoperands_begin()); 982 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); 983 MI.eraseFromParent(); 984 return Legalized; 985 } 986 case TargetOpcode::G_LOAD: 987 case TargetOpcode::G_SEXTLOAD: 988 case TargetOpcode::G_ZEXTLOAD: { 989 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT 990 unsigned DstReg = MI.getOperand(0).getReg(); 991 unsigned PtrReg = MI.getOperand(1).getReg(); 992 LLT DstTy = MRI.getType(DstReg); 993 auto &MMO = **MI.memoperands_begin(); 994 995 if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { 996 // In the case of G_LOAD, this was a non-extending load already and we're 997 // about to lower to the same instruction. 
998 if (MI.getOpcode() == TargetOpcode::G_LOAD) 999 return UnableToLegalize; 1000 MIRBuilder.buildLoad(DstReg, PtrReg, MMO); 1001 MI.eraseFromParent(); 1002 return Legalized; 1003 } 1004 1005 if (DstTy.isScalar()) { 1006 unsigned TmpReg = MRI.createGenericVirtualRegister( 1007 LLT::scalar(MMO.getSize() /* in bytes */ * 8)); 1008 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 1009 switch (MI.getOpcode()) { 1010 default: 1011 llvm_unreachable("Unexpected opcode"); 1012 case TargetOpcode::G_LOAD: 1013 MIRBuilder.buildAnyExt(DstReg, TmpReg); 1014 break; 1015 case TargetOpcode::G_SEXTLOAD: 1016 MIRBuilder.buildSExt(DstReg, TmpReg); 1017 break; 1018 case TargetOpcode::G_ZEXTLOAD: 1019 MIRBuilder.buildZExt(DstReg, TmpReg); 1020 break; 1021 } 1022 MI.eraseFromParent(); 1023 return Legalized; 1024 } 1025 1026 return UnableToLegalize; 1027 } 1028 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1029 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1030 case TargetOpcode::G_CTLZ: 1031 case TargetOpcode::G_CTTZ: 1032 case TargetOpcode::G_CTPOP: 1033 return lowerBitCount(MI, TypeIdx, Ty); 1034 } 1035 } 1036 1037 LegalizerHelper::LegalizeResult 1038 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 1039 LLT NarrowTy) { 1040 // FIXME: Don't know how to handle secondary types yet. 1041 if (TypeIdx != 0) 1042 return UnableToLegalize; 1043 switch (MI.getOpcode()) { 1044 default: 1045 return UnableToLegalize; 1046 case TargetOpcode::G_ADD: { 1047 unsigned NarrowSize = NarrowTy.getSizeInBits(); 1048 unsigned DstReg = MI.getOperand(0).getReg(); 1049 unsigned Size = MRI.getType(DstReg).getSizeInBits(); 1050 int NumParts = Size / NarrowSize; 1051 // FIXME: Don't know how to handle the situation where the small vectors 1052 // aren't all the same size yet. 
    // Bail out if the wide size is not a whole multiple of the narrow size
    // (see the FIXME above about differently-sized pieces).
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    MIRBuilder.setInstr(MI);

    // Split each source operand into NumParts registers of type NarrowTy.
    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Add corresponding pieces pairwise; the inner DstReg intentionally
    // shadows the outer one.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
      DstRegs.push_back(DstReg);
    }

    // Merge the partial sums back into the original (outer) DstReg.
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Lower the bit-counting opcodes (G_CTLZ/G_CTTZ/G_CTPOP and their
/// *_ZERO_UNDEF variants) into operations the target supports.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // Custom is treated like Legal: the target has promised to handle it.
  auto isLegalOrCustom = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isLegalOrCustom({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
      // If CTLZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCtlzZU =
          MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg);
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      // src == 0 ? bitwidth : ctlz_zero_undef(src)
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into every lower position, doubling the
    // shift each round.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, Ty, Op,
          MIRBuilder.buildInstr(TargetOpcode::G_LSHR, Ty, Op, MIBShiftAmt));
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, Ty, Op);
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, MI.getOperand(0).getReg(),
                          MIRBuilder.buildConstant(Ty, Len), MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isLegalOrCustom({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU =
          MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, Ty, SrcReg);
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      // src == 0 ? bitwidth : cttz_zero_undef(src)
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    // ~x & (x - 1): a mask of the trailing zero bits of x.
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, Ty, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, Ty, MIBNot,
        MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1));
    if (!isLegalOrCustom({TargetOpcode::G_CTPOP, {Ty}}) &&
        isLegalOrCustom({TargetOpcode::G_CTLZ, {Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, MI.getOperand(0).getReg(),
          MIBCstLen,
          MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, Ty, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse the original instruction as a CTPOP of the trailing-zeros mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}