1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file implements the LegalizerHelper class to legalize 11 /// individual instructions and the LegalizeMachineIR wrapper pass for the 12 /// primary legalization. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 17 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/CodeGen/TargetInstrInfo.h" 21 #include "llvm/CodeGen/TargetLowering.h" 22 #include "llvm/CodeGen/TargetSubtargetInfo.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 #define DEBUG_TYPE "legalizer" 28 29 using namespace llvm; 30 using namespace LegalizeActions; 31 32 LegalizerHelper::LegalizerHelper(MachineFunction &MF) 33 : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) { 34 MIRBuilder.setMF(MF); 35 } 36 37 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI) 38 : MRI(MF.getRegInfo()), LI(LI) { 39 MIRBuilder.setMF(MF); 40 } 41 LegalizerHelper::LegalizeResult 42 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { 43 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); 44 45 auto Step = LI.getAction(MI, MRI); 46 switch (Step.Action) { 47 case Legal: 48 LLVM_DEBUG(dbgs() << ".. Already legal\n"); 49 return AlreadyLegal; 50 case Libcall: 51 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); 52 return libcall(MI); 53 case NarrowScalar: 54 LLVM_DEBUG(dbgs() << ".. 
Narrow scalar\n"); 55 return narrowScalar(MI, Step.TypeIdx, Step.NewType); 56 case WidenScalar: 57 LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); 58 return widenScalar(MI, Step.TypeIdx, Step.NewType); 59 case Lower: 60 LLVM_DEBUG(dbgs() << ".. Lower\n"); 61 return lower(MI, Step.TypeIdx, Step.NewType); 62 case FewerElements: 63 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); 64 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); 65 case Custom: 66 LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); 67 return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized 68 : UnableToLegalize; 69 default: 70 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); 71 return UnableToLegalize; 72 } 73 } 74 75 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, 76 SmallVectorImpl<unsigned> &VRegs) { 77 for (int i = 0; i < NumParts; ++i) 78 VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); 79 MIRBuilder.buildUnmerge(VRegs, Reg); 80 } 81 82 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { 83 switch (Opcode) { 84 case TargetOpcode::G_SDIV: 85 assert(Size == 32 && "Unsupported size"); 86 return RTLIB::SDIV_I32; 87 case TargetOpcode::G_UDIV: 88 assert(Size == 32 && "Unsupported size"); 89 return RTLIB::UDIV_I32; 90 case TargetOpcode::G_SREM: 91 assert(Size == 32 && "Unsupported size"); 92 return RTLIB::SREM_I32; 93 case TargetOpcode::G_UREM: 94 assert(Size == 32 && "Unsupported size"); 95 return RTLIB::UREM_I32; 96 case TargetOpcode::G_FADD: 97 assert((Size == 32 || Size == 64) && "Unsupported size"); 98 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; 99 case TargetOpcode::G_FSUB: 100 assert((Size == 32 || Size == 64) && "Unsupported size"); 101 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; 102 case TargetOpcode::G_FMUL: 103 assert((Size == 32 || Size == 64) && "Unsupported size"); 104 return Size == 64 ? 
RTLIB::MUL_F64 : RTLIB::MUL_F32; 105 case TargetOpcode::G_FDIV: 106 assert((Size == 32 || Size == 64) && "Unsupported size"); 107 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; 108 case TargetOpcode::G_FREM: 109 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; 110 case TargetOpcode::G_FPOW: 111 return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; 112 case TargetOpcode::G_FMA: 113 assert((Size == 32 || Size == 64) && "Unsupported size"); 114 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; 115 } 116 llvm_unreachable("Unknown libcall function"); 117 } 118 119 LegalizerHelper::LegalizeResult 120 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, 121 const CallLowering::ArgInfo &Result, 122 ArrayRef<CallLowering::ArgInfo> Args) { 123 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); 124 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); 125 const char *Name = TLI.getLibcallName(Libcall); 126 127 MIRBuilder.getMF().getFrameInfo().setHasCalls(true); 128 if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), 129 MachineOperand::CreateES(Name), Result, Args)) 130 return LegalizerHelper::UnableToLegalize; 131 132 return LegalizerHelper::Legalized; 133 } 134 135 // Useful for libcalls where all operands have the same type. 
/// Replace \p MI with a call to the runtime routine implementing it, where
/// the result and every source operand share the single IR type \p OpType
/// (of \p Size bits). Operand 0 of \p MI is the definition; all remaining
/// operands are forwarded as call arguments.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

/// Map a generic conversion opcode plus its destination and source IR types
/// onto the matching runtime-library routine. The IR types are translated to
/// MVTs for the RTLIB query.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

/// Emit a libcall implementing the unary conversion \p MI: the single source
/// operand has IR type \p FromType and the result has IR type \p ToType.
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

/// Legalize \p MI by replacing it with an equivalent runtime-library call.
/// Only the opcode/size combinations explicitly handled below are supported;
/// anything else reports UnableToLegalize. On success the original
/// instruction is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // Integer div/rem is only routed to the 32-bit runtime helpers here
    // (getRTLibDesc asserts Size == 32 for these opcodes).
    Type *HLTy = Type::getInt32Ty(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    // Choose float vs. double flavour of the helper from the result width.
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // Every handled case breaks to here once the libcall has been emitted; the
  // call now produces the value, so the original instruction can go away.
  MI.eraseFromParent();
  return Legalized;
}

/// Legalize \p MI by splitting the type at \p TypeIdx into pieces of type
/// \p NarrowTy, operating on the pieces, and recombining the results with
/// G_MERGE_VALUES.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // An undef of the wide type is just a merge of narrow undefs.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the carry chain with zero, then add piece by piece from the least
    // significant part upwards, threading the carry through.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // [OpStart, OpStart + OpSize) is the bit range being extracted from the
    // wide source; walk the narrow pieces and collect the overlapping bits.
    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // [OpStart, OpStart + OpSize) is the bit range being overwritten in the
    // wide value; rebuild each narrow piece, splicing in the overlap.
    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    // Offsets for the split accesses use the pointer's scalar width.
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;

      // Split the memory operand: byte offset advances per part; only the
      // first part keeps the original alignment, the rest use NarrowSize/8.
      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
          MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
          MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;

      // Same memory-operand splitting policy as the G_LOAD case above.
      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
          MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
          MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    // Emit one narrow constant per chunk of the wide immediate, low bits
    // first (part i holds bits [i*NarrowSize, (i+1)*NarrowSize)).
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_OR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Rewrite use operand \p OpIdx of \p MI in place: build an \p ExtOpcode
/// extension of the old value to \p WideTy and make the operand refer to the
/// extended register instead.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Rewrite def operand \p OpIdx of \p MI in place: \p MI now defines a fresh
/// \p WideTy register, and a \p TruncOpcode instruction inserted immediately
/// after \p MI narrows it back into the original destination register.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // Insert the narrowing instruction after MI so it consumes MI's new result.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
  MO.setReg(DstExt);
}

/// Legalize \p MI by performing the operation on the wider type \p WideTy,
/// extending the sources as each opcode requires and narrowing the result
/// back to the original type.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
                                         MI.getOperand(2).getReg());
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
                                         MI.getOperand(3).getReg());
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, WideTy, LHSZext, RHSZext);
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    // Mask the wide result down to the original width.
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, WideTy, NewOp,
        MIRBuilder.buildConstant(WideTy, Mask.getZExtValue()));
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, WideTy, MIBSrc,
          MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue()));
    }
    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), WideTy, MIBSrc);
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp =
          MIRBuilder.buildInstr(TargetOpcode::G_SUB, WideTy, MIBNewOp,
                                MIRBuilder.buildConstant(WideTy, SizeDiff));
    }
    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
    // Make the original instruction a trunc now, and update its source.
    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed operations need sign-extended inputs to keep their semantics.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_LSHR:
    // Unsigned operations need zero-extended inputs.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly handle
    // this we should be using a dedicated extending load, until then avoid
    // trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    // Only s1 stored values widened to s8 are handled here.
    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
        WideTy != LLT::scalar(8))
      return UnableToLegalize;

    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Sign-extend the immediate to the wide width in place, then narrow the
    // def back to the original type.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    // Convert the FP immediate to the wide format, then narrow the def with
    // an explicit G_FPTRUNC (integer truncation would be wrong for FP).
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2,
                     TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Extend the compared values according to the predicate's signedness
      // so the wide comparison yields the same result.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    // Extend each incoming value at the end of its predecessor block, before
    // that block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The narrowing of the def must go after all PHIs in this block.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  }
}

/// Legalize \p MI by rewriting it in terms of other generic instructions of
/// the same type \p Ty.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
        .addDef(HiPart)
        .addUse(LHS)
        .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
          .addDef(Shifted)
          .addUse(Res)
          .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned overflow iff the high half of the full product is non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR floating-point type matching the LLT width; it is only
    // used to materialize the zero-for-negation constant below.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    // fneg(x) is implemented as fsub(ZeroForNegation, x).
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
986 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) 987 return UnableToLegalize; 988 unsigned Res = MI.getOperand(0).getReg(); 989 unsigned LHS = MI.getOperand(1).getReg(); 990 unsigned RHS = MI.getOperand(2).getReg(); 991 unsigned Neg = MRI.createGenericVirtualRegister(Ty); 992 MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); 993 MIRBuilder.buildInstr(TargetOpcode::G_FADD) 994 .addDef(Res) 995 .addUse(LHS) 996 .addUse(Neg); 997 MI.eraseFromParent(); 998 return Legalized; 999 } 1000 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { 1001 unsigned OldValRes = MI.getOperand(0).getReg(); 1002 unsigned SuccessRes = MI.getOperand(1).getReg(); 1003 unsigned Addr = MI.getOperand(2).getReg(); 1004 unsigned CmpVal = MI.getOperand(3).getReg(); 1005 unsigned NewVal = MI.getOperand(4).getReg(); 1006 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, 1007 **MI.memoperands_begin()); 1008 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); 1009 MI.eraseFromParent(); 1010 return Legalized; 1011 } 1012 case TargetOpcode::G_LOAD: 1013 case TargetOpcode::G_SEXTLOAD: 1014 case TargetOpcode::G_ZEXTLOAD: { 1015 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT 1016 unsigned DstReg = MI.getOperand(0).getReg(); 1017 unsigned PtrReg = MI.getOperand(1).getReg(); 1018 LLT DstTy = MRI.getType(DstReg); 1019 auto &MMO = **MI.memoperands_begin(); 1020 1021 if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { 1022 // In the case of G_LOAD, this was a non-extending load already and we're 1023 // about to lower to the same instruction. 
1024 if (MI.getOpcode() == TargetOpcode::G_LOAD) 1025 return UnableToLegalize; 1026 MIRBuilder.buildLoad(DstReg, PtrReg, MMO); 1027 MI.eraseFromParent(); 1028 return Legalized; 1029 } 1030 1031 if (DstTy.isScalar()) { 1032 unsigned TmpReg = MRI.createGenericVirtualRegister( 1033 LLT::scalar(MMO.getSize() /* in bytes */ * 8)); 1034 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 1035 switch (MI.getOpcode()) { 1036 default: 1037 llvm_unreachable("Unexpected opcode"); 1038 case TargetOpcode::G_LOAD: 1039 MIRBuilder.buildAnyExt(DstReg, TmpReg); 1040 break; 1041 case TargetOpcode::G_SEXTLOAD: 1042 MIRBuilder.buildSExt(DstReg, TmpReg); 1043 break; 1044 case TargetOpcode::G_ZEXTLOAD: 1045 MIRBuilder.buildZExt(DstReg, TmpReg); 1046 break; 1047 } 1048 MI.eraseFromParent(); 1049 return Legalized; 1050 } 1051 1052 return UnableToLegalize; 1053 } 1054 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1055 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1056 case TargetOpcode::G_CTLZ: 1057 case TargetOpcode::G_CTTZ: 1058 case TargetOpcode::G_CTPOP: 1059 return lowerBitCount(MI, TypeIdx, Ty); 1060 } 1061 } 1062 1063 LegalizerHelper::LegalizeResult 1064 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 1065 LLT NarrowTy) { 1066 // FIXME: Don't know how to handle secondary types yet. 1067 if (TypeIdx != 0) 1068 return UnableToLegalize; 1069 switch (MI.getOpcode()) { 1070 default: 1071 return UnableToLegalize; 1072 case TargetOpcode::G_ADD: { 1073 unsigned NarrowSize = NarrowTy.getSizeInBits(); 1074 unsigned DstReg = MI.getOperand(0).getReg(); 1075 unsigned Size = MRI.getType(DstReg).getSizeInBits(); 1076 int NumParts = Size / NarrowSize; 1077 // FIXME: Don't know how to handle the situation where the small vectors 1078 // aren't all the same size yet. 
1079 if (Size % NarrowSize != 0) 1080 return UnableToLegalize; 1081 1082 MIRBuilder.setInstr(MI); 1083 1084 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 1085 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 1086 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 1087 1088 for (int i = 0; i < NumParts; ++i) { 1089 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 1090 MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]); 1091 DstRegs.push_back(DstReg); 1092 } 1093 1094 MIRBuilder.buildMerge(DstReg, DstRegs); 1095 MI.eraseFromParent(); 1096 return Legalized; 1097 } 1098 } 1099 } 1100 1101 LegalizerHelper::LegalizeResult 1102 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 1103 unsigned Opc = MI.getOpcode(); 1104 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 1105 auto isLegalOrCustom = [this](const LegalityQuery &Q) { 1106 auto QAction = LI.getAction(Q).Action; 1107 return QAction == Legal || QAction == Custom; 1108 }; 1109 switch (Opc) { 1110 default: 1111 return UnableToLegalize; 1112 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 1113 // This trivially expands to CTLZ. 1114 MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); 1115 MIRBuilder.recordInsertion(&MI); 1116 return Legalized; 1117 } 1118 case TargetOpcode::G_CTLZ: { 1119 unsigned SrcReg = MI.getOperand(1).getReg(); 1120 unsigned Len = Ty.getSizeInBits(); 1121 if (isLegalOrCustom({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { 1122 // If CTLZ_ZERO_UNDEF is legal or custom, emit that and a select with 1123 // zero. 
1124 auto MIBCtlzZU = 1125 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg); 1126 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 1127 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 1128 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 1129 SrcReg, MIBZero); 1130 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 1131 MIBCtlzZU); 1132 MI.eraseFromParent(); 1133 return Legalized; 1134 } 1135 // for now, we do this: 1136 // NewLen = NextPowerOf2(Len); 1137 // x = x | (x >> 1); 1138 // x = x | (x >> 2); 1139 // ... 1140 // x = x | (x >>16); 1141 // x = x | (x >>32); // for 64-bit input 1142 // Upto NewLen/2 1143 // return Len - popcount(x); 1144 // 1145 // Ref: "Hacker's Delight" by Henry Warren 1146 unsigned Op = SrcReg; 1147 unsigned NewLen = PowerOf2Ceil(Len); 1148 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { 1149 auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); 1150 auto MIBOp = MIRBuilder.buildInstr( 1151 TargetOpcode::G_OR, Ty, Op, 1152 MIRBuilder.buildInstr(TargetOpcode::G_LSHR, Ty, Op, MIBShiftAmt)); 1153 Op = MIBOp->getOperand(0).getReg(); 1154 } 1155 auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, Ty, Op); 1156 MIRBuilder.buildInstr(TargetOpcode::G_SUB, MI.getOperand(0).getReg(), 1157 MIRBuilder.buildConstant(Ty, Len), MIBPop); 1158 MI.eraseFromParent(); 1159 return Legalized; 1160 } 1161 case TargetOpcode::G_CTTZ_ZERO_UNDEF: { 1162 // This trivially expands to CTTZ. 1163 MI.setDesc(TII.get(TargetOpcode::G_CTTZ)); 1164 MIRBuilder.recordInsertion(&MI); 1165 return Legalized; 1166 } 1167 case TargetOpcode::G_CTTZ: { 1168 unsigned SrcReg = MI.getOperand(1).getReg(); 1169 unsigned Len = Ty.getSizeInBits(); 1170 if (isLegalOrCustom({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { 1171 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with 1172 // zero. 
1173 auto MIBCttzZU = 1174 MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, Ty, SrcReg); 1175 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 1176 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 1177 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 1178 SrcReg, MIBZero); 1179 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 1180 MIBCttzZU); 1181 MI.eraseFromParent(); 1182 return Legalized; 1183 } 1184 // for now, we use: { return popcount(~x & (x - 1)); } 1185 // unless the target has ctlz but not ctpop, in which case we use: 1186 // { return 32 - nlz(~x & (x-1)); } 1187 // Ref: "Hacker's Delight" by Henry Warren 1188 auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); 1189 auto MIBNot = 1190 MIRBuilder.buildInstr(TargetOpcode::G_XOR, Ty, SrcReg, MIBCstNeg1); 1191 auto MIBTmp = MIRBuilder.buildInstr( 1192 TargetOpcode::G_AND, Ty, MIBNot, 1193 MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1)); 1194 if (!isLegalOrCustom({TargetOpcode::G_CTPOP, {Ty}}) && 1195 isLegalOrCustom({TargetOpcode::G_CTLZ, {Ty}})) { 1196 auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); 1197 MIRBuilder.buildInstr( 1198 TargetOpcode::G_SUB, MI.getOperand(0).getReg(), 1199 MIBCstLen, 1200 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, Ty, MIBTmp)); 1201 MI.eraseFromParent(); 1202 return Legalized; 1203 } 1204 MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); 1205 MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); 1206 return Legalized; 1207 } 1208 } 1209 } 1210