//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

/// Construct a helper that consults the target's own LegalizerInfo (obtained
/// from the subtarget). The MIRBuilder is bound to \p MF here; it is pointed
/// at a concrete instruction later, by each legalization routine.
LegalizerHelper::LegalizerHelper(MachineFunction &MF)
    : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
  MIRBuilder.setMF(MF);
}

/// Overload allowing the caller to supply an explicit LegalizerInfo instead
/// of the one owned by the subtarget (useful when the rule set is provided
/// externally, e.g. by tests).
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI)
    : MRI(MF.getRegInfo()), LI(LI) {
  MIRBuilder.setMF(MF);
}

/// Perform a single legalization step on \p MI: query the LegalizerInfo for
/// the action to take on this instruction and dispatch to the corresponding
/// strategy (libcall, narrow, widen, lower, split vector, or target-custom).
///
/// \returns AlreadyLegal if nothing needed doing, Legalized if the
/// instruction was rewritten, or UnableToLegalize on failure.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
                                                  : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

/// Split \p Reg into \p NumParts new generic virtual registers of type \p Ty
/// via a single G_UNMERGE_VALUES. The freshly created registers are appended
/// to \p VRegs. The caller is responsible for NumParts * Ty covering Reg's
/// type exactly.
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

/// Map a generic opcode plus a scalar bit-size to the runtime-library routine
/// (RTLIB entry) implementing it. Only the size combinations asserted below
/// are supported; anything else is a programmer error.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  // NOTE(review): G_FREM and G_FPOW have no size assertion, unlike their
  // neighbours — presumably an oversight rather than intentional; confirm.
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

/// Emit a call to the runtime-library function \p Libcall, with result
/// \p Result and arguments \p Args, using the target's CallLowering.
/// Also marks the containing function as making calls.
///
/// NOTE(review): TLI.getLibcallName may return null when the target provides
/// no implementation for \p Libcall; nothing here guards against that —
/// presumably callers only request supported libcalls. Confirm.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
/// Replace \p MI with a libcall in which operand 0 is the result and every
/// remaining operand is an input; all of them share the IR type \p OpType,
/// and \p Size (bits) selects the concrete RTLIB entry.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // Operand 0 is the destination; everything after it is an argument.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

/// Map a conversion opcode plus its source/destination IR types to the RTLIB
/// routine implementing that conversion.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

/// Replace a conversion instruction (one input, one output) with the matching
/// libcall: operand 1 (of \p FromType) is converted into operand 0 (of
/// \p ToType).
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

/// Legalize \p MI by replacing it with a runtime-library call. Supports the
/// 32-bit integer div/rem family, 32/64-bit FP arithmetic, and a limited set
/// of FP<->integer conversions (see the FIXMEs on each case). On success the
/// original instruction is erased; on failure it is left untouched.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer cases are i32-only (enforced by getRTLibDesc's asserts).
    Type *HLTy = Type::getInt32Ty(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // The libcall was emitted successfully; remove the now-redundant generic
  // instruction.
  MI.eraseFromParent();
  return Legalized;
}

/// Legalize \p MI by splitting its value(s) into NarrowTy-sized pieces,
/// operating on the pieces, and reassembling the result with
/// G_MERGE_VALUES. Each case currently requires the original size to be an
/// exact multiple of NarrowTy's size (see the per-case FIXMEs).
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // An undef of the wide type is just a merge of narrow undefs.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the carry chain with 0 for the lowest limb.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut; // Carry ripples into the next limb.
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    // NOTE(review): Indexes is never used in this case; looks vestigial.
    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    // NOTE(review): Indexes is never used in this case; looks vestigial.
    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    // Offsets are computed in the pointer's index type.
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      // The split access may be less aligned than the original.
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    // Offsets are computed in the pointer's index type.
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      // The split access may be less aligned than the original.
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    // Emit one narrow constant per limb, taking successive slices of the
    // wide immediate (low bits first).
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_OR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Widen operand \p OpIdx of \p MI in place: emit an extension of kind
/// \p ExtOpcode to \p WideTy before MI and redirect the operand to the
/// extended register.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Widen def operand \p OpIdx of \p MI in place: make MI define a fresh
/// WideTy register and emit a truncation (\p TruncOpcode) of it back to the
/// original register immediately after MI.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // The truncate must follow MI, so move the insert point one past it.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
  MO.setReg(DstExt);
}

/// Legalize \p MI by performing the operation in the wider type \p WideTy:
/// extend the inputs (with the extension kind each opcode requires to keep
/// its semantics), execute at the wide width, then truncate the result back.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    // Zero-extend both inputs so the wide result's low bits equal the narrow
    // result and the high bits expose any unsigned overflow.
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
                                         MI.getOperand(2).getReg());
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
                                         MI.getOperand(3).getReg());
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, WideTy, LHSZext, RHSZext);
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, WideTy, NewOp,
        MIRBuilder.buildConstant(WideTy, Mask.getZExtValue()));
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, WideTy, MIBSrc,
          MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue()));
    }
    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), WideTy, MIBSrc);
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp =
          MIRBuilder.buildInstr(TargetOpcode::G_SUB, WideTy, MIBNewOp,
                                MIRBuilder.buildConstant(WideTy, SizeDiff));
    }
    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
    // Make the original instruction a trunc now, and update its source.
    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed ops need sign-extended inputs to stay correct.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_LSHR:
    // Unsigned ops need zero-extended inputs to stay correct.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly handle
    // this we should be using a dedicated extending load, until then avoid
    // trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    // Only the i1 -> i8 widening is supported here.
    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
        WideTy != LLT::scalar(8))
      return UnableToLegalize;

    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    // Re-express the immediate in the wider FP semantics, then truncate the
    // wide result back.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // The extension must match the predicate's signedness for the compare
      // to keep its meaning at the wider width.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    // Extend each incoming value in its predecessor block, just before the
    // terminator, so the extension dominates the PHI use.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The truncate of the widened def must follow the PHI group.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    if (TypeIdx != 2)
      return UnableToLegalize;
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    MIRBuilder.recordInsertion(&MI);
    return Legalized;
  }
}

/// Legalize \p MI by rewriting it in terms of simpler generic instructions
/// the target does support (e.g. rem via div+mul+sub).
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem(x, y) == x - (x / y) * y
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
        .addDef(HiPart)
        .addUse(LHS)
        .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
          .addDef(Shifted)
          .addUse(Res)
          .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    // fneg(x) == -0.0 - x (correct for signed zeros and NaNs).
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
996 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) 997 return UnableToLegalize; 998 unsigned Res = MI.getOperand(0).getReg(); 999 unsigned LHS = MI.getOperand(1).getReg(); 1000 unsigned RHS = MI.getOperand(2).getReg(); 1001 unsigned Neg = MRI.createGenericVirtualRegister(Ty); 1002 MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); 1003 MIRBuilder.buildInstr(TargetOpcode::G_FADD) 1004 .addDef(Res) 1005 .addUse(LHS) 1006 .addUse(Neg); 1007 MI.eraseFromParent(); 1008 return Legalized; 1009 } 1010 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { 1011 unsigned OldValRes = MI.getOperand(0).getReg(); 1012 unsigned SuccessRes = MI.getOperand(1).getReg(); 1013 unsigned Addr = MI.getOperand(2).getReg(); 1014 unsigned CmpVal = MI.getOperand(3).getReg(); 1015 unsigned NewVal = MI.getOperand(4).getReg(); 1016 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, 1017 **MI.memoperands_begin()); 1018 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); 1019 MI.eraseFromParent(); 1020 return Legalized; 1021 } 1022 case TargetOpcode::G_LOAD: 1023 case TargetOpcode::G_SEXTLOAD: 1024 case TargetOpcode::G_ZEXTLOAD: { 1025 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT 1026 unsigned DstReg = MI.getOperand(0).getReg(); 1027 unsigned PtrReg = MI.getOperand(1).getReg(); 1028 LLT DstTy = MRI.getType(DstReg); 1029 auto &MMO = **MI.memoperands_begin(); 1030 1031 if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { 1032 // In the case of G_LOAD, this was a non-extending load already and we're 1033 // about to lower to the same instruction. 
1034 if (MI.getOpcode() == TargetOpcode::G_LOAD) 1035 return UnableToLegalize; 1036 MIRBuilder.buildLoad(DstReg, PtrReg, MMO); 1037 MI.eraseFromParent(); 1038 return Legalized; 1039 } 1040 1041 if (DstTy.isScalar()) { 1042 unsigned TmpReg = MRI.createGenericVirtualRegister( 1043 LLT::scalar(MMO.getSize() /* in bytes */ * 8)); 1044 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 1045 switch (MI.getOpcode()) { 1046 default: 1047 llvm_unreachable("Unexpected opcode"); 1048 case TargetOpcode::G_LOAD: 1049 MIRBuilder.buildAnyExt(DstReg, TmpReg); 1050 break; 1051 case TargetOpcode::G_SEXTLOAD: 1052 MIRBuilder.buildSExt(DstReg, TmpReg); 1053 break; 1054 case TargetOpcode::G_ZEXTLOAD: 1055 MIRBuilder.buildZExt(DstReg, TmpReg); 1056 break; 1057 } 1058 MI.eraseFromParent(); 1059 return Legalized; 1060 } 1061 1062 return UnableToLegalize; 1063 } 1064 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1065 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1066 case TargetOpcode::G_CTLZ: 1067 case TargetOpcode::G_CTTZ: 1068 case TargetOpcode::G_CTPOP: 1069 return lowerBitCount(MI, TypeIdx, Ty); 1070 } 1071 } 1072 1073 LegalizerHelper::LegalizeResult 1074 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 1075 LLT NarrowTy) { 1076 // FIXME: Don't know how to handle secondary types yet. 1077 if (TypeIdx != 0) 1078 return UnableToLegalize; 1079 switch (MI.getOpcode()) { 1080 default: 1081 return UnableToLegalize; 1082 case TargetOpcode::G_ADD: { 1083 unsigned NarrowSize = NarrowTy.getSizeInBits(); 1084 unsigned DstReg = MI.getOperand(0).getReg(); 1085 unsigned Size = MRI.getType(DstReg).getSizeInBits(); 1086 int NumParts = Size / NarrowSize; 1087 // FIXME: Don't know how to handle the situation where the small vectors 1088 // aren't all the same size yet. 
1089 if (Size % NarrowSize != 0) 1090 return UnableToLegalize; 1091 1092 MIRBuilder.setInstr(MI); 1093 1094 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 1095 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 1096 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 1097 1098 for (int i = 0; i < NumParts; ++i) { 1099 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 1100 MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]); 1101 DstRegs.push_back(DstReg); 1102 } 1103 1104 MIRBuilder.buildMerge(DstReg, DstRegs); 1105 MI.eraseFromParent(); 1106 return Legalized; 1107 } 1108 } 1109 } 1110 1111 LegalizerHelper::LegalizeResult 1112 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 1113 unsigned Opc = MI.getOpcode(); 1114 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 1115 auto isSupported = [this](const LegalityQuery &Q) { 1116 auto QAction = LI.getAction(Q).Action; 1117 return QAction == Legal || QAction == Libcall || QAction == Custom; 1118 }; 1119 switch (Opc) { 1120 default: 1121 return UnableToLegalize; 1122 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 1123 // This trivially expands to CTLZ. 1124 MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); 1125 MIRBuilder.recordInsertion(&MI); 1126 return Legalized; 1127 } 1128 case TargetOpcode::G_CTLZ: { 1129 unsigned SrcReg = MI.getOperand(1).getReg(); 1130 unsigned Len = Ty.getSizeInBits(); 1131 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { 1132 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 
1133 auto MIBCtlzZU = 1134 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg); 1135 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 1136 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 1137 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 1138 SrcReg, MIBZero); 1139 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 1140 MIBCtlzZU); 1141 MI.eraseFromParent(); 1142 return Legalized; 1143 } 1144 // for now, we do this: 1145 // NewLen = NextPowerOf2(Len); 1146 // x = x | (x >> 1); 1147 // x = x | (x >> 2); 1148 // ... 1149 // x = x | (x >>16); 1150 // x = x | (x >>32); // for 64-bit input 1151 // Upto NewLen/2 1152 // return Len - popcount(x); 1153 // 1154 // Ref: "Hacker's Delight" by Henry Warren 1155 unsigned Op = SrcReg; 1156 unsigned NewLen = PowerOf2Ceil(Len); 1157 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { 1158 auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); 1159 auto MIBOp = MIRBuilder.buildInstr( 1160 TargetOpcode::G_OR, Ty, Op, 1161 MIRBuilder.buildInstr(TargetOpcode::G_LSHR, Ty, Op, MIBShiftAmt)); 1162 Op = MIBOp->getOperand(0).getReg(); 1163 } 1164 auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, Ty, Op); 1165 MIRBuilder.buildInstr(TargetOpcode::G_SUB, MI.getOperand(0).getReg(), 1166 MIRBuilder.buildConstant(Ty, Len), MIBPop); 1167 MI.eraseFromParent(); 1168 return Legalized; 1169 } 1170 case TargetOpcode::G_CTTZ_ZERO_UNDEF: { 1171 // This trivially expands to CTTZ. 1172 MI.setDesc(TII.get(TargetOpcode::G_CTTZ)); 1173 MIRBuilder.recordInsertion(&MI); 1174 return Legalized; 1175 } 1176 case TargetOpcode::G_CTTZ: { 1177 unsigned SrcReg = MI.getOperand(1).getReg(); 1178 unsigned Len = Ty.getSizeInBits(); 1179 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { 1180 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with 1181 // zero. 
1182 auto MIBCttzZU = 1183 MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, Ty, SrcReg); 1184 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 1185 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 1186 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 1187 SrcReg, MIBZero); 1188 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 1189 MIBCttzZU); 1190 MI.eraseFromParent(); 1191 return Legalized; 1192 } 1193 // for now, we use: { return popcount(~x & (x - 1)); } 1194 // unless the target has ctlz but not ctpop, in which case we use: 1195 // { return 32 - nlz(~x & (x-1)); } 1196 // Ref: "Hacker's Delight" by Henry Warren 1197 auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); 1198 auto MIBNot = 1199 MIRBuilder.buildInstr(TargetOpcode::G_XOR, Ty, SrcReg, MIBCstNeg1); 1200 auto MIBTmp = MIRBuilder.buildInstr( 1201 TargetOpcode::G_AND, Ty, MIBNot, 1202 MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1)); 1203 if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) && 1204 isSupported({TargetOpcode::G_CTLZ, {Ty}})) { 1205 auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); 1206 MIRBuilder.buildInstr( 1207 TargetOpcode::G_SUB, MI.getOperand(0).getReg(), 1208 MIBCstLen, 1209 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, Ty, MIBTmp)); 1210 MI.eraseFromParent(); 1211 return Legalized; 1212 } 1213 MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); 1214 MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); 1215 return Legalized; 1216 } 1217 } 1218 } 1219