1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file implements the LegalizerHelper class to legalize 11 /// individual instructions and the LegalizeMachineIR wrapper pass for the 12 /// primary legalization. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 17 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 18 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 19 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 20 #include "llvm/CodeGen/MachineRegisterInfo.h" 21 #include "llvm/CodeGen/TargetInstrInfo.h" 22 #include "llvm/CodeGen/TargetLowering.h" 23 #include "llvm/CodeGen/TargetSubtargetInfo.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #define DEBUG_TYPE "legalizer" 29 30 using namespace llvm; 31 using namespace LegalizeActions; 32 33 LegalizerHelper::LegalizerHelper(MachineFunction &MF, 34 GISelChangeObserver &Observer, 35 MachineIRBuilder &Builder) 36 : MIRBuilder(Builder), MRI(MF.getRegInfo()), 37 LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { 38 MIRBuilder.setMF(MF); 39 MIRBuilder.setChangeObserver(Observer); 40 } 41 42 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, 43 GISelChangeObserver &Observer, 44 MachineIRBuilder &B) 45 : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { 46 MIRBuilder.setMF(MF); 47 MIRBuilder.setChangeObserver(Observer); 48 } 49 LegalizerHelper::LegalizeResult 50 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { 51 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); 52 53 
auto Step = LI.getAction(MI, MRI); 54 switch (Step.Action) { 55 case Legal: 56 LLVM_DEBUG(dbgs() << ".. Already legal\n"); 57 return AlreadyLegal; 58 case Libcall: 59 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); 60 return libcall(MI); 61 case NarrowScalar: 62 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); 63 return narrowScalar(MI, Step.TypeIdx, Step.NewType); 64 case WidenScalar: 65 LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); 66 return widenScalar(MI, Step.TypeIdx, Step.NewType); 67 case Lower: 68 LLVM_DEBUG(dbgs() << ".. Lower\n"); 69 return lower(MI, Step.TypeIdx, Step.NewType); 70 case FewerElements: 71 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); 72 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); 73 case Custom: 74 LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); 75 return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized 76 : UnableToLegalize; 77 default: 78 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); 79 return UnableToLegalize; 80 } 81 } 82 83 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, 84 SmallVectorImpl<unsigned> &VRegs) { 85 for (int i = 0; i < NumParts; ++i) 86 VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); 87 MIRBuilder.buildUnmerge(VRegs, Reg); 88 } 89 90 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { 91 switch (Opcode) { 92 case TargetOpcode::G_SDIV: 93 assert((Size == 32 || Size == 64) && "Unsupported size"); 94 return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; 95 case TargetOpcode::G_UDIV: 96 assert((Size == 32 || Size == 64) && "Unsupported size"); 97 return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; 98 case TargetOpcode::G_SREM: 99 assert((Size == 32 || Size == 64) && "Unsupported size"); 100 return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; 101 case TargetOpcode::G_UREM: 102 assert((Size == 32 || Size == 64) && "Unsupported size"); 103 return Size == 64 ? 
RTLIB::UREM_I64 : RTLIB::UREM_I32; 104 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 105 assert(Size == 32 && "Unsupported size"); 106 return RTLIB::CTLZ_I32; 107 case TargetOpcode::G_FADD: 108 assert((Size == 32 || Size == 64) && "Unsupported size"); 109 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; 110 case TargetOpcode::G_FSUB: 111 assert((Size == 32 || Size == 64) && "Unsupported size"); 112 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; 113 case TargetOpcode::G_FMUL: 114 assert((Size == 32 || Size == 64) && "Unsupported size"); 115 return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32; 116 case TargetOpcode::G_FDIV: 117 assert((Size == 32 || Size == 64) && "Unsupported size"); 118 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; 119 case TargetOpcode::G_FREM: 120 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; 121 case TargetOpcode::G_FPOW: 122 return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; 123 case TargetOpcode::G_FMA: 124 assert((Size == 32 || Size == 64) && "Unsupported size"); 125 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; 126 } 127 llvm_unreachable("Unknown libcall function"); 128 } 129 130 LegalizerHelper::LegalizeResult 131 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, 132 const CallLowering::ArgInfo &Result, 133 ArrayRef<CallLowering::ArgInfo> Args) { 134 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); 135 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); 136 const char *Name = TLI.getLibcallName(Libcall); 137 138 MIRBuilder.getMF().getFrameInfo().setHasCalls(true); 139 if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), 140 MachineOperand::CreateES(Name), Result, Args)) 141 return LegalizerHelper::UnableToLegalize; 142 143 return LegalizerHelper::Legalized; 144 } 145 146 // Useful for libcalls where all operands have the same type. 
// Emit a libcall for MI where the destination and every source operand share
// the same high-level type \p OpType of width \p Size bits.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // Operand 0 is the result; all remaining operands become call arguments.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

// Map a generic conversion opcode plus its source/destination IR types onto
// the matching runtime-library conversion routine.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

// Emit a conversion libcall: a single \p FromType source (operand 1) producing
// a \p ToType result (operand 0).
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

// Legalize MI by replacing it with a call to a runtime-library routine. On
// success the original instruction is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  // Size of the result type determines which libcall variant is used.
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: every operand is an iN of the result width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    // FP ops: pick float or double based on the result width.
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // The libcall was emitted successfully; remove the original instruction.
  MI.eraseFromParent();
  return Legalized;
}

// Legalize MI by splitting its scalar type into NumParts pieces of NarrowTy,
// performing the operation piecewise, and re-merging the results.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // An undef splits into NumParts narrow undefs.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // The lowest part has no incoming carry; seed it with 0.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    // Chain the carry from each narrow add into the next.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    // Walk each narrow part of the source and collect the pieces that overlap
    // the requested bit range [OpStart, OpStart + OpSize).
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    // For each narrow destination part, either forward the original bits,
    // forward the inserted value, or splice the overlapping segment in.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    // Offsets are computed in the pointer's index width.
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    // Emit one narrow load per part at increasing byte offsets, each with a
    // split memory operand carrying the adjusted offset and alignment.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    // Split the stored value, then emit one narrow store per part at
    // increasing byte offsets.
    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    // Materialize each NarrowSize-bit slice of the constant separately.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

// Replace source operand OpIdx with an ExtOpcode extension of itself to
// WideTy, inserted before MI.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

// Redirect destination operand OpIdx into a fresh WideTy register and insert
// a TruncOpcode back to the original register immediately after MI.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // The truncate must follow MI, so move the insert point one past it.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

// Legalize MI by performing the operation on a wider scalar type WideTy,
// extending the sources and truncating the result as appropriate.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {WideTy},
          {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
    }
    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }
    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
    // Make the original instruction a trunc now, and update its source.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
    Observer.changedInstr(MI);
    return Legalized;
  }

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed ops need the sign preserved in the high bits.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_LSHR:
    // Unsigned ops need the high bits zeroed.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly handle
    // this we should be using a dedicated extending load, until then avoid
    // trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    // Only the s1-stored-as-s8 case is handled here.
    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
        WideTy != LLT::scalar(8))
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Sign-extend the immediate to the wide type and retype the def.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    // Convert the FP immediate to the wide semantics, then FPTRUNC the def
    // back to the original type.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Extension choice depends on the predicate's signedness.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, before that
    // block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // Truncate the def after the PHI group, at the first non-PHI.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCEIL:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

// Legalize MI by rewriting it in terms of other (presumably legal) generic
// instructions of the same type Ty.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ?
TargetOpcode::G_SMULH 1005 : TargetOpcode::G_UMULH; 1006 1007 unsigned HiPart = MRI.createGenericVirtualRegister(Ty); 1008 MIRBuilder.buildInstr(Opcode) 1009 .addDef(HiPart) 1010 .addUse(LHS) 1011 .addUse(RHS); 1012 1013 unsigned Zero = MRI.createGenericVirtualRegister(Ty); 1014 MIRBuilder.buildConstant(Zero, 0); 1015 1016 // For *signed* multiply, overflow is detected by checking: 1017 // (hi != (lo >> bitwidth-1)) 1018 if (Opcode == TargetOpcode::G_SMULH) { 1019 unsigned Shifted = MRI.createGenericVirtualRegister(Ty); 1020 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 1021 MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); 1022 MIRBuilder.buildInstr(TargetOpcode::G_ASHR) 1023 .addDef(Shifted) 1024 .addUse(Res) 1025 .addUse(ShiftAmt); 1026 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted); 1027 } else { 1028 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); 1029 } 1030 MI.eraseFromParent(); 1031 return Legalized; 1032 } 1033 case TargetOpcode::G_FNEG: { 1034 // TODO: Handle vector types once we are able to 1035 // represent them. 
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR floating-point type matching the scalar width so the
    // correctly-typed negative zero can be materialized below.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    // Lower fneg(x) to fsub(-0.0, x).
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Split into a plain G_ATOMIC_CMPXCHG plus an explicit compare of the
    // loaded value against the expected value to produce the success flag.
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      // Register width matches memory width: rebuild as an ordinary load.
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then widen the value in registers with the
      // extension kind implied by the original opcode.
      unsigned TmpReg = MRI.createGenericVirtualRegister(
          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    // Bit-counting lowerings live in a dedicated helper.
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
    // Lower add-with-carry-in to two adds plus a compare on the result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    // NOTE(review): (Res u< LHS) appears to miss the wrap case where RHS is
    // all-ones and CarryIn is set (then Res == LHS despite a carry) — confirm
    // whether that combination can reach here.
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Break a vector operation into operations on narrower vector (or scalar)
/// pieces of type \p NarrowTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    SmallVector<unsigned, 2> DstRegs;

    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned Size = MRI.getType(DstReg).getSizeInBits();
    int NumParts = Size / NarrowSize;
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    // Build an undef of each narrow piece, then reassemble the wide value.
    for (int i = 0; i < NumParts; ++i) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildUndef(TmpReg);
      DstRegs.push_back(TmpReg);
    }

    if (NarrowTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, DstRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, DstRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned Size = MRI.getType(DstReg).getSizeInBits();
    int NumParts = Size / NarrowSize;
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    // Split both operands into narrow pieces, add pairwise, and recombine.
    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
      DstRegs.push_back(DstReg);
    }

    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
    unsigned ValReg = MI.getOperand(0).getReg();
    unsigned AddrReg = MI.getOperand(1).getReg();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned Size = MRI.getType(ValReg).getSizeInBits();
    unsigned NumParts = Size / NarrowSize;

    SmallVector<unsigned, 8> NarrowRegs;
    if (!IsLoad)
      extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);

    const LLT OffsetTy =
        LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
    MachineFunction &MF = *MI.getMF();
    MachineMemOperand *MMO = *MI.memoperands_begin();
    // Emit one narrow access per piece at the appropriate byte offset, with a
    // correspondingly offset and re-aligned memory operand.
    for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
      unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
      unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
      unsigned NewAddrReg = 0;
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
      MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
          MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
          NarrowTy.getSizeInBits() / 8, Alignment);
      if (IsLoad) {
        unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
        NarrowRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
      } else {
        MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
      }
    }
    // For loads, reassemble the wide value from the loaded pieces.
    if (IsLoad) {
      if (NarrowTy.isVector())
        MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
      else
        MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Expand the bit-counting operations (ctlz/cttz and their ZERO_UNDEF
/// variants, ctpop) in terms of operations the target does support.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An opcode is usable as a building block if the target can handle it in
  // some way: directly, via libcall, or via custom legalization.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the most-significant set bit down into every lower position.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse MI itself as the G_CTPOP of the masked value rather than building
    // a fresh instruction.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}