//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
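
// Split Reg into NumParts new virtual registers of type Ty using a single
// G_UNMERGE_VALUES, e.g. splitting an s64 value into two s32 parts produces
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %reg:_(s64)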
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
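  // The target supplies the symbol name and calling convention for each RTLIB
  // entry, e.g. RTLIB::SDIV_I64 is "__divdi3" with the default libcall names.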
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);
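  // For example, a G_FPEXT from float to double maps to RTLIB::FPEXT_F32_F64,
  // which is "__extendsfdf2" with the default libcall names.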

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
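    // For example, a 64-bit add narrowed with 32-bit parts becomes:
    //   %zero:_(s1) = G_CONSTANT i1 0
    //   %lo:_(s32), %c1:_(s1) = G_UADDE %lhs_lo, %rhs_lo, %zero
    //   %hi:_(s32), %c2:_(s1) = G_UADDE %lhs_hi, %rhs_hi, %c1
    //   %dst:_(s64) = G_MERGE_VALUES %lo, %hi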
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // Compute the offset into this source part and the number of bits to
      // take from it.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }
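      // For example, extracting bits [40, 56) from an s64 split into s32 parts
      // takes bits [8, 24) of the high part: ExtractOffset = 8, SegSize = 16.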

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the
        // original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // Compute which slice of the inserted value lands in this destination
      // part and where it is inserted.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;
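
    // Each part is loaded through its own MachineMemOperand describing the
    // byte offset from the original pointer and the (possibly reduced)
    // alignment, e.g. an s64 load split into s32 parts becomes two loads at
    // offsets 0 and 4.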
    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
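    // Each part is the original constant shifted right by i * NarrowSize bits
    // and truncated, e.g. i64 0x0123456789ABCDEF with s32 parts becomes
    // G_CONSTANT i32 0x89ABCDEF and G_CONSTANT i32 0x01234567.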
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}
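
// widenScalarSrc / widenScalarDst rewrite a single operand of MI: the source
// operand is replaced by an extension of the original value to WideTy, and the
// destination is replaced by a new WideTy register that is truncated back into
// the original register immediately after MI.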
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {WideTy},
          {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
    }
    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZ.
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp minus the difference in width between
      // WideTy and the original type.
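      // For example, a G_CTLZ on s8 widened to s32 counts 24 extra leading
      // zeros, so subtract 24.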
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }
    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
    // Make the original instruction a trunc now, and update its source.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
    Observer.changedInstr(MI);
    return Legalized;
  }

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly
    // handle this we should be using a dedicated extending load; until then,
    // avoid trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
        WideTy != LLT::scalar(8))
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
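      // For example, an s8 G_ICMP with a signed predicate must sign-extend
      // both operands so that the result is unchanged in the wide type, while
      // unsigned predicates require zero-extension.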
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCEIL:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
        .addDef(HiPart)
        .addUse(LHS)
        .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);
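
    // For unsigned multiply, overflow occurred iff the high half of the
    // product is nonzero, e.g. 0xFFFF * 0xFFFF with s16 operands has the high
    // half 0xFFFE.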
    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
          .addDef(Shifted)
          .addUse(Res)
          .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT.
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(
          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}
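
// Reduce the number of vector elements: the operation is performed on
// NarrowTy-sized pieces and the results are recombined, e.g. a <4 x s32> G_ADD
// with NarrowTy = <2 x s32> becomes two <2 x s32> adds followed by a
// G_CONCAT_VECTORS of the partial results.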
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    SmallVector<unsigned, 2> DstRegs;

    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned Size = MRI.getType(DstReg).getSizeInBits();
    int NumParts = Size / NarrowSize;
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    for (int i = 0; i < NumParts; ++i) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildUndef(TmpReg);
      DstRegs.push_back(TmpReg);
    }

    if (NarrowTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, DstRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, DstRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned Size = MRI.getType(DstReg).getSizeInBits();
    int NumParts = Size / NarrowSize;
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
      DstRegs.push_back(DstReg);
    }

    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
    unsigned ValReg = MI.getOperand(0).getReg();
    unsigned AddrReg = MI.getOperand(1).getReg();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned Size = MRI.getType(ValReg).getSizeInBits();
    unsigned NumParts = Size / NarrowSize;

    SmallVector<unsigned, 8> NarrowRegs;
    if (!IsLoad)
      extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);

    const LLT OffsetTy =
        LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
    MachineFunction &MF = *MI.getMF();
    MachineMemOperand *MMO = *MI.memoperands_begin();
    for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
      unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
      unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
      unsigned NewAddrReg = 0;
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
      MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
          MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
          NarrowTy.getSizeInBits() / 8, Alignment);
      if (IsLoad) {
        unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
        NarrowRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
      } else {
        MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
      }
    }
    if (IsLoad) {
      if (NarrowTy.isVector())
        MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
      else
        MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // For now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // Up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
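    // For example, for an s8 input x = 0b00010000 the shift-and-or steps
    // produce 0b00011111, popcount gives 5, and ctlz = 8 - 5 = 3.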
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // For now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
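    // For example, x = 0b01101000: x - 1 = 0b01100111, ~x = 0b10010111,
    // ~x & (x - 1) = 0b00000111, and popcount(0b00000111) = 3 = cttz(x).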
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}