//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
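// Operand 0 is taken as the single result; every remaining operand of MI is
// forwarded as a call argument of OpType.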
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if
      // it extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if
      // it extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {WideTy},
          {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
    }
    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (the difference in width between WideTy
      // and CurTy).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }
    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
    // Make the original instruction a trunc now, and update its source.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
    Observer.changedInstr(MI);
    return Legalized;
  }

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    // The "number of bits to shift" operand must preserve its value as an
    // unsigned integer:
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly
    // handle this we should be using a dedicated extending load, until then
    // avoid trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
        WideTy != LLT::scalar(8))
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
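    // Overflow occurred iff the high half of the full-width product is not
    // simply an extension of the low half: non-zero for the unsigned case,
    // different from the low half's replicated sign bit for the signed case.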
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
        .addDef(HiPart)
        .addUse(LHS)
        .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
          .addDef(Shifted)
          .addUse(Res)
          .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(
          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    SmallVector<unsigned, 2> DstRegs;

    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned Size = MRI.getType(DstReg).getSizeInBits();
    int NumParts = Size / NarrowSize;
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    for (int i = 0; i < NumParts; ++i) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildUndef(TmpReg);
      DstRegs.push_back(TmpReg);
    }

    if (NarrowTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, DstRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, DstRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned Size = MRI.getType(DstReg).getSizeInBits();
    int NumParts = Size / NarrowSize;
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
      DstRegs.push_back(DstReg);
    }

    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
    unsigned ValReg = MI.getOperand(0).getReg();
    unsigned AddrReg = MI.getOperand(1).getReg();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    unsigned Size = MRI.getType(ValReg).getSizeInBits();
    unsigned NumParts = Size / NarrowSize;

    SmallVector<unsigned, 8> NarrowRegs;
    if (!IsLoad)
      extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);

    const LLT OffsetTy =
        LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
    MachineFunction &MF = *MI.getMF();
    MachineMemOperand *MMO = *MI.memoperands_begin();
    for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
      unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
      unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
      unsigned NewAddrReg = 0;
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
      MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
          MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
          NarrowTy.getSizeInBits() / 8, Alignment);
      if (IsLoad) {
        unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
        NarrowRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
      } else {
        MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
      }
    }
    if (IsLoad) {
      if (NarrowTy.isVector())
        MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
      else
        MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}