//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

/// Construct a helper that uses the LegalizerInfo registered with \p MF's
/// subtarget. All MIR mutations made through the builder are reported to
/// \p Observer.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

/// Overload that lets the caller supply an explicit LegalizerInfo instead of
/// the one registered with the subtarget.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

/// Perform a single legalization step on \p MI: query the LegalizerInfo for
/// the required action and dispatch to the corresponding strategy.
/// \returns AlreadyLegal, Legalized, or UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    // The target hook performs the rewrite itself; translate its boolean
    // success flag into a LegalizeResult.
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

/// Split \p Reg into \p NumParts pieces of type \p Ty using a single
/// G_UNMERGE_VALUES, appending the freshly created part registers to
/// \p VRegs.
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

/// Map a generic opcode plus a scalar bit width onto the runtime-library
/// routine implementing it. Asserts on sizes the table does not cover
/// (note: G_FREM and G_FPOW currently have no size assertion).
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    // Only the 32-bit count-leading-zeros libcall is supported here.
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

/// Emit a call to the runtime routine \p Libcall via the target's
/// CallLowering, using the calling convention and symbol name registered in
/// TargetLowering for that libcall. Marks the function as having calls.
/// \returns UnableToLegalize if call lowering fails.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  // The lowered call may clobber the stack; record that this function makes
  // calls before lowering.
  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
/// Build a libcall for \p MI in which the result and every source operand
/// share the IR type \p OpType. Operand 0 is the result; all remaining
/// operands become call arguments.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

/// Map a conversion opcode plus its source/destination IR types onto the
/// matching RTLIB conversion routine.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

/// Build a conversion libcall for \p MI: operand 0 is the \p ToType result,
/// operand 1 the \p FromType source.
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

/// Replace \p MI with a call into the runtime library. On success the
/// original instruction is erased; unsupported opcodes or type combinations
/// return UnableToLegalize and leave \p MI untouched.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  // The width of the result (operand 0) selects the libcall variant.
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: all operands share an iN type of the result width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM: {
    // FP ops: pick float or double from the result width.
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only float -> double is handled.
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only double -> float is handled.
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only {float,double} -> i32 is handled.
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // Only i32 -> {float,double} is handled.
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Break \p MI's operation on a wide scalar into pieces of \p NarrowTy,
/// recombining the partial results afterwards.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // Build one narrow undef per part, then recombine them into the
    // original destination.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the carry chain with 0 for the lowest part.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Lowest part uses G_USUBO (no borrow in); the rest consume the borrow
    // through a G_USUBE chain.
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    unsigned BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
    return narrowScalarMul(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_EXTRACT: {
    // Only narrowing of the source (type index 1) is handled.
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    // For each narrow part, work out which slice of it (if any) contributes
    // to the extracted value.
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: Don't know how to handle secondary types yet.
    if (TypeIdx != 0)
      return UnableToLegalize;

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    // For each narrow part, either forward the original bits, forward the
    // inserted value whole, or splice a sub-segment of it in.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    // Emit one narrow load per part at the correct byte offset, with a
    // correspondingly narrowed memory operand and alignment.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0; // Pointer register, set by materializeGEP below.
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();

    // Load into a NarrowTy temporary, then extend up to the original
    // destination width.
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSize() * 8 == NarrowSize) {
      // The memory access exactly fills NarrowTy: a plain load suffices.
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      // Still an extending access at the narrow width; keep the ext-load
      // flavor of the original instruction.
      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
                              : TargetOpcode::G_SEXTLOAD;
      MIRBuilder.buildInstr(ExtLoad)
          .addDef(TmpReg)
          .addUse(PtrReg)
          .addMemOperand(&MMO);
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();
    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    // Split the stored value, then emit one narrow store per part at the
    // correct byte offset.
    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0; // Pointer register, set by materializeGEP below.
      unsigned Adjustment = i * NarrowSize / 8;
      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Adjustment);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    // Slice the wide constant into NarrowSize-bit chunks, low bits first.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Only the shift-amount operand (type index 1) can be narrowed here; it
    // is simply truncated in place.
    if (TypeIdx != 1)
      return UnableToLegalize; // TODO
    narrowScalarSrc(MI, NarrowTy, 2);
    return Legalized;
  }
  }
}

/// Widen the source operand at \p OpIdx by emitting \p ExtOpcode from the
/// original register into a fresh \p WideTy register, then rewriting the
/// operand to use it.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Narrow the source operand at \p OpIdx with a G_TRUNC into \p NarrowTy,
/// rewriting the operand to use the truncated register.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
                                    {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Widen the destination operand at \p OpIdx: the instruction defines a
/// fresh \p WideTy register, and \p TruncOpcode (inserted just after the
/// instruction) converts it back to the original destination.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO =
MI.getOperand(OpIdx); 725 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); 726 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 727 MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); 728 MO.setReg(DstExt); 729 } 730 731 LegalizerHelper::LegalizeResult 732 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { 733 MIRBuilder.setInstr(MI); 734 735 switch (MI.getOpcode()) { 736 default: 737 return UnableToLegalize; 738 case TargetOpcode::G_UADDO: 739 case TargetOpcode::G_USUBO: { 740 if (TypeIdx == 1) 741 return UnableToLegalize; // TODO 742 auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, 743 {MI.getOperand(2).getReg()}); 744 auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, 745 {MI.getOperand(3).getReg()}); 746 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO 747 ? TargetOpcode::G_ADD 748 : TargetOpcode::G_SUB; 749 // Do the arithmetic in the larger type. 750 auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext}); 751 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); 752 APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits()); 753 auto AndOp = MIRBuilder.buildInstr( 754 TargetOpcode::G_AND, {WideTy}, 755 {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())}); 756 // There is no overflow if the AndOp is the same as NewOp. 757 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp, 758 AndOp); 759 // Now trunc the NewOp to the original result. 760 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp); 761 MI.eraseFromParent(); 762 return Legalized; 763 } 764 case TargetOpcode::G_CTTZ: 765 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 766 case TargetOpcode::G_CTLZ: 767 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 768 case TargetOpcode::G_CTPOP: { 769 // First ZEXT the input. 
770 auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg()); 771 LLT CurTy = MRI.getType(MI.getOperand(0).getReg()); 772 if (MI.getOpcode() == TargetOpcode::G_CTTZ) { 773 // The count is the same in the larger type except if the original 774 // value was zero. This can be handled by setting the bit just off 775 // the top of the original type. 776 auto TopBit = 777 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); 778 MIBSrc = MIRBuilder.buildInstr( 779 TargetOpcode::G_OR, {WideTy}, 780 {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())}); 781 } 782 // Perform the operation at the larger size. 783 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); 784 // This is already the correct result for CTPOP and CTTZs 785 if (MI.getOpcode() == TargetOpcode::G_CTLZ || 786 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { 787 // The correct result is NewOp - (Difference in widety and current ty). 788 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); 789 MIBNewOp = MIRBuilder.buildInstr( 790 TargetOpcode::G_SUB, {WideTy}, 791 {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)}); 792 } 793 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 794 // Make the original instruction a trunc now, and update its source. 795 Observer.changingInstr(MI); 796 MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); 797 MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg()); 798 Observer.changedInstr(MI); 799 return Legalized; 800 } 801 802 case TargetOpcode::G_ADD: 803 case TargetOpcode::G_AND: 804 case TargetOpcode::G_MUL: 805 case TargetOpcode::G_OR: 806 case TargetOpcode::G_XOR: 807 case TargetOpcode::G_SUB: 808 // Perform operation at larger width (any extension is fine here, high bits 809 // don't affect the result) and then truncate the result back to the 810 // original type. 
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // The shifted value's high bits don't matter, so any-extend it.
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed division/remainder need the sign replicated into the new high
    // bits, so sign-extend both inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // Extend the value being shifted according to the shift kind:
      // arithmetic shifts need the sign bits, logical shifts need zeros.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
    // Unsigned division/remainder need zeros in the new high bits.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    // Only the integer destination can be widened here.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly handle
    // this we should be using a dedicated extending load, until then avoid
    // trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    // Only the s1 -> s8 widening is handled; other combinations would need a
    // truncating store.
    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
        WideTy != LLT::scalar(8))
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Sign-extend the immediate in place, then widen the destination.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    // Convert the FP immediate to the wider format in place; the wide result
    // is narrowed back via G_FPTRUNC on the destination.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    // Branch conditions only test the low bit, so any-extend is sufficient.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Extend the operands to match the predicate's signedness, so the
      // comparison result is unchanged at the wider width.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, just before the
    // block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The truncate of the widened PHI result must follow all PHIs in the
    // block.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      unsigned VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI,
                     LLT::vector(VecTy.getNumElements(),
                                 WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP operations: extend all sources and truncate the wide result back.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // Lower remainder as: a - (a / b) * b.
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
        .addDef(HiPart)
        .addUse(LHS)
        .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
          .addDef(Shifted)
          .addUse(Res)
          .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // For unsigned multiply, overflow occurred iff the high half is
      // non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    // Lower fneg as a subtraction from signed zero: -x == -0.0 - x.
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR float type matching the LLT width for the zero constant.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain cmpxchg plus an explicit compare for the success flag.
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then extend to the destination width.
      unsigned TmpReg = MRI.createGenericVirtualRegister(
          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
    // Res = LHS + RHS + CarryIn; carry-out is detected by an unsigned
    // wrap-around check against LHS.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    // Res = LHS - RHS; borrow-out is simply LHS u< RHS.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    // Res = LHS - RHS - BorrowIn. Borrow-out propagates the incoming borrow
    // when LHS == RHS, otherwise it is LHS u< RHS.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned BorrowIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<unsigned, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  // Build one undef per narrow piece, then reassemble them into the original
  // wide register.
  for (int i = 0; i < NumParts; ++i) {
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(TmpReg);
    DstRegs.push_back(TmpReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split a 1-3 operand instruction into per-piece copies and recombine the
// results.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  unsigned Opc = MI.getOpcode();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Flags = MI.getFlags();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumOps = MI.getNumOperands() - 1;
  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  // Emit one narrow instruction per part, preserving MI's flags.
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  // NarrowTy0/NarrowTy1 are the per-part destination/source types.
  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isScalar()) {
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  } else {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  }

  SmallVector<unsigned, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
                                .addDef(DstReg)
                                .addUse(SrcRegs[I]);

    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT
      SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  // NarrowTy0 is the per-part result type, NarrowTy1 the per-part operand
  // type.
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  // Emit one compare per part; FP compares carry MI's flags.
  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx,
                                              LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  unsigned ValReg = MI.getOperand(0).getReg();
  unsigned AddrReg = MI.getOperand(1).getReg();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned Size = MRI.getType(ValReg).getSizeInBits();
  unsigned NumParts = Size / NarrowSize;

  SmallVector<unsigned, 8> NarrowRegs;
  if (!IsLoad)
    extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);

  const LLT OffsetTy =
      LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
  MachineFunction &MF = *MI.getMF();

  // Emit one narrow load/store per part at the appropriate byte offset, with
  // a memory operand adjusted for that offset and alignment.
  for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
    unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
    unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
    unsigned NewAddrReg = 0;
    MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
    MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
        MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
        NarrowTy.getSizeInBits() / 8, Alignment);
    if (IsLoad) {
      unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
      NarrowRegs.push_back(Dst);
      MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
    } else {
      MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
    }
  }
  // For loads, recombine the narrow pieces into the original wide value.
  if (IsLoad) {
    if (NarrowTy.isVector())
      MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
    else
      MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
  }
  MI.eraseFromParent();
  return Legalized;
}

// Dispatch to the opcode-specific fewerElementsVector* implementation.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FCEIL:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return fewerElementsVectorLoadStore(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0 = MI.getOperand(1).getReg();
  unsigned Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned Size =
      Ty.getSizeInBits();
  unsigned NewSize = Size / 2;
  // Only an exact split into two halves is handled.
  if (Size != 2 * NewSize)
    return UnableToLegalize;

  LLT HalfTy = LLT::scalar(NewSize);
  // TODO: if HalfTy != NewTy, handle the breakdown all at once?

  unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
  unsigned Lo = MRI.createGenericVirtualRegister(HalfTy);
  unsigned Hi = MRI.createGenericVirtualRegister(HalfTy);
  unsigned ExtLo = MRI.createGenericVirtualRegister(Ty);
  unsigned ExtHi = MRI.createGenericVirtualRegister(Ty);
  unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty);

  SmallVector<unsigned, 2> Src0Parts;
  SmallVector<unsigned, 2> Src1Parts;

  extractParts(Src0, HalfTy, 2, Src0Parts);
  extractParts(Src1, HalfTy, 2, Src1Parts);

  MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]);

  // TODO: Use smulh or umulh depending on what the target has.
  MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]);

  // Recombine: Dst = zext(Lo) | (anyext(Hi) << NewSize).
  MIRBuilder.buildConstant(ShiftAmt, NewSize);
  MIRBuilder.buildAnyExt(ExtHi, Hi);
  MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt);

  MIRBuilder.buildZExt(ExtLo, Lo);
  MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An operation may be used in a lowering if the target handles it directly
  // (Legal/Custom) or via a libcall.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse the original instruction in place as a CTPOP of the masked value.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}