//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy
///
/// Returns -1 if the breakdown is not satisfiable.
38 static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { 39 assert(!LeftoverTy.isValid() && "this is an out argument"); 40 41 unsigned Size = OrigTy.getSizeInBits(); 42 unsigned NarrowSize = NarrowTy.getSizeInBits(); 43 unsigned NumParts = Size / NarrowSize; 44 unsigned LeftoverSize = Size - NumParts * NarrowSize; 45 assert(Size > NarrowSize); 46 47 if (LeftoverSize == 0) 48 return NumParts; 49 50 if (NarrowTy.isVector()) { 51 unsigned EltSize = OrigTy.getScalarSizeInBits(); 52 if (LeftoverSize % EltSize != 0) 53 return -1; 54 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 55 } else { 56 LeftoverTy = LLT::scalar(LeftoverSize); 57 } 58 59 return NumParts; 60 } 61 62 LegalizerHelper::LegalizerHelper(MachineFunction &MF, 63 GISelChangeObserver &Observer, 64 MachineIRBuilder &Builder) 65 : MIRBuilder(Builder), MRI(MF.getRegInfo()), 66 LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { 67 MIRBuilder.setMF(MF); 68 MIRBuilder.setChangeObserver(Observer); 69 } 70 71 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, 72 GISelChangeObserver &Observer, 73 MachineIRBuilder &B) 74 : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { 75 MIRBuilder.setMF(MF); 76 MIRBuilder.setChangeObserver(Observer); 77 } 78 LegalizerHelper::LegalizeResult 79 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { 80 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); 81 82 auto Step = LI.getAction(MI, MRI); 83 switch (Step.Action) { 84 case Legal: 85 LLVM_DEBUG(dbgs() << ".. Already legal\n"); 86 return AlreadyLegal; 87 case Libcall: 88 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); 89 return libcall(MI); 90 case NarrowScalar: 91 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); 92 return narrowScalar(MI, Step.TypeIdx, Step.NewType); 93 case WidenScalar: 94 LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); 95 return widenScalar(MI, Step.TypeIdx, Step.NewType); 96 case Lower: 97 LLVM_DEBUG(dbgs() << ".. 
Lower\n"); 98 return lower(MI, Step.TypeIdx, Step.NewType); 99 case FewerElements: 100 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); 101 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); 102 case MoreElements: 103 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n"); 104 return moreElementsVector(MI, Step.TypeIdx, Step.NewType); 105 case Custom: 106 LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); 107 return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized 108 : UnableToLegalize; 109 default: 110 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); 111 return UnableToLegalize; 112 } 113 } 114 115 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, 116 SmallVectorImpl<unsigned> &VRegs) { 117 for (int i = 0; i < NumParts; ++i) 118 VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); 119 MIRBuilder.buildUnmerge(VRegs, Reg); 120 } 121 122 bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy, 123 LLT MainTy, LLT &LeftoverTy, 124 SmallVectorImpl<unsigned> &VRegs, 125 SmallVectorImpl<unsigned> &LeftoverRegs) { 126 assert(!LeftoverTy.isValid() && "this is an out argument"); 127 128 unsigned RegSize = RegTy.getSizeInBits(); 129 unsigned MainSize = MainTy.getSizeInBits(); 130 unsigned NumParts = RegSize / MainSize; 131 unsigned LeftoverSize = RegSize - NumParts * MainSize; 132 133 // Use an unmerge when possible. 134 if (LeftoverSize == 0) { 135 for (unsigned I = 0; I < NumParts; ++I) 136 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy)); 137 MIRBuilder.buildUnmerge(VRegs, Reg); 138 return true; 139 } 140 141 if (MainTy.isVector()) { 142 unsigned EltSize = MainTy.getScalarSizeInBits(); 143 if (LeftoverSize % EltSize != 0) 144 return false; 145 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 146 } else { 147 LeftoverTy = LLT::scalar(LeftoverSize); 148 } 149 150 // For irregular sizes, extract the individual parts. 
151 for (unsigned I = 0; I != NumParts; ++I) { 152 unsigned NewReg = MRI.createGenericVirtualRegister(MainTy); 153 VRegs.push_back(NewReg); 154 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I); 155 } 156 157 for (unsigned Offset = MainSize * NumParts; Offset < RegSize; 158 Offset += LeftoverSize) { 159 unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy); 160 LeftoverRegs.push_back(NewReg); 161 MIRBuilder.buildExtract(NewReg, Reg, Offset); 162 } 163 164 return true; 165 } 166 167 void LegalizerHelper::insertParts(unsigned DstReg, 168 LLT ResultTy, LLT PartTy, 169 ArrayRef<unsigned> PartRegs, 170 LLT LeftoverTy, 171 ArrayRef<unsigned> LeftoverRegs) { 172 if (!LeftoverTy.isValid()) { 173 assert(LeftoverRegs.empty()); 174 175 if (!ResultTy.isVector()) { 176 MIRBuilder.buildMerge(DstReg, PartRegs); 177 return; 178 } 179 180 if (PartTy.isVector()) 181 MIRBuilder.buildConcatVectors(DstReg, PartRegs); 182 else 183 MIRBuilder.buildBuildVector(DstReg, PartRegs); 184 return; 185 } 186 187 unsigned PartSize = PartTy.getSizeInBits(); 188 unsigned LeftoverPartSize = LeftoverTy.getSizeInBits(); 189 190 unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy); 191 MIRBuilder.buildUndef(CurResultReg); 192 193 unsigned Offset = 0; 194 for (unsigned PartReg : PartRegs) { 195 unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy); 196 MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset); 197 CurResultReg = NewResultReg; 198 Offset += PartSize; 199 } 200 201 for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) { 202 // Use the original output register for the final insert to avoid a copy. 203 unsigned NewResultReg = (I + 1 == E) ? 
204 DstReg : MRI.createGenericVirtualRegister(ResultTy); 205 206 MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset); 207 CurResultReg = NewResultReg; 208 Offset += LeftoverPartSize; 209 } 210 } 211 212 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { 213 switch (Opcode) { 214 case TargetOpcode::G_SDIV: 215 assert((Size == 32 || Size == 64) && "Unsupported size"); 216 return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; 217 case TargetOpcode::G_UDIV: 218 assert((Size == 32 || Size == 64) && "Unsupported size"); 219 return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; 220 case TargetOpcode::G_SREM: 221 assert((Size == 32 || Size == 64) && "Unsupported size"); 222 return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; 223 case TargetOpcode::G_UREM: 224 assert((Size == 32 || Size == 64) && "Unsupported size"); 225 return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32; 226 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 227 assert(Size == 32 && "Unsupported size"); 228 return RTLIB::CTLZ_I32; 229 case TargetOpcode::G_FADD: 230 assert((Size == 32 || Size == 64) && "Unsupported size"); 231 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; 232 case TargetOpcode::G_FSUB: 233 assert((Size == 32 || Size == 64) && "Unsupported size"); 234 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; 235 case TargetOpcode::G_FMUL: 236 assert((Size == 32 || Size == 64) && "Unsupported size"); 237 return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32; 238 case TargetOpcode::G_FDIV: 239 assert((Size == 32 || Size == 64) && "Unsupported size"); 240 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; 241 case TargetOpcode::G_FEXP: 242 assert((Size == 32 || Size == 64) && "Unsupported size"); 243 return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32; 244 case TargetOpcode::G_FREM: 245 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; 246 case TargetOpcode::G_FPOW: 247 return Size == 64 ? 
RTLIB::POW_F64 : RTLIB::POW_F32; 248 case TargetOpcode::G_FMA: 249 assert((Size == 32 || Size == 64) && "Unsupported size"); 250 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; 251 case TargetOpcode::G_FSIN: 252 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 253 return Size == 128 ? RTLIB::SIN_F128 254 : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32; 255 case TargetOpcode::G_FCOS: 256 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 257 return Size == 128 ? RTLIB::COS_F128 258 : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32; 259 case TargetOpcode::G_FLOG10: 260 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 261 return Size == 128 ? RTLIB::LOG10_F128 262 : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32; 263 case TargetOpcode::G_FLOG: 264 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 265 return Size == 128 ? RTLIB::LOG_F128 266 : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32; 267 case TargetOpcode::G_FLOG2: 268 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 269 return Size == 128 ? RTLIB::LOG2_F128 270 : Size == 64 ? 
RTLIB::LOG2_F64 : RTLIB::LOG2_F32; 271 } 272 llvm_unreachable("Unknown libcall function"); 273 } 274 275 LegalizerHelper::LegalizeResult 276 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, 277 const CallLowering::ArgInfo &Result, 278 ArrayRef<CallLowering::ArgInfo> Args) { 279 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); 280 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); 281 const char *Name = TLI.getLibcallName(Libcall); 282 283 MIRBuilder.getMF().getFrameInfo().setHasCalls(true); 284 if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), 285 MachineOperand::CreateES(Name), Result, Args)) 286 return LegalizerHelper::UnableToLegalize; 287 288 return LegalizerHelper::Legalized; 289 } 290 291 // Useful for libcalls where all operands have the same type. 292 static LegalizerHelper::LegalizeResult 293 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, 294 Type *OpType) { 295 auto Libcall = getRTLibDesc(MI.getOpcode(), Size); 296 297 SmallVector<CallLowering::ArgInfo, 3> Args; 298 for (unsigned i = 1; i < MI.getNumOperands(); i++) 299 Args.push_back({MI.getOperand(i).getReg(), OpType}); 300 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, 301 Args); 302 } 303 304 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, 305 Type *FromType) { 306 auto ToMVT = MVT::getVT(ToType); 307 auto FromMVT = MVT::getVT(FromType); 308 309 switch (Opcode) { 310 case TargetOpcode::G_FPEXT: 311 return RTLIB::getFPEXT(FromMVT, ToMVT); 312 case TargetOpcode::G_FPTRUNC: 313 return RTLIB::getFPROUND(FromMVT, ToMVT); 314 case TargetOpcode::G_FPTOSI: 315 return RTLIB::getFPTOSINT(FromMVT, ToMVT); 316 case TargetOpcode::G_FPTOUI: 317 return RTLIB::getFPTOUINT(FromMVT, ToMVT); 318 case TargetOpcode::G_SITOFP: 319 return RTLIB::getSINTTOFP(FromMVT, ToMVT); 320 case TargetOpcode::G_UITOFP: 321 return RTLIB::getUINTTOFP(FromMVT, ToMVT); 322 } 323 
llvm_unreachable("Unsupported libcall function"); 324 } 325 326 static LegalizerHelper::LegalizeResult 327 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, 328 Type *FromType) { 329 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); 330 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, 331 {{MI.getOperand(1).getReg(), FromType}}); 332 } 333 334 LegalizerHelper::LegalizeResult 335 LegalizerHelper::libcall(MachineInstr &MI) { 336 LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); 337 unsigned Size = LLTy.getSizeInBits(); 338 auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); 339 340 MIRBuilder.setInstr(MI); 341 342 switch (MI.getOpcode()) { 343 default: 344 return UnableToLegalize; 345 case TargetOpcode::G_SDIV: 346 case TargetOpcode::G_UDIV: 347 case TargetOpcode::G_SREM: 348 case TargetOpcode::G_UREM: 349 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 350 Type *HLTy = IntegerType::get(Ctx, Size); 351 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 352 if (Status != Legalized) 353 return Status; 354 break; 355 } 356 case TargetOpcode::G_FADD: 357 case TargetOpcode::G_FSUB: 358 case TargetOpcode::G_FMUL: 359 case TargetOpcode::G_FDIV: 360 case TargetOpcode::G_FMA: 361 case TargetOpcode::G_FPOW: 362 case TargetOpcode::G_FREM: 363 case TargetOpcode::G_FCOS: 364 case TargetOpcode::G_FSIN: 365 case TargetOpcode::G_FLOG10: 366 case TargetOpcode::G_FLOG: 367 case TargetOpcode::G_FLOG2: 368 case TargetOpcode::G_FEXP: { 369 if (Size > 64) { 370 LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n"); 371 return UnableToLegalize; 372 } 373 Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); 374 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 375 if (Status != Legalized) 376 return Status; 377 break; 378 } 379 case TargetOpcode::G_FPEXT: { 380 // FIXME: Support other floating point types (half, fp128 etc) 381 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 382 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 383 if (ToSize != 64 || FromSize != 32) 384 return UnableToLegalize; 385 LegalizeResult Status = conversionLibcall( 386 MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); 387 if (Status != Legalized) 388 return Status; 389 break; 390 } 391 case TargetOpcode::G_FPTRUNC: { 392 // FIXME: Support other floating point types (half, fp128 etc) 393 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 394 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 395 if (ToSize != 32 || FromSize != 64) 396 return UnableToLegalize; 397 LegalizeResult Status = conversionLibcall( 398 MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); 399 if (Status != Legalized) 400 return Status; 401 break; 402 } 403 case TargetOpcode::G_FPTOSI: 404 case TargetOpcode::G_FPTOUI: { 405 // FIXME: Support other types 406 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 407 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 408 if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) 409 return UnableToLegalize; 410 LegalizeResult Status = conversionLibcall( 411 MI, MIRBuilder, Type::getInt32Ty(Ctx), 412 FromSize == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); 413 if (Status != Legalized) 414 return Status; 415 break; 416 } 417 case TargetOpcode::G_SITOFP: 418 case TargetOpcode::G_UITOFP: { 419 // FIXME: Support other types 420 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 421 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 422 if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) 423 return UnableToLegalize; 424 LegalizeResult Status = conversionLibcall( 425 MI, MIRBuilder, 426 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), 427 Type::getInt32Ty(Ctx)); 428 if (Status != Legalized) 429 return Status; 430 break; 431 } 432 } 433 434 MI.eraseFromParent(); 435 return Legalized; 436 } 437 438 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, 439 unsigned TypeIdx, 440 LLT NarrowTy) { 441 MIRBuilder.setInstr(MI); 442 443 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 444 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 445 446 switch (MI.getOpcode()) { 447 default: 448 return UnableToLegalize; 449 case TargetOpcode::G_IMPLICIT_DEF: { 450 // FIXME: add support for when SizeOp0 isn't an exact multiple of 451 // NarrowSize. 452 if (SizeOp0 % NarrowSize != 0) 453 return UnableToLegalize; 454 int NumParts = SizeOp0 / NarrowSize; 455 456 SmallVector<unsigned, 2> DstRegs; 457 for (int i = 0; i < NumParts; ++i) 458 DstRegs.push_back( 459 MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); 460 461 unsigned DstReg = MI.getOperand(0).getReg(); 462 if(MRI.getType(DstReg).isVector()) 463 MIRBuilder.buildBuildVector(DstReg, DstRegs); 464 else 465 MIRBuilder.buildMerge(DstReg, DstRegs); 466 MI.eraseFromParent(); 467 return Legalized; 468 } 469 case TargetOpcode::G_ADD: { 470 // FIXME: add support for when SizeOp0 isn't an exact multiple of 471 // NarrowSize. 
472 if (SizeOp0 % NarrowSize != 0) 473 return UnableToLegalize; 474 // Expand in terms of carry-setting/consuming G_ADDE instructions. 475 int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 476 477 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 478 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 479 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 480 481 unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); 482 MIRBuilder.buildConstant(CarryIn, 0); 483 484 for (int i = 0; i < NumParts; ++i) { 485 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 486 unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 487 488 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], 489 Src2Regs[i], CarryIn); 490 491 DstRegs.push_back(DstReg); 492 CarryIn = CarryOut; 493 } 494 unsigned DstReg = MI.getOperand(0).getReg(); 495 if(MRI.getType(DstReg).isVector()) 496 MIRBuilder.buildBuildVector(DstReg, DstRegs); 497 else 498 MIRBuilder.buildMerge(DstReg, DstRegs); 499 MI.eraseFromParent(); 500 return Legalized; 501 } 502 case TargetOpcode::G_SUB: { 503 // FIXME: add support for when SizeOp0 isn't an exact multiple of 504 // NarrowSize. 
505 if (SizeOp0 % NarrowSize != 0) 506 return UnableToLegalize; 507 508 int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 509 510 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 511 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 512 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 513 514 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 515 unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 516 MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut}, 517 {Src1Regs[0], Src2Regs[0]}); 518 DstRegs.push_back(DstReg); 519 unsigned BorrowIn = BorrowOut; 520 for (int i = 1; i < NumParts; ++i) { 521 DstReg = MRI.createGenericVirtualRegister(NarrowTy); 522 BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 523 524 MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut}, 525 {Src1Regs[i], Src2Regs[i], BorrowIn}); 526 527 DstRegs.push_back(DstReg); 528 BorrowIn = BorrowOut; 529 } 530 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); 531 MI.eraseFromParent(); 532 return Legalized; 533 } 534 case TargetOpcode::G_MUL: 535 return narrowScalarMul(MI, TypeIdx, NarrowTy); 536 case TargetOpcode::G_EXTRACT: 537 return narrowScalarExtract(MI, TypeIdx, NarrowTy); 538 case TargetOpcode::G_INSERT: 539 return narrowScalarInsert(MI, TypeIdx, NarrowTy); 540 case TargetOpcode::G_LOAD: { 541 const auto &MMO = **MI.memoperands_begin(); 542 unsigned DstReg = MI.getOperand(0).getReg(); 543 LLT DstTy = MRI.getType(DstReg); 544 if (DstTy.isVector()) 545 return UnableToLegalize; 546 547 if (8 * MMO.getSize() != DstTy.getSizeInBits()) { 548 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 549 auto &MMO = **MI.memoperands_begin(); 550 MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO); 551 MIRBuilder.buildAnyExt(DstReg, TmpReg); 552 MI.eraseFromParent(); 553 return Legalized; 554 } 555 556 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); 557 } 558 case 
TargetOpcode::G_ZEXTLOAD: 559 case TargetOpcode::G_SEXTLOAD: { 560 bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD; 561 unsigned DstReg = MI.getOperand(0).getReg(); 562 unsigned PtrReg = MI.getOperand(1).getReg(); 563 564 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 565 auto &MMO = **MI.memoperands_begin(); 566 if (MMO.getSize() * 8 == NarrowSize) { 567 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 568 } else { 569 unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD 570 : TargetOpcode::G_SEXTLOAD; 571 MIRBuilder.buildInstr(ExtLoad) 572 .addDef(TmpReg) 573 .addUse(PtrReg) 574 .addMemOperand(&MMO); 575 } 576 577 if (ZExt) 578 MIRBuilder.buildZExt(DstReg, TmpReg); 579 else 580 MIRBuilder.buildSExt(DstReg, TmpReg); 581 582 MI.eraseFromParent(); 583 return Legalized; 584 } 585 case TargetOpcode::G_STORE: { 586 const auto &MMO = **MI.memoperands_begin(); 587 588 unsigned SrcReg = MI.getOperand(0).getReg(); 589 LLT SrcTy = MRI.getType(SrcReg); 590 if (SrcTy.isVector()) 591 return UnableToLegalize; 592 593 int NumParts = SizeOp0 / NarrowSize; 594 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits(); 595 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize; 596 if (SrcTy.isVector() && LeftoverBits != 0) 597 return UnableToLegalize; 598 599 if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { 600 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 601 auto &MMO = **MI.memoperands_begin(); 602 MIRBuilder.buildTrunc(TmpReg, SrcReg); 603 MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO); 604 MI.eraseFromParent(); 605 return Legalized; 606 } 607 608 return reduceLoadStoreWidth(MI, 0, NarrowTy); 609 } 610 case TargetOpcode::G_CONSTANT: { 611 // FIXME: add support for when SizeOp0 isn't an exact multiple of 612 // NarrowSize. 
613 if (SizeOp0 % NarrowSize != 0) 614 return UnableToLegalize; 615 int NumParts = SizeOp0 / NarrowSize; 616 const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); 617 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 618 619 SmallVector<unsigned, 2> DstRegs; 620 for (int i = 0; i < NumParts; ++i) { 621 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 622 ConstantInt *CI = 623 ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize)); 624 MIRBuilder.buildConstant(DstReg, *CI); 625 DstRegs.push_back(DstReg); 626 } 627 unsigned DstReg = MI.getOperand(0).getReg(); 628 if(MRI.getType(DstReg).isVector()) 629 MIRBuilder.buildBuildVector(DstReg, DstRegs); 630 else 631 MIRBuilder.buildMerge(DstReg, DstRegs); 632 MI.eraseFromParent(); 633 return Legalized; 634 } 635 case TargetOpcode::G_SELECT: 636 return narrowScalarSelect(MI, TypeIdx, NarrowTy); 637 case TargetOpcode::G_AND: 638 case TargetOpcode::G_OR: 639 case TargetOpcode::G_XOR: { 640 // Legalize bitwise operation: 641 // A = BinOp<Ty> B, C 642 // into: 643 // B1, ..., BN = G_UNMERGE_VALUES B 644 // C1, ..., CN = G_UNMERGE_VALUES C 645 // A1 = BinOp<Ty/N> B1, C2 646 // ... 647 // AN = BinOp<Ty/N> BN, CN 648 // A = G_MERGE_VALUES A1, ..., AN 649 650 // FIXME: add support for when SizeOp0 isn't an exact multiple of 651 // NarrowSize. 652 if (SizeOp0 % NarrowSize != 0) 653 return UnableToLegalize; 654 int NumParts = SizeOp0 / NarrowSize; 655 656 // List the registers where the destination will be scattered. 657 SmallVector<unsigned, 2> DstRegs; 658 // List the registers where the first argument will be split. 659 SmallVector<unsigned, 2> SrcsReg1; 660 // List the registers where the second argument will be split. 661 SmallVector<unsigned, 2> SrcsReg2; 662 // Create all the temporary registers. 
663 for (int i = 0; i < NumParts; ++i) { 664 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 665 unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); 666 unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); 667 668 DstRegs.push_back(DstReg); 669 SrcsReg1.push_back(SrcReg1); 670 SrcsReg2.push_back(SrcReg2); 671 } 672 // Explode the big arguments into smaller chunks. 673 MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); 674 MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); 675 676 // Do the operation on each small part. 677 for (int i = 0; i < NumParts; ++i) 678 MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]}, 679 {SrcsReg1[i], SrcsReg2[i]}); 680 681 // Gather the destination registers into the final destination. 682 unsigned DstReg = MI.getOperand(0).getReg(); 683 if(MRI.getType(DstReg).isVector()) 684 MIRBuilder.buildBuildVector(DstReg, DstRegs); 685 else 686 MIRBuilder.buildMerge(DstReg, DstRegs); 687 MI.eraseFromParent(); 688 return Legalized; 689 } 690 case TargetOpcode::G_SHL: 691 case TargetOpcode::G_LSHR: 692 case TargetOpcode::G_ASHR: 693 return narrowScalarShift(MI, TypeIdx, NarrowTy); 694 case TargetOpcode::G_CTLZ: 695 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 696 case TargetOpcode::G_CTTZ: 697 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 698 case TargetOpcode::G_CTPOP: 699 if (TypeIdx != 0) 700 return UnableToLegalize; // TODO 701 702 Observer.changingInstr(MI); 703 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 704 Observer.changedInstr(MI); 705 return Legalized; 706 case TargetOpcode::G_INTTOPTR: 707 if (TypeIdx != 1) 708 return UnableToLegalize; 709 710 Observer.changingInstr(MI); 711 narrowScalarSrc(MI, NarrowTy, 1); 712 Observer.changedInstr(MI); 713 return Legalized; 714 case TargetOpcode::G_PTRTOINT: 715 if (TypeIdx != 0) 716 return UnableToLegalize; 717 718 Observer.changingInstr(MI); 719 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 720 Observer.changedInstr(MI); 721 return 
Legalized; 722 } 723 } 724 725 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, 726 unsigned OpIdx, unsigned ExtOpcode) { 727 MachineOperand &MO = MI.getOperand(OpIdx); 728 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()}); 729 MO.setReg(ExtB->getOperand(0).getReg()); 730 } 731 732 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, 733 unsigned OpIdx) { 734 MachineOperand &MO = MI.getOperand(OpIdx); 735 auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy}, 736 {MO.getReg()}); 737 MO.setReg(ExtB->getOperand(0).getReg()); 738 } 739 740 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, 741 unsigned OpIdx, unsigned TruncOpcode) { 742 MachineOperand &MO = MI.getOperand(OpIdx); 743 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); 744 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 745 MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); 746 MO.setReg(DstExt); 747 } 748 749 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, 750 unsigned OpIdx, unsigned ExtOpcode) { 751 MachineOperand &MO = MI.getOperand(OpIdx); 752 unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy); 753 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 754 MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc}); 755 MO.setReg(DstTrunc); 756 } 757 758 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, 759 unsigned OpIdx) { 760 MachineOperand &MO = MI.getOperand(OpIdx); 761 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); 762 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 763 MIRBuilder.buildExtract(MO.getReg(), DstExt, 0); 764 MO.setReg(DstExt); 765 } 766 767 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, 768 unsigned OpIdx) { 769 MachineOperand &MO = MI.getOperand(OpIdx); 770 771 LLT OldTy = MRI.getType(MO.getReg()); 772 unsigned 
OldElts = OldTy.getNumElements(); 773 unsigned NewElts = MoreTy.getNumElements(); 774 775 unsigned NumParts = NewElts / OldElts; 776 777 // Use concat_vectors if the result is a multiple of the number of elements. 778 if (NumParts * OldElts == NewElts) { 779 SmallVector<unsigned, 8> Parts; 780 Parts.push_back(MO.getReg()); 781 782 unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); 783 for (unsigned I = 1; I != NumParts; ++I) 784 Parts.push_back(ImpDef); 785 786 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); 787 MO.setReg(Concat.getReg(0)); 788 return; 789 } 790 791 unsigned MoreReg = MRI.createGenericVirtualRegister(MoreTy); 792 unsigned ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); 793 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); 794 MO.setReg(MoreReg); 795 } 796 797 LegalizerHelper::LegalizeResult 798 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, 799 LLT WideTy) { 800 if (TypeIdx != 1) 801 return UnableToLegalize; 802 803 unsigned DstReg = MI.getOperand(0).getReg(); 804 LLT DstTy = MRI.getType(DstReg); 805 if (!DstTy.isScalar()) 806 return UnableToLegalize; 807 808 unsigned NumOps = MI.getNumOperands(); 809 unsigned NumSrc = MI.getNumOperands() - 1; 810 unsigned PartSize = DstTy.getSizeInBits() / NumSrc; 811 812 unsigned Src1 = MI.getOperand(1).getReg(); 813 unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg(); 814 815 for (unsigned I = 2; I != NumOps; ++I) { 816 const unsigned Offset = (I - 1) * PartSize; 817 818 unsigned SrcReg = MI.getOperand(I).getReg(); 819 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); 820 821 auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg); 822 823 unsigned NextResult = I + 1 == NumOps ? 
DstReg : 824 MRI.createGenericVirtualRegister(DstTy); 825 826 auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset); 827 auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt); 828 MIRBuilder.buildOr(NextResult, ResultReg, Shl); 829 ResultReg = NextResult; 830 } 831 832 MI.eraseFromParent(); 833 return Legalized; 834 } 835 836 LegalizerHelper::LegalizeResult 837 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, 838 LLT WideTy) { 839 if (TypeIdx != 0) 840 return UnableToLegalize; 841 842 unsigned NumDst = MI.getNumOperands() - 1; 843 unsigned SrcReg = MI.getOperand(NumDst).getReg(); 844 LLT SrcTy = MRI.getType(SrcReg); 845 if (!SrcTy.isScalar()) 846 return UnableToLegalize; 847 848 unsigned Dst0Reg = MI.getOperand(0).getReg(); 849 LLT DstTy = MRI.getType(Dst0Reg); 850 if (!DstTy.isScalar()) 851 return UnableToLegalize; 852 853 unsigned NewSrcSize = NumDst * WideTy.getSizeInBits(); 854 LLT NewSrcTy = LLT::scalar(NewSrcSize); 855 unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits(); 856 857 auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg); 858 859 for (unsigned I = 1; I != NumDst; ++I) { 860 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I); 861 auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt); 862 WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl); 863 } 864 865 Observer.changingInstr(MI); 866 867 MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg()); 868 for (unsigned I = 0; I != NumDst; ++I) 869 widenScalarDst(MI, WideTy, I); 870 871 Observer.changedInstr(MI); 872 873 return Legalized; 874 } 875 876 LegalizerHelper::LegalizeResult 877 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, 878 LLT WideTy) { 879 unsigned DstReg = MI.getOperand(0).getReg(); 880 unsigned SrcReg = MI.getOperand(1).getReg(); 881 LLT SrcTy = MRI.getType(SrcReg); 882 883 LLT DstTy = MRI.getType(DstReg); 884 unsigned Offset = MI.getOperand(2).getImm(); 885 886 if (TypeIdx == 0) { 887 if 
(SrcTy.isVector() || DstTy.isVector()) 888 return UnableToLegalize; 889 890 SrcOp Src(SrcReg); 891 if (SrcTy.isPointer()) { 892 // Extracts from pointers can be handled only if they are really just 893 // simple integers. 894 const DataLayout &DL = MIRBuilder.getDataLayout(); 895 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) 896 return UnableToLegalize; 897 898 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); 899 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); 900 SrcTy = SrcAsIntTy; 901 } 902 903 if (DstTy.isPointer()) 904 return UnableToLegalize; 905 906 if (Offset == 0) { 907 // Avoid a shift in the degenerate case. 908 MIRBuilder.buildTrunc(DstReg, 909 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src)); 910 MI.eraseFromParent(); 911 return Legalized; 912 } 913 914 // Do a shift in the source type. 915 LLT ShiftTy = SrcTy; 916 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { 917 Src = MIRBuilder.buildAnyExt(WideTy, Src); 918 ShiftTy = WideTy; 919 } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) 920 return UnableToLegalize; 921 922 auto LShr = MIRBuilder.buildLShr( 923 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); 924 MIRBuilder.buildTrunc(DstReg, LShr); 925 MI.eraseFromParent(); 926 return Legalized; 927 } 928 929 if (!SrcTy.isVector()) 930 return UnableToLegalize; 931 932 if (DstTy != SrcTy.getElementType()) 933 return UnableToLegalize; 934 935 if (Offset % SrcTy.getScalarSizeInBits() != 0) 936 return UnableToLegalize; 937 938 Observer.changingInstr(MI); 939 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 940 941 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * 942 Offset); 943 widenScalarDst(MI, WideTy.getScalarType(), 0); 944 Observer.changedInstr(MI); 945 return Legalized; 946 } 947 948 LegalizerHelper::LegalizeResult 949 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, 950 LLT WideTy) { 951 if (TypeIdx != 0) 952 return UnableToLegalize; 953 
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}

/// Widen scalar type index \p TypeIdx of \p MI to \p WideTy, dispatching per
/// opcode. Returns Legalized when a rewrite was emitted and UnableToLegalize
/// for unhandled opcode / type-index combinations.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    // Zero-extend both operands so the carry/borrow is observable in the
    // high bits of the wide result.
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    unsigned SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    unsigned DstReg = MI.getOperand(0).getReg();

    unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
    unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    // Emit the fixup shift after the (now widened) bswap itself.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The wide bswap leaves the original bytes in the high part; shift them
    // back down by the width difference.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
        .addDef(ShrReg)
        .addUse(DstExt)
        .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed division: the sign of each operand must be preserved.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // Extension of the shifted value must match the shift's signedness so
      // the bits shifted in from the top are correct.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    // Only the destination is widened; the memory operand keeps its size, so
    // this effectively turns the load into an extending load.
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // i1 stores must be zero-extended so a well-defined value reaches memory;
    // wider types can use any-extend since only the low bits are stored.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Extend to match the predicate's signedness so the comparison result
      // is preserved.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Each incoming value is extended at the end of its predecessor block.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      unsigned VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP ops are widened with explicit fpext/fptrunc to preserve the value.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

/// Lower \p MI into simpler, already-legalizable instructions, dispatching
/// per opcode. \p Ty is the type of the operand index being lowered.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
        .addDef(HiPart)
        .addUse(LHS)
        .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
          .addDef(Shifted)
          .addUse(Res)
          .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // Lower fneg to fsub(-0.0, x).
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain cmpxchg plus an explicit compare for the success flag.
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then extend to the destination width with
      // the extension matching the original opcode.
      unsigned TmpReg = MRI.createGenericVirtualRegister(
        LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    // Res = LHS + RHS + CarryIn; CarryOut = Res <u LHS.
    // NOTE(review): when RHS is all-ones and CarryIn is 1, Res wraps to
    // exactly LHS and this ULT misses the carry — confirm against the
    // current upstream lowering.
    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    // Borrow occurs exactly when LHS <u RHS.
    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned BorrowIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    // Res = LHS - RHS - BorrowIn. Borrow out is BorrowIn when LHS == RHS,
    // otherwise LHS <u RHS.
    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Split a G_IMPLICIT_DEF of a vector into NumParts undefs of \p NarrowTy and
/// reassemble them with concat-vectors (or build-vector for scalar pieces).
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<unsigned, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  for (int i = 0; i < NumParts; ++i) {
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(TmpReg);
    DstRegs.push_back(TmpReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split a simple (1-3 operand, all operands the same vector type) operation
/// into pieces of \p NarrowTy. An uneven breakdown is handled only when the
/// leftover is exactly one element, via extract/op/insert.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const unsigned DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even breakdown: split every operand, apply the op per part, and
  // reassemble the results.
  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  for (int i = 0; i < NumParts; ++i) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
  SmallVector<unsigned, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    // NOTE(review): LeftoverTy is declared but unused; LeftoverTyI below is
    // the one actually passed to extractParts.
    LLT LeftoverTy;
    unsigned SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (unsigned PartReg : PartRegs) {
        unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (unsigned LeftoverReg : LeftoverRegs) {
        unsigned PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
1799 unsigned InstCount = 0; 1800 for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) 1801 NewInsts[InstCount++].addUse(PartRegs[J]); 1802 for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) 1803 NewInsts[InstCount++].addUse(LeftoverRegs[J]); 1804 } 1805 1806 PartRegs.clear(); 1807 LeftoverRegs.clear(); 1808 } 1809 1810 // Insert the newly built operations and rebuild the result register. 1811 for (auto &MIB : NewInsts) 1812 MIRBuilder.insertInstr(MIB); 1813 1814 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); 1815 1816 MI.eraseFromParent(); 1817 return Legalized; 1818 } 1819 1820 LegalizerHelper::LegalizeResult 1821 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, 1822 LLT NarrowTy) { 1823 if (TypeIdx != 0) 1824 return UnableToLegalize; 1825 1826 unsigned DstReg = MI.getOperand(0).getReg(); 1827 unsigned SrcReg = MI.getOperand(1).getReg(); 1828 LLT DstTy = MRI.getType(DstReg); 1829 LLT SrcTy = MRI.getType(SrcReg); 1830 1831 LLT NarrowTy0 = NarrowTy; 1832 LLT NarrowTy1; 1833 unsigned NumParts; 1834 1835 if (NarrowTy.isVector()) { 1836 // Uneven breakdown not handled. 
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // Source piece: same element count as the part count, with the source's
    // element size. NOTE(review): assumes NumParts == NarrowTy element count
    // (e.g. <4 x …> split in halves) — verify for other breakdowns.
    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Full scalarization: one piece per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<unsigned, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
      .addDef(DstReg)
      .addUse(SrcRegs[I]);

    // Preserve e.g. fast-math flags on the narrowed pieces.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split a vector G_ICMP/G_FCMP into narrower compares. The boolean result and
// the compared operands have different element types, so the "other" type
// index's piece type is derived from the one being narrowed.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    // Narrowing the result type; derive the source piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Narrowing the source type; derive the result piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ?  (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      // FCMP carries fast-math flags; copy them to each piece.
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split a vector G_SELECT. The condition may be scalar (reused for every
// piece) or a vector (broken down alongside the value operands).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // NOTE(review): vector-condition breakdown is unconditionally rejected
      // even when the even-breakdown check passes — placeholder until the
      // piece types are computed for this case.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    // Scalar condition is shared by all pieces; vector condition is split.
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Break a G_LOAD/G_STORE into several smaller memory accesses, including an
// odd-sized leftover piece if the type doesn't divide evenly.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  unsigned ValReg = MI.getOperand(0).getReg();
  unsigned AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;
  LLT LeftoverTy;
  SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // Loads only need the breakdown shape; pieces are created as we go.
    NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // Stores need the value split up-front so each piece can be stored.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs))
      NumParts = NarrowRegs.size();
  }

  if (NumParts == -1)
    return UnableToLegalize;

  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      unsigned NewAddrReg = 0;

      // Address of this piece = base + byte offset.
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Narrowed memory operand derived from the original (offset + size).
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}

// Top-level dispatch for the FewerElements action: route each opcode to the
// helper that knows how to split it.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FCEIL:
  case G_FFLOOR:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_CTLZ:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
    // Shifts: operands share element counts but not element types.
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}

// Expand a wide shift whose amount is a compile-time constant into operations
// on the two half-width pieces of the input. The constant amount lets each
// range (0, <NVTBits, ==NVTBits, >NVTBits, >VTBits) be resolved statically,
// avoiding the runtime selects of the general expansion.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
  unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Shift by zero: just reassemble the input.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(0), Hi(0);
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Entire value shifted out.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only low bits survive, landing in the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exact half-width shift: low half moves to the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General in-range case: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      // Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: as G_LSHR, but the vacated high bits replicate the sign.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      // Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}

// TODO: Optimize if constant shift amount.
2253 LegalizerHelper::LegalizeResult 2254 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, 2255 LLT RequestedTy) { 2256 if (TypeIdx == 1) { 2257 Observer.changingInstr(MI); 2258 narrowScalarSrc(MI, RequestedTy, 2); 2259 Observer.changedInstr(MI); 2260 return Legalized; 2261 } 2262 2263 unsigned DstReg = MI.getOperand(0).getReg(); 2264 LLT DstTy = MRI.getType(DstReg); 2265 if (DstTy.isVector()) 2266 return UnableToLegalize; 2267 2268 unsigned Amt = MI.getOperand(2).getReg(); 2269 LLT ShiftAmtTy = MRI.getType(Amt); 2270 const unsigned DstEltSize = DstTy.getScalarSizeInBits(); 2271 if (DstEltSize % 2 != 0) 2272 return UnableToLegalize; 2273 2274 // Ignore the input type. We can only go to exactly half the size of the 2275 // input. If that isn't small enough, the resulting pieces will be further 2276 // legalized. 2277 const unsigned NewBitSize = DstEltSize / 2; 2278 const LLT HalfTy = LLT::scalar(NewBitSize); 2279 const LLT CondTy = LLT::scalar(1); 2280 2281 if (const MachineInstr *KShiftAmt = 2282 getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { 2283 return narrowScalarShiftByConstant( 2284 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); 2285 } 2286 2287 // TODO: Expand with known bits. 2288 2289 // Handle the fully general expansion by an unknown amount. 
2290 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize); 2291 2292 unsigned InL = MRI.createGenericVirtualRegister(HalfTy); 2293 unsigned InH = MRI.createGenericVirtualRegister(HalfTy); 2294 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); 2295 2296 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits); 2297 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt); 2298 2299 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0); 2300 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits); 2301 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero); 2302 2303 unsigned ResultRegs[2]; 2304 switch (MI.getOpcode()) { 2305 case TargetOpcode::G_SHL: { 2306 // Short: ShAmt < NewBitSize 2307 auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2308 2309 auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2310 auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); 2311 auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2312 2313 // Long: ShAmt >= NewBitSize 2314 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. 2315 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part. 2316 2317 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL); 2318 auto Hi = MIRBuilder.buildSelect( 2319 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL)); 2320 2321 ResultRegs[0] = Lo.getReg(0); 2322 ResultRegs[1] = Hi.getReg(0); 2323 break; 2324 } 2325 case TargetOpcode::G_LSHR: { 2326 // Short: ShAmt < NewBitSize 2327 auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); 2328 2329 auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2330 auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); 2331 auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2332 2333 // Long: ShAmt >= NewBitSize 2334 auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. 2335 auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 
2336 2337 auto Lo = MIRBuilder.buildSelect( 2338 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2339 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2340 2341 ResultRegs[0] = Lo.getReg(0); 2342 ResultRegs[1] = Hi.getReg(0); 2343 break; 2344 } 2345 case TargetOpcode::G_ASHR: { 2346 // Short: ShAmt < NewBitSize 2347 auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); 2348 2349 auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2350 auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); 2351 auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2352 2353 // Long: ShAmt >= NewBitSize 2354 2355 // Sign of Hi part. 2356 auto HiL = MIRBuilder.buildAShr( 2357 HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); 2358 2359 auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 2360 2361 auto Lo = MIRBuilder.buildSelect( 2362 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2363 2364 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2365 2366 ResultRegs[0] = Lo.getReg(0); 2367 ResultRegs[1] = Hi.getReg(0); 2368 break; 2369 } 2370 default: 2371 llvm_unreachable("not a shift"); 2372 } 2373 2374 MIRBuilder.buildMerge(DstReg, ResultRegs); 2375 MI.eraseFromParent(); 2376 return Legalized; 2377 } 2378 2379 LegalizerHelper::LegalizeResult 2380 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, 2381 LLT MoreTy) { 2382 MIRBuilder.setInstr(MI); 2383 unsigned Opc = MI.getOpcode(); 2384 switch (Opc) { 2385 case TargetOpcode::G_IMPLICIT_DEF: { 2386 Observer.changingInstr(MI); 2387 moreElementsVectorDst(MI, MoreTy, 0); 2388 Observer.changedInstr(MI); 2389 return Legalized; 2390 } 2391 case TargetOpcode::G_AND: 2392 case TargetOpcode::G_OR: 2393 case TargetOpcode::G_XOR: { 2394 Observer.changingInstr(MI); 2395 moreElementsVectorSrc(MI, MoreTy, 1); 2396 moreElementsVectorSrc(MI, MoreTy, 2); 2397 moreElementsVectorDst(MI, MoreTy, 0); 2398 
Observer.changedInstr(MI); 2399 return Legalized; 2400 } 2401 case TargetOpcode::G_EXTRACT: 2402 if (TypeIdx != 1) 2403 return UnableToLegalize; 2404 Observer.changingInstr(MI); 2405 moreElementsVectorSrc(MI, MoreTy, 1); 2406 Observer.changedInstr(MI); 2407 return Legalized; 2408 case TargetOpcode::G_SELECT: 2409 if (TypeIdx != 0) 2410 return UnableToLegalize; 2411 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) 2412 return UnableToLegalize; 2413 2414 Observer.changingInstr(MI); 2415 moreElementsVectorSrc(MI, MoreTy, 2); 2416 moreElementsVectorSrc(MI, MoreTy, 3); 2417 moreElementsVectorDst(MI, MoreTy, 0); 2418 Observer.changedInstr(MI); 2419 return Legalized; 2420 default: 2421 return UnableToLegalize; 2422 } 2423 } 2424 2425 LegalizerHelper::LegalizeResult 2426 LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) { 2427 unsigned DstReg = MI.getOperand(0).getReg(); 2428 unsigned Src0 = MI.getOperand(1).getReg(); 2429 unsigned Src1 = MI.getOperand(2).getReg(); 2430 LLT Ty = MRI.getType(DstReg); 2431 if (Ty.isVector()) 2432 return UnableToLegalize; 2433 2434 unsigned Size = Ty.getSizeInBits(); 2435 unsigned NewSize = Size / 2; 2436 if (Size != 2 * NewSize) 2437 return UnableToLegalize; 2438 2439 LLT HalfTy = LLT::scalar(NewSize); 2440 // TODO: if HalfTy != NewTy, handle the breakdown all at once? 
2441 2442 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 2443 unsigned Lo = MRI.createGenericVirtualRegister(HalfTy); 2444 unsigned Hi = MRI.createGenericVirtualRegister(HalfTy); 2445 unsigned ExtLo = MRI.createGenericVirtualRegister(Ty); 2446 unsigned ExtHi = MRI.createGenericVirtualRegister(Ty); 2447 unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty); 2448 2449 SmallVector<unsigned, 2> Src0Parts; 2450 SmallVector<unsigned, 2> Src1Parts; 2451 2452 extractParts(Src0, HalfTy, 2, Src0Parts); 2453 extractParts(Src1, HalfTy, 2, Src1Parts); 2454 2455 MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]); 2456 2457 // TODO: Use smulh or umulh depending on what the target has. 2458 MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]); 2459 2460 MIRBuilder.buildConstant(ShiftAmt, NewSize); 2461 MIRBuilder.buildAnyExt(ExtHi, Hi); 2462 MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt); 2463 2464 MIRBuilder.buildZExt(ExtLo, Lo); 2465 MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi); 2466 MI.eraseFromParent(); 2467 return Legalized; 2468 } 2469 2470 2471 LegalizerHelper::LegalizeResult 2472 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, 2473 LLT NarrowTy) { 2474 if (TypeIdx != 1) 2475 return UnableToLegalize; 2476 2477 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 2478 2479 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 2480 // FIXME: add support for when SizeOp1 isn't an exact multiple of 2481 // NarrowSize. 
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<unsigned, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  unsigned OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of this subregister within the wide source.
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // Extract begins before this piece; take from its start.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // Extract begins inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    unsigned SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the (unchanged-width) result.
  unsigned DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

// Narrow the result (TypeIdx 0) of a G_INSERT: split the wide source,
// splice the inserted value's overlapping bits into each affected piece, and
// re-merge.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<unsigned, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  unsigned OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of this subregister within the wide destination.
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // Inserted value starts before this piece; splice its tail in at bit 0.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // Inserted value starts inside this piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    unsigned SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  unsigned DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

// Narrow the result of a scalar-condition G_SELECT by selecting each narrow
// piece (plus any leftover piece) with the shared condition.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
  SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<unsigned, 4> Src2Regs,
                           Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  // The second value operand has the same type, so its breakdown must match.
  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select->getOperand(0).getReg());
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
      LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Lower bit-counting operations (CTLZ/CTTZ and their ZERO_UNDEF variants) in
// terms of operations the target does support (per LI.getAction).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An op is usable for this lowering if the target can handle it somehow.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      // Smear the leading set bit across all lower positions.
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    // ~x & (x - 1): mask of the bits strictly below the lowest set bit.
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Otherwise rewrite this instruction in place as CTPOP of the mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}