1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file implements the LegalizerHelper class to legalize 10 /// individual instructions and the LegalizeMachineIR wrapper pass for the 11 /// primary legalization. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 16 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/CodeGen/TargetInstrInfo.h" 21 #include "llvm/CodeGen/TargetLowering.h" 22 #include "llvm/CodeGen/TargetSubtargetInfo.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 #define DEBUG_TYPE "legalizer" 28 29 using namespace llvm; 30 using namespace LegalizeActions; 31 32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces. 33 /// 34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, 35 /// with any leftover piece as type \p LeftoverTy 36 /// 37 /// Returns -1 if the breakdown is not satisfiable. 
38 static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { 39 assert(!LeftoverTy.isValid() && "this is an out argument"); 40 41 unsigned Size = OrigTy.getSizeInBits(); 42 unsigned NarrowSize = NarrowTy.getSizeInBits(); 43 unsigned NumParts = Size / NarrowSize; 44 unsigned LeftoverSize = Size - NumParts * NarrowSize; 45 assert(Size > NarrowSize); 46 47 if (LeftoverSize == 0) 48 return NumParts; 49 50 if (NarrowTy.isVector()) { 51 unsigned EltSize = OrigTy.getScalarSizeInBits(); 52 if (LeftoverSize % EltSize != 0) 53 return -1; 54 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 55 } else { 56 LeftoverTy = LLT::scalar(LeftoverSize); 57 } 58 59 return NumParts; 60 } 61 62 LegalizerHelper::LegalizerHelper(MachineFunction &MF, 63 GISelChangeObserver &Observer, 64 MachineIRBuilder &Builder) 65 : MIRBuilder(Builder), MRI(MF.getRegInfo()), 66 LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { 67 MIRBuilder.setMF(MF); 68 MIRBuilder.setChangeObserver(Observer); 69 } 70 71 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, 72 GISelChangeObserver &Observer, 73 MachineIRBuilder &B) 74 : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { 75 MIRBuilder.setMF(MF); 76 MIRBuilder.setChangeObserver(Observer); 77 } 78 LegalizerHelper::LegalizeResult 79 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { 80 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); 81 82 auto Step = LI.getAction(MI, MRI); 83 switch (Step.Action) { 84 case Legal: 85 LLVM_DEBUG(dbgs() << ".. Already legal\n"); 86 return AlreadyLegal; 87 case Libcall: 88 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); 89 return libcall(MI); 90 case NarrowScalar: 91 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); 92 return narrowScalar(MI, Step.TypeIdx, Step.NewType); 93 case WidenScalar: 94 LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); 95 return widenScalar(MI, Step.TypeIdx, Step.NewType); 96 case Lower: 97 LLVM_DEBUG(dbgs() << ".. 
Lower\n"); 98 return lower(MI, Step.TypeIdx, Step.NewType); 99 case FewerElements: 100 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); 101 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); 102 case MoreElements: 103 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n"); 104 return moreElementsVector(MI, Step.TypeIdx, Step.NewType); 105 case Custom: 106 LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); 107 return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized 108 : UnableToLegalize; 109 default: 110 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); 111 return UnableToLegalize; 112 } 113 } 114 115 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, 116 SmallVectorImpl<unsigned> &VRegs) { 117 for (int i = 0; i < NumParts; ++i) 118 VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); 119 MIRBuilder.buildUnmerge(VRegs, Reg); 120 } 121 122 bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy, 123 LLT MainTy, LLT &LeftoverTy, 124 SmallVectorImpl<unsigned> &VRegs, 125 SmallVectorImpl<unsigned> &LeftoverRegs) { 126 assert(!LeftoverTy.isValid() && "this is an out argument"); 127 128 unsigned RegSize = RegTy.getSizeInBits(); 129 unsigned MainSize = MainTy.getSizeInBits(); 130 unsigned NumParts = RegSize / MainSize; 131 unsigned LeftoverSize = RegSize - NumParts * MainSize; 132 133 // Use an unmerge when possible. 134 if (LeftoverSize == 0) { 135 for (unsigned I = 0; I < NumParts; ++I) 136 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy)); 137 MIRBuilder.buildUnmerge(VRegs, Reg); 138 return true; 139 } 140 141 if (MainTy.isVector()) { 142 unsigned EltSize = MainTy.getScalarSizeInBits(); 143 if (LeftoverSize % EltSize != 0) 144 return false; 145 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 146 } else { 147 LeftoverTy = LLT::scalar(LeftoverSize); 148 } 149 150 // For irregular sizes, extract the individual parts. 
151 for (unsigned I = 0; I != NumParts; ++I) { 152 unsigned NewReg = MRI.createGenericVirtualRegister(MainTy); 153 VRegs.push_back(NewReg); 154 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I); 155 } 156 157 for (unsigned Offset = MainSize * NumParts; Offset < RegSize; 158 Offset += LeftoverSize) { 159 unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy); 160 LeftoverRegs.push_back(NewReg); 161 MIRBuilder.buildExtract(NewReg, Reg, Offset); 162 } 163 164 return true; 165 } 166 167 void LegalizerHelper::insertParts(unsigned DstReg, 168 LLT ResultTy, LLT PartTy, 169 ArrayRef<unsigned> PartRegs, 170 LLT LeftoverTy, 171 ArrayRef<unsigned> LeftoverRegs) { 172 if (!LeftoverTy.isValid()) { 173 assert(LeftoverRegs.empty()); 174 175 if (!ResultTy.isVector()) { 176 MIRBuilder.buildMerge(DstReg, PartRegs); 177 return; 178 } 179 180 if (PartTy.isVector()) 181 MIRBuilder.buildConcatVectors(DstReg, PartRegs); 182 else 183 MIRBuilder.buildBuildVector(DstReg, PartRegs); 184 return; 185 } 186 187 unsigned PartSize = PartTy.getSizeInBits(); 188 unsigned LeftoverPartSize = LeftoverTy.getSizeInBits(); 189 190 unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy); 191 MIRBuilder.buildUndef(CurResultReg); 192 193 unsigned Offset = 0; 194 for (unsigned PartReg : PartRegs) { 195 unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy); 196 MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset); 197 CurResultReg = NewResultReg; 198 Offset += PartSize; 199 } 200 201 for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) { 202 // Use the original output register for the final insert to avoid a copy. 203 unsigned NewResultReg = (I + 1 == E) ? 
204 DstReg : MRI.createGenericVirtualRegister(ResultTy); 205 206 MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset); 207 CurResultReg = NewResultReg; 208 Offset += LeftoverPartSize; 209 } 210 } 211 212 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { 213 switch (Opcode) { 214 case TargetOpcode::G_SDIV: 215 assert((Size == 32 || Size == 64) && "Unsupported size"); 216 return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; 217 case TargetOpcode::G_UDIV: 218 assert((Size == 32 || Size == 64) && "Unsupported size"); 219 return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; 220 case TargetOpcode::G_SREM: 221 assert((Size == 32 || Size == 64) && "Unsupported size"); 222 return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; 223 case TargetOpcode::G_UREM: 224 assert((Size == 32 || Size == 64) && "Unsupported size"); 225 return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32; 226 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 227 assert(Size == 32 && "Unsupported size"); 228 return RTLIB::CTLZ_I32; 229 case TargetOpcode::G_FADD: 230 assert((Size == 32 || Size == 64) && "Unsupported size"); 231 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; 232 case TargetOpcode::G_FSUB: 233 assert((Size == 32 || Size == 64) && "Unsupported size"); 234 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; 235 case TargetOpcode::G_FMUL: 236 assert((Size == 32 || Size == 64) && "Unsupported size"); 237 return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32; 238 case TargetOpcode::G_FDIV: 239 assert((Size == 32 || Size == 64) && "Unsupported size"); 240 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; 241 case TargetOpcode::G_FEXP: 242 assert((Size == 32 || Size == 64) && "Unsupported size"); 243 return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32; 244 case TargetOpcode::G_FREM: 245 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; 246 case TargetOpcode::G_FPOW: 247 return Size == 64 ? 
RTLIB::POW_F64 : RTLIB::POW_F32; 248 case TargetOpcode::G_FMA: 249 assert((Size == 32 || Size == 64) && "Unsupported size"); 250 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; 251 case TargetOpcode::G_FSIN: 252 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 253 return Size == 128 ? RTLIB::SIN_F128 254 : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32; 255 case TargetOpcode::G_FCOS: 256 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 257 return Size == 128 ? RTLIB::COS_F128 258 : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32; 259 case TargetOpcode::G_FLOG10: 260 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 261 return Size == 128 ? RTLIB::LOG10_F128 262 : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32; 263 case TargetOpcode::G_FLOG: 264 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 265 return Size == 128 ? RTLIB::LOG_F128 266 : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32; 267 case TargetOpcode::G_FLOG2: 268 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); 269 return Size == 128 ? RTLIB::LOG2_F128 270 : Size == 64 ? 
RTLIB::LOG2_F64 : RTLIB::LOG2_F32; 271 } 272 llvm_unreachable("Unknown libcall function"); 273 } 274 275 LegalizerHelper::LegalizeResult 276 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, 277 const CallLowering::ArgInfo &Result, 278 ArrayRef<CallLowering::ArgInfo> Args) { 279 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); 280 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); 281 const char *Name = TLI.getLibcallName(Libcall); 282 283 MIRBuilder.getMF().getFrameInfo().setHasCalls(true); 284 if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), 285 MachineOperand::CreateES(Name), Result, Args)) 286 return LegalizerHelper::UnableToLegalize; 287 288 return LegalizerHelper::Legalized; 289 } 290 291 // Useful for libcalls where all operands have the same type. 292 static LegalizerHelper::LegalizeResult 293 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, 294 Type *OpType) { 295 auto Libcall = getRTLibDesc(MI.getOpcode(), Size); 296 297 SmallVector<CallLowering::ArgInfo, 3> Args; 298 for (unsigned i = 1; i < MI.getNumOperands(); i++) 299 Args.push_back({MI.getOperand(i).getReg(), OpType}); 300 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, 301 Args); 302 } 303 304 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, 305 Type *FromType) { 306 auto ToMVT = MVT::getVT(ToType); 307 auto FromMVT = MVT::getVT(FromType); 308 309 switch (Opcode) { 310 case TargetOpcode::G_FPEXT: 311 return RTLIB::getFPEXT(FromMVT, ToMVT); 312 case TargetOpcode::G_FPTRUNC: 313 return RTLIB::getFPROUND(FromMVT, ToMVT); 314 case TargetOpcode::G_FPTOSI: 315 return RTLIB::getFPTOSINT(FromMVT, ToMVT); 316 case TargetOpcode::G_FPTOUI: 317 return RTLIB::getFPTOUINT(FromMVT, ToMVT); 318 case TargetOpcode::G_SITOFP: 319 return RTLIB::getSINTTOFP(FromMVT, ToMVT); 320 case TargetOpcode::G_UITOFP: 321 return RTLIB::getUINTTOFP(FromMVT, ToMVT); 322 } 323 
llvm_unreachable("Unsupported libcall function"); 324 } 325 326 static LegalizerHelper::LegalizeResult 327 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, 328 Type *FromType) { 329 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); 330 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, 331 {{MI.getOperand(1).getReg(), FromType}}); 332 } 333 334 LegalizerHelper::LegalizeResult 335 LegalizerHelper::libcall(MachineInstr &MI) { 336 LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); 337 unsigned Size = LLTy.getSizeInBits(); 338 auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); 339 340 MIRBuilder.setInstr(MI); 341 342 switch (MI.getOpcode()) { 343 default: 344 return UnableToLegalize; 345 case TargetOpcode::G_SDIV: 346 case TargetOpcode::G_UDIV: 347 case TargetOpcode::G_SREM: 348 case TargetOpcode::G_UREM: 349 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 350 Type *HLTy = IntegerType::get(Ctx, Size); 351 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 352 if (Status != Legalized) 353 return Status; 354 break; 355 } 356 case TargetOpcode::G_FADD: 357 case TargetOpcode::G_FSUB: 358 case TargetOpcode::G_FMUL: 359 case TargetOpcode::G_FDIV: 360 case TargetOpcode::G_FMA: 361 case TargetOpcode::G_FPOW: 362 case TargetOpcode::G_FREM: 363 case TargetOpcode::G_FCOS: 364 case TargetOpcode::G_FSIN: 365 case TargetOpcode::G_FLOG10: 366 case TargetOpcode::G_FLOG: 367 case TargetOpcode::G_FLOG2: 368 case TargetOpcode::G_FEXP: { 369 if (Size > 64) { 370 LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n"); 371 return UnableToLegalize; 372 } 373 Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); 374 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); 375 if (Status != Legalized) 376 return Status; 377 break; 378 } 379 case TargetOpcode::G_FPEXT: { 380 // FIXME: Support other floating point types (half, fp128 etc) 381 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 382 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 383 if (ToSize != 64 || FromSize != 32) 384 return UnableToLegalize; 385 LegalizeResult Status = conversionLibcall( 386 MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); 387 if (Status != Legalized) 388 return Status; 389 break; 390 } 391 case TargetOpcode::G_FPTRUNC: { 392 // FIXME: Support other floating point types (half, fp128 etc) 393 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 394 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 395 if (ToSize != 32 || FromSize != 64) 396 return UnableToLegalize; 397 LegalizeResult Status = conversionLibcall( 398 MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); 399 if (Status != Legalized) 400 return Status; 401 break; 402 } 403 case TargetOpcode::G_FPTOSI: 404 case TargetOpcode::G_FPTOUI: { 405 // FIXME: Support other types 406 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 407 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 408 if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) 409 return UnableToLegalize; 410 LegalizeResult Status = conversionLibcall( 411 MI, MIRBuilder, Type::getInt32Ty(Ctx), 412 FromSize == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); 413 if (Status != Legalized) 414 return Status; 415 break; 416 } 417 case TargetOpcode::G_SITOFP: 418 case TargetOpcode::G_UITOFP: { 419 // FIXME: Support other types 420 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 421 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 422 if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) 423 return UnableToLegalize; 424 LegalizeResult Status = conversionLibcall( 425 MI, MIRBuilder, 426 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), 427 Type::getInt32Ty(Ctx)); 428 if (Status != Legalized) 429 return Status; 430 break; 431 } 432 } 433 434 MI.eraseFromParent(); 435 return Legalized; 436 } 437 438 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, 439 unsigned TypeIdx, 440 LLT NarrowTy) { 441 MIRBuilder.setInstr(MI); 442 443 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 444 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 445 446 switch (MI.getOpcode()) { 447 default: 448 return UnableToLegalize; 449 case TargetOpcode::G_IMPLICIT_DEF: { 450 // FIXME: add support for when SizeOp0 isn't an exact multiple of 451 // NarrowSize. 452 if (SizeOp0 % NarrowSize != 0) 453 return UnableToLegalize; 454 int NumParts = SizeOp0 / NarrowSize; 455 456 SmallVector<unsigned, 2> DstRegs; 457 for (int i = 0; i < NumParts; ++i) 458 DstRegs.push_back( 459 MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); 460 461 unsigned DstReg = MI.getOperand(0).getReg(); 462 if(MRI.getType(DstReg).isVector()) 463 MIRBuilder.buildBuildVector(DstReg, DstRegs); 464 else 465 MIRBuilder.buildMerge(DstReg, DstRegs); 466 MI.eraseFromParent(); 467 return Legalized; 468 } 469 case TargetOpcode::G_ADD: { 470 // FIXME: add support for when SizeOp0 isn't an exact multiple of 471 // NarrowSize. 
472 if (SizeOp0 % NarrowSize != 0) 473 return UnableToLegalize; 474 // Expand in terms of carry-setting/consuming G_ADDE instructions. 475 int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 476 477 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 478 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 479 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 480 481 unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); 482 MIRBuilder.buildConstant(CarryIn, 0); 483 484 for (int i = 0; i < NumParts; ++i) { 485 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 486 unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 487 488 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], 489 Src2Regs[i], CarryIn); 490 491 DstRegs.push_back(DstReg); 492 CarryIn = CarryOut; 493 } 494 unsigned DstReg = MI.getOperand(0).getReg(); 495 if(MRI.getType(DstReg).isVector()) 496 MIRBuilder.buildBuildVector(DstReg, DstRegs); 497 else 498 MIRBuilder.buildMerge(DstReg, DstRegs); 499 MI.eraseFromParent(); 500 return Legalized; 501 } 502 case TargetOpcode::G_SUB: { 503 // FIXME: add support for when SizeOp0 isn't an exact multiple of 504 // NarrowSize. 
505 if (SizeOp0 % NarrowSize != 0) 506 return UnableToLegalize; 507 508 int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 509 510 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 511 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 512 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 513 514 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 515 unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 516 MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut}, 517 {Src1Regs[0], Src2Regs[0]}); 518 DstRegs.push_back(DstReg); 519 unsigned BorrowIn = BorrowOut; 520 for (int i = 1; i < NumParts; ++i) { 521 DstReg = MRI.createGenericVirtualRegister(NarrowTy); 522 BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 523 524 MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut}, 525 {Src1Regs[i], Src2Regs[i], BorrowIn}); 526 527 DstRegs.push_back(DstReg); 528 BorrowIn = BorrowOut; 529 } 530 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); 531 MI.eraseFromParent(); 532 return Legalized; 533 } 534 case TargetOpcode::G_MUL: 535 return narrowScalarMul(MI, TypeIdx, NarrowTy); 536 case TargetOpcode::G_EXTRACT: 537 return narrowScalarExtract(MI, TypeIdx, NarrowTy); 538 case TargetOpcode::G_INSERT: 539 return narrowScalarInsert(MI, TypeIdx, NarrowTy); 540 case TargetOpcode::G_LOAD: { 541 const auto &MMO = **MI.memoperands_begin(); 542 unsigned DstReg = MI.getOperand(0).getReg(); 543 LLT DstTy = MRI.getType(DstReg); 544 if (DstTy.isVector()) 545 return UnableToLegalize; 546 547 if (8 * MMO.getSize() != DstTy.getSizeInBits()) { 548 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 549 auto &MMO = **MI.memoperands_begin(); 550 MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO); 551 MIRBuilder.buildAnyExt(DstReg, TmpReg); 552 MI.eraseFromParent(); 553 return Legalized; 554 } 555 556 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); 557 } 558 case 
TargetOpcode::G_ZEXTLOAD: 559 case TargetOpcode::G_SEXTLOAD: { 560 bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD; 561 unsigned DstReg = MI.getOperand(0).getReg(); 562 unsigned PtrReg = MI.getOperand(1).getReg(); 563 564 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 565 auto &MMO = **MI.memoperands_begin(); 566 if (MMO.getSize() * 8 == NarrowSize) { 567 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 568 } else { 569 unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD 570 : TargetOpcode::G_SEXTLOAD; 571 MIRBuilder.buildInstr(ExtLoad) 572 .addDef(TmpReg) 573 .addUse(PtrReg) 574 .addMemOperand(&MMO); 575 } 576 577 if (ZExt) 578 MIRBuilder.buildZExt(DstReg, TmpReg); 579 else 580 MIRBuilder.buildSExt(DstReg, TmpReg); 581 582 MI.eraseFromParent(); 583 return Legalized; 584 } 585 case TargetOpcode::G_STORE: { 586 const auto &MMO = **MI.memoperands_begin(); 587 588 unsigned SrcReg = MI.getOperand(0).getReg(); 589 LLT SrcTy = MRI.getType(SrcReg); 590 if (SrcTy.isVector()) 591 return UnableToLegalize; 592 593 int NumParts = SizeOp0 / NarrowSize; 594 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits(); 595 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize; 596 if (SrcTy.isVector() && LeftoverBits != 0) 597 return UnableToLegalize; 598 599 if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { 600 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 601 auto &MMO = **MI.memoperands_begin(); 602 MIRBuilder.buildTrunc(TmpReg, SrcReg); 603 MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO); 604 MI.eraseFromParent(); 605 return Legalized; 606 } 607 608 return reduceLoadStoreWidth(MI, 0, NarrowTy); 609 } 610 case TargetOpcode::G_CONSTANT: { 611 // FIXME: add support for when SizeOp0 isn't an exact multiple of 612 // NarrowSize. 
613 if (SizeOp0 % NarrowSize != 0) 614 return UnableToLegalize; 615 int NumParts = SizeOp0 / NarrowSize; 616 const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); 617 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 618 619 SmallVector<unsigned, 2> DstRegs; 620 for (int i = 0; i < NumParts; ++i) { 621 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 622 ConstantInt *CI = 623 ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize)); 624 MIRBuilder.buildConstant(DstReg, *CI); 625 DstRegs.push_back(DstReg); 626 } 627 unsigned DstReg = MI.getOperand(0).getReg(); 628 if(MRI.getType(DstReg).isVector()) 629 MIRBuilder.buildBuildVector(DstReg, DstRegs); 630 else 631 MIRBuilder.buildMerge(DstReg, DstRegs); 632 MI.eraseFromParent(); 633 return Legalized; 634 } 635 case TargetOpcode::G_SELECT: 636 return narrowScalarSelect(MI, TypeIdx, NarrowTy); 637 case TargetOpcode::G_AND: 638 case TargetOpcode::G_OR: 639 case TargetOpcode::G_XOR: { 640 // Legalize bitwise operation: 641 // A = BinOp<Ty> B, C 642 // into: 643 // B1, ..., BN = G_UNMERGE_VALUES B 644 // C1, ..., CN = G_UNMERGE_VALUES C 645 // A1 = BinOp<Ty/N> B1, C2 646 // ... 647 // AN = BinOp<Ty/N> BN, CN 648 // A = G_MERGE_VALUES A1, ..., AN 649 650 // FIXME: add support for when SizeOp0 isn't an exact multiple of 651 // NarrowSize. 652 if (SizeOp0 % NarrowSize != 0) 653 return UnableToLegalize; 654 int NumParts = SizeOp0 / NarrowSize; 655 656 // List the registers where the destination will be scattered. 657 SmallVector<unsigned, 2> DstRegs; 658 // List the registers where the first argument will be split. 659 SmallVector<unsigned, 2> SrcsReg1; 660 // List the registers where the second argument will be split. 661 SmallVector<unsigned, 2> SrcsReg2; 662 // Create all the temporary registers. 
663 for (int i = 0; i < NumParts; ++i) { 664 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 665 unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); 666 unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); 667 668 DstRegs.push_back(DstReg); 669 SrcsReg1.push_back(SrcReg1); 670 SrcsReg2.push_back(SrcReg2); 671 } 672 // Explode the big arguments into smaller chunks. 673 MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); 674 MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); 675 676 // Do the operation on each small part. 677 for (int i = 0; i < NumParts; ++i) 678 MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]}, 679 {SrcsReg1[i], SrcsReg2[i]}); 680 681 // Gather the destination registers into the final destination. 682 unsigned DstReg = MI.getOperand(0).getReg(); 683 if(MRI.getType(DstReg).isVector()) 684 MIRBuilder.buildBuildVector(DstReg, DstRegs); 685 else 686 MIRBuilder.buildMerge(DstReg, DstRegs); 687 MI.eraseFromParent(); 688 return Legalized; 689 } 690 case TargetOpcode::G_SHL: 691 case TargetOpcode::G_LSHR: 692 case TargetOpcode::G_ASHR: 693 return narrowScalarShift(MI, TypeIdx, NarrowTy); 694 case TargetOpcode::G_CTLZ: 695 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 696 case TargetOpcode::G_CTTZ: 697 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 698 case TargetOpcode::G_CTPOP: 699 if (TypeIdx != 0) 700 return UnableToLegalize; // TODO 701 702 Observer.changingInstr(MI); 703 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 704 Observer.changedInstr(MI); 705 return Legalized; 706 case TargetOpcode::G_INTTOPTR: 707 if (TypeIdx != 1) 708 return UnableToLegalize; 709 710 Observer.changingInstr(MI); 711 narrowScalarSrc(MI, NarrowTy, 1); 712 Observer.changedInstr(MI); 713 return Legalized; 714 case TargetOpcode::G_PTRTOINT: 715 if (TypeIdx != 0) 716 return UnableToLegalize; 717 718 Observer.changingInstr(MI); 719 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 720 Observer.changedInstr(MI); 721 return 
Legalized; 722 } 723 } 724 725 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, 726 unsigned OpIdx, unsigned ExtOpcode) { 727 MachineOperand &MO = MI.getOperand(OpIdx); 728 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()}); 729 MO.setReg(ExtB->getOperand(0).getReg()); 730 } 731 732 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, 733 unsigned OpIdx) { 734 MachineOperand &MO = MI.getOperand(OpIdx); 735 auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy}, 736 {MO.getReg()}); 737 MO.setReg(ExtB->getOperand(0).getReg()); 738 } 739 740 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, 741 unsigned OpIdx, unsigned TruncOpcode) { 742 MachineOperand &MO = MI.getOperand(OpIdx); 743 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); 744 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 745 MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); 746 MO.setReg(DstExt); 747 } 748 749 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, 750 unsigned OpIdx, unsigned ExtOpcode) { 751 MachineOperand &MO = MI.getOperand(OpIdx); 752 unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy); 753 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 754 MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc}); 755 MO.setReg(DstTrunc); 756 } 757 758 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, 759 unsigned OpIdx) { 760 MachineOperand &MO = MI.getOperand(OpIdx); 761 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); 762 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 763 MIRBuilder.buildExtract(MO.getReg(), DstExt, 0); 764 MO.setReg(DstExt); 765 } 766 767 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, 768 unsigned OpIdx) { 769 MachineOperand &MO = MI.getOperand(OpIdx); 770 771 LLT OldTy = MRI.getType(MO.getReg()); 772 unsigned 
OldElts = OldTy.getNumElements(); 773 unsigned NewElts = MoreTy.getNumElements(); 774 775 unsigned NumParts = NewElts / OldElts; 776 777 // Use concat_vectors if the result is a multiple of the number of elements. 778 if (NumParts * OldElts == NewElts) { 779 SmallVector<unsigned, 8> Parts; 780 Parts.push_back(MO.getReg()); 781 782 unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); 783 for (unsigned I = 1; I != NumParts; ++I) 784 Parts.push_back(ImpDef); 785 786 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); 787 MO.setReg(Concat.getReg(0)); 788 return; 789 } 790 791 unsigned MoreReg = MRI.createGenericVirtualRegister(MoreTy); 792 unsigned ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); 793 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); 794 MO.setReg(MoreReg); 795 } 796 797 LegalizerHelper::LegalizeResult 798 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, 799 LLT WideTy) { 800 if (TypeIdx != 1) 801 return UnableToLegalize; 802 803 unsigned DstReg = MI.getOperand(0).getReg(); 804 LLT DstTy = MRI.getType(DstReg); 805 if (!DstTy.isScalar()) 806 return UnableToLegalize; 807 808 unsigned NumOps = MI.getNumOperands(); 809 unsigned NumSrc = MI.getNumOperands() - 1; 810 unsigned PartSize = DstTy.getSizeInBits() / NumSrc; 811 812 unsigned Src1 = MI.getOperand(1).getReg(); 813 unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg(); 814 815 for (unsigned I = 2; I != NumOps; ++I) { 816 const unsigned Offset = (I - 1) * PartSize; 817 818 unsigned SrcReg = MI.getOperand(I).getReg(); 819 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); 820 821 auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg); 822 823 unsigned NextResult = I + 1 == NumOps ? 
DstReg : 824 MRI.createGenericVirtualRegister(DstTy); 825 826 auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset); 827 auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt); 828 MIRBuilder.buildOr(NextResult, ResultReg, Shl); 829 ResultReg = NextResult; 830 } 831 832 MI.eraseFromParent(); 833 return Legalized; 834 } 835 836 LegalizerHelper::LegalizeResult 837 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, 838 LLT WideTy) { 839 if (TypeIdx != 0) 840 return UnableToLegalize; 841 842 unsigned NumDst = MI.getNumOperands() - 1; 843 unsigned SrcReg = MI.getOperand(NumDst).getReg(); 844 LLT SrcTy = MRI.getType(SrcReg); 845 if (!SrcTy.isScalar()) 846 return UnableToLegalize; 847 848 unsigned Dst0Reg = MI.getOperand(0).getReg(); 849 LLT DstTy = MRI.getType(Dst0Reg); 850 if (!DstTy.isScalar()) 851 return UnableToLegalize; 852 853 unsigned NewSrcSize = NumDst * WideTy.getSizeInBits(); 854 LLT NewSrcTy = LLT::scalar(NewSrcSize); 855 unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits(); 856 857 auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg); 858 859 for (unsigned I = 1; I != NumDst; ++I) { 860 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I); 861 auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt); 862 WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl); 863 } 864 865 Observer.changingInstr(MI); 866 867 MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg()); 868 for (unsigned I = 0; I != NumDst; ++I) 869 widenScalarDst(MI, WideTy, I); 870 871 Observer.changedInstr(MI); 872 873 return Legalized; 874 } 875 876 LegalizerHelper::LegalizeResult 877 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, 878 LLT WideTy) { 879 unsigned DstReg = MI.getOperand(0).getReg(); 880 unsigned SrcReg = MI.getOperand(1).getReg(); 881 LLT SrcTy = MRI.getType(SrcReg); 882 883 LLT DstTy = MRI.getType(DstReg); 884 unsigned Offset = MI.getOperand(2).getImm(); 885 886 if (TypeIdx == 0) { 887 if 
(SrcTy.isVector() || DstTy.isVector()) 888 return UnableToLegalize; 889 890 SrcOp Src(SrcReg); 891 if (SrcTy.isPointer()) { 892 // Extracts from pointers can be handled only if they are really just 893 // simple integers. 894 const DataLayout &DL = MIRBuilder.getDataLayout(); 895 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) 896 return UnableToLegalize; 897 898 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); 899 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); 900 SrcTy = SrcAsIntTy; 901 } 902 903 if (DstTy.isPointer()) 904 return UnableToLegalize; 905 906 if (Offset == 0) { 907 // Avoid a shift in the degenerate case. 908 MIRBuilder.buildTrunc(DstReg, 909 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src)); 910 MI.eraseFromParent(); 911 return Legalized; 912 } 913 914 // Do a shift in the source type. 915 LLT ShiftTy = SrcTy; 916 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { 917 Src = MIRBuilder.buildAnyExt(WideTy, Src); 918 ShiftTy = WideTy; 919 } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) 920 return UnableToLegalize; 921 922 auto LShr = MIRBuilder.buildLShr( 923 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); 924 MIRBuilder.buildTrunc(DstReg, LShr); 925 MI.eraseFromParent(); 926 return Legalized; 927 } 928 929 if (!SrcTy.isVector()) 930 return UnableToLegalize; 931 932 if (DstTy != SrcTy.getElementType()) 933 return UnableToLegalize; 934 935 if (Offset % SrcTy.getScalarSizeInBits() != 0) 936 return UnableToLegalize; 937 938 Observer.changingInstr(MI); 939 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 940 941 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * 942 Offset); 943 widenScalarDst(MI, WideTy.getScalarType(), 0); 944 Observer.changedInstr(MI); 945 return Legalized; 946 } 947 948 LegalizerHelper::LegalizeResult 949 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, 950 LLT WideTy) { 951 if (TypeIdx != 0) 952 return UnableToLegalize; 953 
  // (continuation of widenScalarInsert: any-extend the insert-into operand
  // and widen the destination in place)
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}

/// Legalize \p MI by widening the scalar type at \p TypeIdx to \p WideTy.
/// Dispatches per-opcode: some cases rewrite the instruction in place via
/// widenScalarSrc/widenScalarDst, others emit a replacement sequence and
/// erase \p MI.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    // Zero-extend both inputs so the carry/borrow is observable in the high
    // bits of the wide result.
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    // NOTE(review): getZExtValue() asserts for masks wider than 64 bits —
    // presumably OrigTy is always <= 64 bits here; confirm.
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      // Widening the count result is always safe in place.
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    unsigned SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    // Any-extend, bswap at the wide size, then shift the swapped bytes back
    // down by the size difference and truncate.
    Observer.changingInstr(MI);
    unsigned DstReg = MI.getOperand(0).getReg();

    unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
    unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    // Emit the fix-up code after the (retained) G_BSWAP instruction.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
      .addDef(ShrReg)
      .addUse(DstExt)
      .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed ops need sign-extended inputs so the high bits carry the sign.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // The shifted-in bits matter, so the extension must match the shift:
      // sign-extend for arithmetic, zero-extend for logical.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
    // Unsigned ops need zero-extended inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    // Only the integer result can be widened.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    // Widening the destination turns the load into an (implicitly) extending
    // load of the original memory size.
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // i1 stores must be zero-extended so the stored byte has a defined value;
    // anything else can be any-extended since only the low bits are stored.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Sign-extend the immediate to the wide type.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    // Convert the FP immediate to the wide semantics, then arrange for the
    // original narrow value to be recovered with G_FPTRUNC.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    // The condition is a boolean; extend it per the target's boolean contents.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // The comparison stays valid only if the extension matches the
      // predicate's signedness.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    // The offset is a signed index.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, before that
    // block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The truncate of the widened PHI result must follow all PHIs.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      // Widen the vector's element type to match the widened result.
      unsigned VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    // Widen the index operand.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
    assert(TypeIdx == 0);
    // FP-extend every input, compute at the wide size, and FP-truncate the
    // result back.
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

/// Legalize \p MI by lowering it to a sequence of simpler/already-legal
/// operations, without changing any register types.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ?
                          G_SDIV : G_UDIV)
      .addDef(QuotReg)
      .addUse(MI.getOperand(1).getReg())
      .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned: overflow iff the high half is non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // Lower fneg to (G_FSUB +0.0, x).
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR float type matching the LLT's bit width for the zero
    // constant.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain cmpxchg plus an explicit comparison for the success
    // flag.
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then extend per the load's flavor.
      unsigned TmpReg = MRI.createGenericVirtualRegister(
        LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDO: {
    // add + unsigned compare: carry iff the result wrapped below an operand.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    // Two adds (operands, then carry-in) with a wrap check for carry-out.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    // sub + unsigned compare: borrow iff LHS < RHS.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    // Subtract with borrow-in; borrow-out is borrow-in when LHS == RHS,
    // otherwise LHS < RHS.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned BorrowIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Split a G_IMPLICIT_DEF of a vector into NarrowTy-sized undef pieces and
/// reassemble the original register from them.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<unsigned, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  // One undef piece per part.
  for (int i = 0; i < NumParts; ++i) {
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(TmpReg);
    DstRegs.push_back(TmpReg);
  }

  // Vector pieces are concatenated; scalar pieces rebuild the vector
  // element-wise.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split a simple (1-3 source operand, single type index) vector operation
/// into NarrowTy-sized pieces, applying the same opcode to each piece and
/// reassembling the result. Handles one trailing single-element leftover.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const unsigned DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    // Irregular breakdown: accumulate pieces into an undef of the full type.
    unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even breakdown: split each source, apply the op per part, reassemble.
  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  for (int i = 0; i < NumParts; ++i) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
  SmallVector<unsigned, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    // NOTE(review): LeftoverTy appears unused; LeftoverTyI below is the one
    // actually passed to extractParts.
    LLT LeftoverTy;
    unsigned SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Per-operand narrow type: same element count, this operand's element
    // type.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (unsigned PartReg : PartRegs) {
        unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (unsigned LeftoverReg : LeftoverRegs) {
        unsigned PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split a cast-like operation (one source, result type index 0) into
/// NarrowTy-sized pieces, casting each piece and reassembling the result.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // Source pieces keep the source element type but use the narrow count.
    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Fully scalarize: one cast per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<unsigned, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
      .addDef(DstReg)
      .addUse(SrcRegs[I]);

    // Preserve e.g. FP fast-math flags on each piece.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split a G_ICMP/G_FCMP over vectors into NarrowTy-sized pieces.
/// (Continues beyond this view.)
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ?
(OldElts / NewElts) : 1907 NarrowTy.getNumElements(); 1908 NarrowTy0 = LLT::vector(NarrowTy.getNumElements(), 1909 DstTy.getScalarSizeInBits()); 1910 NarrowTy1 = NarrowTy; 1911 } 1912 1913 // FIXME: Don't know how to handle the situation where the small vectors 1914 // aren't all the same size yet. 1915 if (NarrowTy1.isVector() && 1916 NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) 1917 return UnableToLegalize; 1918 1919 CmpInst::Predicate Pred 1920 = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); 1921 1922 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; 1923 extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); 1924 extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); 1925 1926 for (unsigned I = 0; I < NumParts; ++I) { 1927 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0); 1928 DstRegs.push_back(DstReg); 1929 1930 if (MI.getOpcode() == TargetOpcode::G_ICMP) 1931 MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); 1932 else { 1933 MachineInstr *NewCmp 1934 = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); 1935 NewCmp->setFlags(MI.getFlags()); 1936 } 1937 } 1938 1939 if (NarrowTy1.isVector()) 1940 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1941 else 1942 MIRBuilder.buildBuildVector(DstReg, DstRegs); 1943 1944 MI.eraseFromParent(); 1945 return Legalized; 1946 } 1947 1948 LegalizerHelper::LegalizeResult 1949 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, 1950 LLT NarrowTy) { 1951 unsigned DstReg = MI.getOperand(0).getReg(); 1952 unsigned CondReg = MI.getOperand(1).getReg(); 1953 1954 unsigned NumParts = 0; 1955 LLT NarrowTy0, NarrowTy1; 1956 1957 LLT DstTy = MRI.getType(DstReg); 1958 LLT CondTy = MRI.getType(CondReg); 1959 unsigned Size = DstTy.getSizeInBits(); 1960 1961 assert(TypeIdx == 0 || CondTy.isVector()); 1962 1963 if (TypeIdx == 0) { 1964 NarrowTy0 = NarrowTy; 1965 NarrowTy1 = CondTy; 1966 1967 unsigned 
NarrowSize = NarrowTy0.getSizeInBits(); 1968 // FIXME: Don't know how to handle the situation where the small vectors 1969 // aren't all the same size yet. 1970 if (Size % NarrowSize != 0) 1971 return UnableToLegalize; 1972 1973 NumParts = Size / NarrowSize; 1974 1975 // Need to break down the condition type 1976 if (CondTy.isVector()) { 1977 if (CondTy.getNumElements() == NumParts) 1978 NarrowTy1 = CondTy.getElementType(); 1979 else 1980 NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts, 1981 CondTy.getScalarSizeInBits()); 1982 } 1983 } else { 1984 NumParts = CondTy.getNumElements(); 1985 if (NarrowTy.isVector()) { 1986 // TODO: Handle uneven breakdown. 1987 if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements()) 1988 return UnableToLegalize; 1989 1990 return UnableToLegalize; 1991 } else { 1992 NarrowTy0 = DstTy.getElementType(); 1993 NarrowTy1 = NarrowTy; 1994 } 1995 } 1996 1997 SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; 1998 if (CondTy.isVector()) 1999 extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs); 2000 2001 extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs); 2002 extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs); 2003 2004 for (unsigned i = 0; i < NumParts; ++i) { 2005 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0); 2006 MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg, 2007 Src1Regs[i], Src2Regs[i]); 2008 DstRegs.push_back(DstReg); 2009 } 2010 2011 if (NarrowTy0.isVector()) 2012 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 2013 else 2014 MIRBuilder.buildBuildVector(DstReg, DstRegs); 2015 2016 MI.eraseFromParent(); 2017 return Legalized; 2018 } 2019 2020 LegalizerHelper::LegalizeResult 2021 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, 2022 LLT NarrowTy) { 2023 // FIXME: Don't know how to handle secondary types yet. 
2024 if (TypeIdx != 0) 2025 return UnableToLegalize; 2026 2027 MachineMemOperand *MMO = *MI.memoperands_begin(); 2028 2029 // This implementation doesn't work for atomics. Give up instead of doing 2030 // something invalid. 2031 if (MMO->getOrdering() != AtomicOrdering::NotAtomic || 2032 MMO->getFailureOrdering() != AtomicOrdering::NotAtomic) 2033 return UnableToLegalize; 2034 2035 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; 2036 unsigned ValReg = MI.getOperand(0).getReg(); 2037 unsigned AddrReg = MI.getOperand(1).getReg(); 2038 LLT ValTy = MRI.getType(ValReg); 2039 2040 int NumParts = -1; 2041 LLT LeftoverTy; 2042 SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs; 2043 if (IsLoad) { 2044 NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy); 2045 } else { 2046 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs, 2047 NarrowLeftoverRegs)) 2048 NumParts = NarrowRegs.size(); 2049 } 2050 2051 if (NumParts == -1) 2052 return UnableToLegalize; 2053 2054 const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); 2055 2056 unsigned TotalSize = ValTy.getSizeInBits(); 2057 2058 // Split the load/store into PartTy sized pieces starting at Offset. If this 2059 // is a load, return the new registers in ValRegs. For a store, each elements 2060 // of ValRegs should be PartTy. Returns the next offset that needs to be 2061 // handled. 
2062 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs, 2063 unsigned Offset) -> unsigned { 2064 MachineFunction &MF = MIRBuilder.getMF(); 2065 unsigned PartSize = PartTy.getSizeInBits(); 2066 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; 2067 Offset += PartSize, ++Idx) { 2068 unsigned ByteSize = PartSize / 8; 2069 unsigned ByteOffset = Offset / 8; 2070 unsigned NewAddrReg = 0; 2071 2072 MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset); 2073 2074 MachineMemOperand *NewMMO = 2075 MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); 2076 2077 if (IsLoad) { 2078 unsigned Dst = MRI.createGenericVirtualRegister(PartTy); 2079 ValRegs.push_back(Dst); 2080 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO); 2081 } else { 2082 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO); 2083 } 2084 } 2085 2086 return Offset; 2087 }; 2088 2089 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0); 2090 2091 // Handle the rest of the register if this isn't an even type breakdown. 
2092 if (LeftoverTy.isValid()) 2093 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); 2094 2095 if (IsLoad) { 2096 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, 2097 LeftoverTy, NarrowLeftoverRegs); 2098 } 2099 2100 MI.eraseFromParent(); 2101 return Legalized; 2102 } 2103 2104 LegalizerHelper::LegalizeResult 2105 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 2106 LLT NarrowTy) { 2107 using namespace TargetOpcode; 2108 2109 MIRBuilder.setInstr(MI); 2110 switch (MI.getOpcode()) { 2111 case G_IMPLICIT_DEF: 2112 return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); 2113 case G_AND: 2114 case G_OR: 2115 case G_XOR: 2116 case G_ADD: 2117 case G_SUB: 2118 case G_MUL: 2119 case G_SMULH: 2120 case G_UMULH: 2121 case G_FADD: 2122 case G_FMUL: 2123 case G_FSUB: 2124 case G_FNEG: 2125 case G_FABS: 2126 case G_FCANONICALIZE: 2127 case G_FDIV: 2128 case G_FREM: 2129 case G_FMA: 2130 case G_FPOW: 2131 case G_FEXP: 2132 case G_FEXP2: 2133 case G_FLOG: 2134 case G_FLOG2: 2135 case G_FLOG10: 2136 case G_FCEIL: 2137 case G_FFLOOR: 2138 case G_INTRINSIC_ROUND: 2139 case G_INTRINSIC_TRUNC: 2140 case G_FCOS: 2141 case G_FSIN: 2142 case G_FSQRT: 2143 case G_BSWAP: 2144 return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); 2145 case G_SHL: 2146 case G_LSHR: 2147 case G_ASHR: 2148 case G_CTLZ: 2149 case G_CTLZ_ZERO_UNDEF: 2150 case G_CTTZ: 2151 case G_CTTZ_ZERO_UNDEF: 2152 case G_CTPOP: 2153 return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); 2154 case G_ZEXT: 2155 case G_SEXT: 2156 case G_ANYEXT: 2157 case G_FPEXT: 2158 case G_FPTRUNC: 2159 case G_SITOFP: 2160 case G_UITOFP: 2161 case G_FPTOSI: 2162 case G_FPTOUI: 2163 case G_INTTOPTR: 2164 case G_PTRTOINT: 2165 case G_ADDRSPACE_CAST: 2166 return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); 2167 case G_ICMP: 2168 case G_FCMP: 2169 return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); 2170 case G_SELECT: 2171 return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); 2172 
case G_LOAD: 2173 case G_STORE: 2174 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); 2175 default: 2176 return UnableToLegalize; 2177 } 2178 } 2179 2180 LegalizerHelper::LegalizeResult 2181 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, 2182 const LLT HalfTy, const LLT AmtTy) { 2183 2184 unsigned InL = MRI.createGenericVirtualRegister(HalfTy); 2185 unsigned InH = MRI.createGenericVirtualRegister(HalfTy); 2186 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); 2187 2188 if (Amt.isNullValue()) { 2189 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH}); 2190 MI.eraseFromParent(); 2191 return Legalized; 2192 } 2193 2194 LLT NVT = HalfTy; 2195 unsigned NVTBits = HalfTy.getSizeInBits(); 2196 unsigned VTBits = 2 * NVTBits; 2197 2198 SrcOp Lo(0), Hi(0); 2199 if (MI.getOpcode() == TargetOpcode::G_SHL) { 2200 if (Amt.ugt(VTBits)) { 2201 Lo = Hi = MIRBuilder.buildConstant(NVT, 0); 2202 } else if (Amt.ugt(NVTBits)) { 2203 Lo = MIRBuilder.buildConstant(NVT, 0); 2204 Hi = MIRBuilder.buildShl(NVT, InL, 2205 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 2206 } else if (Amt == NVTBits) { 2207 Lo = MIRBuilder.buildConstant(NVT, 0); 2208 Hi = InL; 2209 } else { 2210 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt)); 2211 auto OrLHS = 2212 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt)); 2213 auto OrRHS = MIRBuilder.buildLShr( 2214 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 2215 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 2216 } 2217 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) { 2218 if (Amt.ugt(VTBits)) { 2219 Lo = Hi = MIRBuilder.buildConstant(NVT, 0); 2220 } else if (Amt.ugt(NVTBits)) { 2221 Lo = MIRBuilder.buildLShr(NVT, InH, 2222 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 2223 Hi = MIRBuilder.buildConstant(NVT, 0); 2224 } else if (Amt == NVTBits) { 2225 Lo = InH; 2226 Hi = MIRBuilder.buildConstant(NVT, 0); 2227 } else { 2228 auto ShiftAmtConst = 
MIRBuilder.buildConstant(AmtTy, Amt); 2229 2230 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); 2231 auto OrRHS = MIRBuilder.buildShl( 2232 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 2233 2234 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 2235 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst); 2236 } 2237 } else { 2238 if (Amt.ugt(VTBits)) { 2239 Hi = Lo = MIRBuilder.buildAShr( 2240 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 2241 } else if (Amt.ugt(NVTBits)) { 2242 Lo = MIRBuilder.buildAShr(NVT, InH, 2243 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 2244 Hi = MIRBuilder.buildAShr(NVT, InH, 2245 MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 2246 } else if (Amt == NVTBits) { 2247 Lo = InH; 2248 Hi = MIRBuilder.buildAShr(NVT, InH, 2249 MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 2250 } else { 2251 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); 2252 2253 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); 2254 auto OrRHS = MIRBuilder.buildShl( 2255 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 2256 2257 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 2258 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst); 2259 } 2260 } 2261 2262 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()}); 2263 MI.eraseFromParent(); 2264 2265 return Legalized; 2266 } 2267 2268 // TODO: Optimize if constant shift amount. 
2269 LegalizerHelper::LegalizeResult 2270 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, 2271 LLT RequestedTy) { 2272 if (TypeIdx == 1) { 2273 Observer.changingInstr(MI); 2274 narrowScalarSrc(MI, RequestedTy, 2); 2275 Observer.changedInstr(MI); 2276 return Legalized; 2277 } 2278 2279 unsigned DstReg = MI.getOperand(0).getReg(); 2280 LLT DstTy = MRI.getType(DstReg); 2281 if (DstTy.isVector()) 2282 return UnableToLegalize; 2283 2284 unsigned Amt = MI.getOperand(2).getReg(); 2285 LLT ShiftAmtTy = MRI.getType(Amt); 2286 const unsigned DstEltSize = DstTy.getScalarSizeInBits(); 2287 if (DstEltSize % 2 != 0) 2288 return UnableToLegalize; 2289 2290 // Ignore the input type. We can only go to exactly half the size of the 2291 // input. If that isn't small enough, the resulting pieces will be further 2292 // legalized. 2293 const unsigned NewBitSize = DstEltSize / 2; 2294 const LLT HalfTy = LLT::scalar(NewBitSize); 2295 const LLT CondTy = LLT::scalar(1); 2296 2297 if (const MachineInstr *KShiftAmt = 2298 getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { 2299 return narrowScalarShiftByConstant( 2300 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); 2301 } 2302 2303 // TODO: Expand with known bits. 2304 2305 // Handle the fully general expansion by an unknown amount. 
2306 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize); 2307 2308 unsigned InL = MRI.createGenericVirtualRegister(HalfTy); 2309 unsigned InH = MRI.createGenericVirtualRegister(HalfTy); 2310 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); 2311 2312 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits); 2313 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt); 2314 2315 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0); 2316 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits); 2317 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero); 2318 2319 unsigned ResultRegs[2]; 2320 switch (MI.getOpcode()) { 2321 case TargetOpcode::G_SHL: { 2322 // Short: ShAmt < NewBitSize 2323 auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2324 2325 auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2326 auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); 2327 auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2328 2329 // Long: ShAmt >= NewBitSize 2330 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. 2331 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part. 2332 2333 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL); 2334 auto Hi = MIRBuilder.buildSelect( 2335 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL)); 2336 2337 ResultRegs[0] = Lo.getReg(0); 2338 ResultRegs[1] = Hi.getReg(0); 2339 break; 2340 } 2341 case TargetOpcode::G_LSHR: { 2342 // Short: ShAmt < NewBitSize 2343 auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); 2344 2345 auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2346 auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); 2347 auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2348 2349 // Long: ShAmt >= NewBitSize 2350 auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. 2351 auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 
2352 2353 auto Lo = MIRBuilder.buildSelect( 2354 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2355 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2356 2357 ResultRegs[0] = Lo.getReg(0); 2358 ResultRegs[1] = Hi.getReg(0); 2359 break; 2360 } 2361 case TargetOpcode::G_ASHR: { 2362 // Short: ShAmt < NewBitSize 2363 auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); 2364 2365 auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2366 auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); 2367 auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2368 2369 // Long: ShAmt >= NewBitSize 2370 2371 // Sign of Hi part. 2372 auto HiL = MIRBuilder.buildAShr( 2373 HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); 2374 2375 auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 2376 2377 auto Lo = MIRBuilder.buildSelect( 2378 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2379 2380 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2381 2382 ResultRegs[0] = Lo.getReg(0); 2383 ResultRegs[1] = Hi.getReg(0); 2384 break; 2385 } 2386 default: 2387 llvm_unreachable("not a shift"); 2388 } 2389 2390 MIRBuilder.buildMerge(DstReg, ResultRegs); 2391 MI.eraseFromParent(); 2392 return Legalized; 2393 } 2394 2395 LegalizerHelper::LegalizeResult 2396 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, 2397 LLT MoreTy) { 2398 MIRBuilder.setInstr(MI); 2399 unsigned Opc = MI.getOpcode(); 2400 switch (Opc) { 2401 case TargetOpcode::G_IMPLICIT_DEF: { 2402 Observer.changingInstr(MI); 2403 moreElementsVectorDst(MI, MoreTy, 0); 2404 Observer.changedInstr(MI); 2405 return Legalized; 2406 } 2407 case TargetOpcode::G_AND: 2408 case TargetOpcode::G_OR: 2409 case TargetOpcode::G_XOR: { 2410 Observer.changingInstr(MI); 2411 moreElementsVectorSrc(MI, MoreTy, 1); 2412 moreElementsVectorSrc(MI, MoreTy, 2); 2413 moreElementsVectorDst(MI, MoreTy, 0); 2414 
Observer.changedInstr(MI); 2415 return Legalized; 2416 } 2417 case TargetOpcode::G_EXTRACT: 2418 if (TypeIdx != 1) 2419 return UnableToLegalize; 2420 Observer.changingInstr(MI); 2421 moreElementsVectorSrc(MI, MoreTy, 1); 2422 Observer.changedInstr(MI); 2423 return Legalized; 2424 case TargetOpcode::G_INSERT: 2425 if (TypeIdx != 0) 2426 return UnableToLegalize; 2427 Observer.changingInstr(MI); 2428 moreElementsVectorSrc(MI, MoreTy, 1); 2429 moreElementsVectorDst(MI, MoreTy, 0); 2430 Observer.changedInstr(MI); 2431 return Legalized; 2432 case TargetOpcode::G_SELECT: 2433 if (TypeIdx != 0) 2434 return UnableToLegalize; 2435 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) 2436 return UnableToLegalize; 2437 2438 Observer.changingInstr(MI); 2439 moreElementsVectorSrc(MI, MoreTy, 2); 2440 moreElementsVectorSrc(MI, MoreTy, 3); 2441 moreElementsVectorDst(MI, MoreTy, 0); 2442 Observer.changedInstr(MI); 2443 return Legalized; 2444 default: 2445 return UnableToLegalize; 2446 } 2447 } 2448 2449 LegalizerHelper::LegalizeResult 2450 LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) { 2451 unsigned DstReg = MI.getOperand(0).getReg(); 2452 unsigned Src0 = MI.getOperand(1).getReg(); 2453 unsigned Src1 = MI.getOperand(2).getReg(); 2454 LLT Ty = MRI.getType(DstReg); 2455 if (Ty.isVector()) 2456 return UnableToLegalize; 2457 2458 unsigned Size = Ty.getSizeInBits(); 2459 unsigned NewSize = Size / 2; 2460 if (Size != 2 * NewSize) 2461 return UnableToLegalize; 2462 2463 LLT HalfTy = LLT::scalar(NewSize); 2464 // TODO: if HalfTy != NewTy, handle the breakdown all at once? 
2465 2466 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 2467 unsigned Lo = MRI.createGenericVirtualRegister(HalfTy); 2468 unsigned Hi = MRI.createGenericVirtualRegister(HalfTy); 2469 unsigned ExtLo = MRI.createGenericVirtualRegister(Ty); 2470 unsigned ExtHi = MRI.createGenericVirtualRegister(Ty); 2471 unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty); 2472 2473 SmallVector<unsigned, 2> Src0Parts; 2474 SmallVector<unsigned, 2> Src1Parts; 2475 2476 extractParts(Src0, HalfTy, 2, Src0Parts); 2477 extractParts(Src1, HalfTy, 2, Src1Parts); 2478 2479 MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]); 2480 2481 // TODO: Use smulh or umulh depending on what the target has. 2482 MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]); 2483 2484 MIRBuilder.buildConstant(ShiftAmt, NewSize); 2485 MIRBuilder.buildAnyExt(ExtHi, Hi); 2486 MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt); 2487 2488 MIRBuilder.buildZExt(ExtLo, Lo); 2489 MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi); 2490 MI.eraseFromParent(); 2491 return Legalized; 2492 } 2493 2494 2495 LegalizerHelper::LegalizeResult 2496 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, 2497 LLT NarrowTy) { 2498 if (TypeIdx != 1) 2499 return UnableToLegalize; 2500 2501 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 2502 2503 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 2504 // FIXME: add support for when SizeOp1 isn't an exact multiple of 2505 // NarrowSize. 
2506 if (SizeOp1 % NarrowSize != 0) 2507 return UnableToLegalize; 2508 int NumParts = SizeOp1 / NarrowSize; 2509 2510 SmallVector<unsigned, 2> SrcRegs, DstRegs; 2511 SmallVector<uint64_t, 2> Indexes; 2512 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 2513 2514 unsigned OpReg = MI.getOperand(0).getReg(); 2515 uint64_t OpStart = MI.getOperand(2).getImm(); 2516 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 2517 for (int i = 0; i < NumParts; ++i) { 2518 unsigned SrcStart = i * NarrowSize; 2519 2520 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { 2521 // No part of the extract uses this subregister, ignore it. 2522 continue; 2523 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 2524 // The entire subregister is extracted, forward the value. 2525 DstRegs.push_back(SrcRegs[i]); 2526 continue; 2527 } 2528 2529 // OpSegStart is where this destination segment would start in OpReg if it 2530 // extended infinitely in both directions. 2531 int64_t ExtractOffset; 2532 uint64_t SegSize; 2533 if (OpStart < SrcStart) { 2534 ExtractOffset = 0; 2535 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); 2536 } else { 2537 ExtractOffset = OpStart - SrcStart; 2538 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); 2539 } 2540 2541 unsigned SegReg = SrcRegs[i]; 2542 if (ExtractOffset != 0 || SegSize != NarrowSize) { 2543 // A genuine extract is needed. 
2544 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 2545 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); 2546 } 2547 2548 DstRegs.push_back(SegReg); 2549 } 2550 2551 unsigned DstReg = MI.getOperand(0).getReg(); 2552 if(MRI.getType(DstReg).isVector()) 2553 MIRBuilder.buildBuildVector(DstReg, DstRegs); 2554 else 2555 MIRBuilder.buildMerge(DstReg, DstRegs); 2556 MI.eraseFromParent(); 2557 return Legalized; 2558 } 2559 2560 LegalizerHelper::LegalizeResult 2561 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, 2562 LLT NarrowTy) { 2563 // FIXME: Don't know how to handle secondary types yet. 2564 if (TypeIdx != 0) 2565 return UnableToLegalize; 2566 2567 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 2568 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 2569 2570 // FIXME: add support for when SizeOp0 isn't an exact multiple of 2571 // NarrowSize. 2572 if (SizeOp0 % NarrowSize != 0) 2573 return UnableToLegalize; 2574 2575 int NumParts = SizeOp0 / NarrowSize; 2576 2577 SmallVector<unsigned, 2> SrcRegs, DstRegs; 2578 SmallVector<uint64_t, 2> Indexes; 2579 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 2580 2581 unsigned OpReg = MI.getOperand(2).getReg(); 2582 uint64_t OpStart = MI.getOperand(3).getImm(); 2583 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 2584 for (int i = 0; i < NumParts; ++i) { 2585 unsigned DstStart = i * NarrowSize; 2586 2587 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { 2588 // No part of the insert affects this subregister, forward the original. 2589 DstRegs.push_back(SrcRegs[i]); 2590 continue; 2591 } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 2592 // The entire subregister is defined by this insert, forward the new 2593 // value. 
2594 DstRegs.push_back(OpReg); 2595 continue; 2596 } 2597 2598 // OpSegStart is where this destination segment would start in OpReg if it 2599 // extended infinitely in both directions. 2600 int64_t ExtractOffset, InsertOffset; 2601 uint64_t SegSize; 2602 if (OpStart < DstStart) { 2603 InsertOffset = 0; 2604 ExtractOffset = DstStart - OpStart; 2605 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); 2606 } else { 2607 InsertOffset = OpStart - DstStart; 2608 ExtractOffset = 0; 2609 SegSize = 2610 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); 2611 } 2612 2613 unsigned SegReg = OpReg; 2614 if (ExtractOffset != 0 || SegSize != OpSize) { 2615 // A genuine extract is needed. 2616 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 2617 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); 2618 } 2619 2620 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 2621 MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); 2622 DstRegs.push_back(DstReg); 2623 } 2624 2625 assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); 2626 unsigned DstReg = MI.getOperand(0).getReg(); 2627 if(MRI.getType(DstReg).isVector()) 2628 MIRBuilder.buildBuildVector(DstReg, DstRegs); 2629 else 2630 MIRBuilder.buildMerge(DstReg, DstRegs); 2631 MI.eraseFromParent(); 2632 return Legalized; 2633 } 2634 2635 LegalizerHelper::LegalizeResult 2636 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, 2637 LLT NarrowTy) { 2638 if (TypeIdx != 0) 2639 return UnableToLegalize; 2640 2641 unsigned CondReg = MI.getOperand(1).getReg(); 2642 LLT CondTy = MRI.getType(CondReg); 2643 if (CondTy.isVector()) // TODO: Handle vselect 2644 return UnableToLegalize; 2645 2646 unsigned DstReg = MI.getOperand(0).getReg(); 2647 LLT DstTy = MRI.getType(DstReg); 2648 2649 SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs; 2650 SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs; 2651 SmallVector<unsigned, 4> Src2Regs, 
Src2LeftoverRegs; 2652 LLT LeftoverTy; 2653 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy, 2654 Src1Regs, Src1LeftoverRegs)) 2655 return UnableToLegalize; 2656 2657 LLT Unused; 2658 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused, 2659 Src2Regs, Src2LeftoverRegs)) 2660 llvm_unreachable("inconsistent extractParts result"); 2661 2662 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { 2663 auto Select = MIRBuilder.buildSelect(NarrowTy, 2664 CondReg, Src1Regs[I], Src2Regs[I]); 2665 DstRegs.push_back(Select->getOperand(0).getReg()); 2666 } 2667 2668 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { 2669 auto Select = MIRBuilder.buildSelect( 2670 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); 2671 DstLeftoverRegs.push_back(Select->getOperand(0).getReg()); 2672 } 2673 2674 insertParts(DstReg, DstTy, NarrowTy, DstRegs, 2675 LeftoverTy, DstLeftoverRegs); 2676 2677 MI.eraseFromParent(); 2678 return Legalized; 2679 } 2680 2681 LegalizerHelper::LegalizeResult 2682 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 2683 unsigned Opc = MI.getOpcode(); 2684 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 2685 auto isSupported = [this](const LegalityQuery &Q) { 2686 auto QAction = LI.getAction(Q).Action; 2687 return QAction == Legal || QAction == Libcall || QAction == Custom; 2688 }; 2689 switch (Opc) { 2690 default: 2691 return UnableToLegalize; 2692 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 2693 // This trivially expands to CTLZ. 2694 Observer.changingInstr(MI); 2695 MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); 2696 Observer.changedInstr(MI); 2697 return Legalized; 2698 } 2699 case TargetOpcode::G_CTLZ: { 2700 unsigned SrcReg = MI.getOperand(1).getReg(); 2701 unsigned Len = Ty.getSizeInBits(); 2702 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { 2703 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 
2704 auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, 2705 {Ty}, {SrcReg}); 2706 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 2707 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 2708 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 2709 SrcReg, MIBZero); 2710 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 2711 MIBCtlzZU); 2712 MI.eraseFromParent(); 2713 return Legalized; 2714 } 2715 // for now, we do this: 2716 // NewLen = NextPowerOf2(Len); 2717 // x = x | (x >> 1); 2718 // x = x | (x >> 2); 2719 // ... 2720 // x = x | (x >>16); 2721 // x = x | (x >>32); // for 64-bit input 2722 // Upto NewLen/2 2723 // return Len - popcount(x); 2724 // 2725 // Ref: "Hacker's Delight" by Henry Warren 2726 unsigned Op = SrcReg; 2727 unsigned NewLen = PowerOf2Ceil(Len); 2728 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { 2729 auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); 2730 auto MIBOp = MIRBuilder.buildInstr( 2731 TargetOpcode::G_OR, {Ty}, 2732 {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty}, 2733 {Op, MIBShiftAmt})}); 2734 Op = MIBOp->getOperand(0).getReg(); 2735 } 2736 auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op}); 2737 MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 2738 {MIRBuilder.buildConstant(Ty, Len), MIBPop}); 2739 MI.eraseFromParent(); 2740 return Legalized; 2741 } 2742 case TargetOpcode::G_CTTZ_ZERO_UNDEF: { 2743 // This trivially expands to CTTZ. 2744 Observer.changingInstr(MI); 2745 MI.setDesc(TII.get(TargetOpcode::G_CTTZ)); 2746 Observer.changedInstr(MI); 2747 return Legalized; 2748 } 2749 case TargetOpcode::G_CTTZ: { 2750 unsigned SrcReg = MI.getOperand(1).getReg(); 2751 unsigned Len = Ty.getSizeInBits(); 2752 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { 2753 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with 2754 // zero. 
2755 auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, 2756 {Ty}, {SrcReg}); 2757 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 2758 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 2759 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 2760 SrcReg, MIBZero); 2761 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 2762 MIBCttzZU); 2763 MI.eraseFromParent(); 2764 return Legalized; 2765 } 2766 // for now, we use: { return popcount(~x & (x - 1)); } 2767 // unless the target has ctlz but not ctpop, in which case we use: 2768 // { return 32 - nlz(~x & (x-1)); } 2769 // Ref: "Hacker's Delight" by Henry Warren 2770 auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); 2771 auto MIBNot = 2772 MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1}); 2773 auto MIBTmp = MIRBuilder.buildInstr( 2774 TargetOpcode::G_AND, {Ty}, 2775 {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, 2776 {SrcReg, MIBCstNeg1})}); 2777 if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && 2778 isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { 2779 auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); 2780 MIRBuilder.buildInstr( 2781 TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 2782 {MIBCstLen, 2783 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})}); 2784 MI.eraseFromParent(); 2785 return Legalized; 2786 } 2787 MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); 2788 MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); 2789 return Legalized; 2790 } 2791 } 2792 } 2793