//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

// Construct a helper that pulls the LegalizerInfo from the function's
// subtarget. The builder is (re)bound to MF and wired to the observer so
// every instruction it creates is reported.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

// Construct a helper with an explicitly supplied LegalizerInfo, for callers
// that don't want the subtarget's default.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

// Query the LegalizerInfo for the action to take on MI and dispatch to the
// matching legalization strategy. Returns AlreadyLegal, Legalized, or
// UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

// Split Reg into NumParts new virtual registers of type Ty via a single
// G_UNMERGE_VALUES. Assumes Reg's size is exactly NumParts * Ty's size.
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

// Split Reg (of type RegTy) into as many MainTy pieces as fit, plus an
// optional leftover piece when RegTy's size is not a multiple of MainTy's.
// LeftoverTy is an out parameter describing the leftover piece(s); it is
// only set when a leftover exists. Returns false if the split is not
// representable (vector leftover not a whole number of elements).
bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<unsigned> &VRegs,
                                   SmallVectorImpl<unsigned> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    // The leftover must be a whole number of elements of the same width.
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // Extract the leftover piece(s) following the main parts.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

// Inverse of the leftover-aware extractParts: reassemble DstReg (of type
// ResultTy) from PartTy-sized pieces plus optional leftover pieces. With no
// leftover a single concat/build-vector suffices; otherwise the pieces are
// chained into the result with G_INSERTs starting from an undef value.
void LegalizerHelper::insertParts(unsigned DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<unsigned> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<unsigned> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  // Insert the main parts at increasing offsets, threading the partial
  // result through a fresh register each time.
  unsigned Offset = 0;
  for (unsigned PartReg : PartRegs) {
    unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    unsigned NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}

// Map a generic opcode plus operand bit-width onto the corresponding RTLIB
// runtime-library call. Asserts on sizes with no runtime counterpart for
// most opcodes (G_FREM/G_FPOW only distinguish 64 vs. everything else here).
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FEXP:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  case TargetOpcode::G_FSIN:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::SIN_F128
                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
  case TargetOpcode::G_FCOS:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::COS_F128
                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
  case TargetOpcode::G_FLOG10:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG10_F128
                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
  case TargetOpcode::G_FLOG:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG_F128
                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
  case TargetOpcode::G_FLOG2:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG2_F128
                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

// Emit a call to the named runtime-library routine via the target's
// CallLowering, using the libcall's target calling convention.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  // A libcall is a real call: record that on the frame before lowering.
  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // Operand 0 is the result; every remaining operand becomes a call arg.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

// Map a generic conversion opcode plus source/destination IR types to the
// RTLIB conversion routine.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

// Emit a conversion libcall (single source operand, differently-typed
// result) for MI.
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

// Replace MI with a call into the runtime library. Only the opcode/size
// combinations handled below are supported; on success MI is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: the IR-level type is just an iN of the same width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP: {
    // FP ops: only float and double are handled on this path.
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

// Legalize MI by re-expressing it in terms of NarrowTy-sized pieces
// (splitting wide scalars into multiple narrower operations).
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // An undef of a wide type is just a merge of narrow undefs.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the chain with a zero carry-in for the lowest part.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Lowest part uses G_USUBO; subsequent parts consume the borrow via
    // G_USUBE, analogous to the G_ADD carry chain above.
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    unsigned BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
    return narrowScalarMul(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: Don't know how to handle secondary types yet.
    if (TypeIdx != 0)
      return UnableToLegalize;

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    const auto &MMO = **MI.memoperands_begin();
    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = DstTy.getSizeInBits() - HandledSize;

    if (DstTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Memory access is smaller than the register: load narrow and any-extend
    // instead of splitting.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE: re-fetches (and shadows) the same memory operand as above.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    // Emit one narrow load per part at increasing byte offsets, each with a
    // split memory operand.
    MachineFunction &MF = MIRBuilder.getMF();
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Offset = i * NarrowSize / 8;

      MachineMemOperand *SplitMMO =
          MF.getMachineMemOperand(&MMO, Offset, NarrowSize / 8);

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Offset);

      MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(PartDstReg);
    }

    unsigned MergeResultReg = LeftoverBits == 0 ? DstReg :
      MRI.createGenericVirtualRegister(LLT::scalar(HandledSize));

    // For the leftover piece, still create the merge and insert it.
    // TODO: Would it be better to directly insert the intermediate pieces?
    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(MergeResultReg, DstRegs);
    else
      MIRBuilder.buildMerge(MergeResultReg, DstRegs);

    if (LeftoverBits == 0) {
      MI.eraseFromParent();
      return Legalized;
    }

    // Leftover path: build the final value by inserting the merged main
    // parts and a last partial load into an undef of the destination type.
    unsigned ImpDefReg = MRI.createGenericVirtualRegister(DstTy);
    unsigned Insert0Reg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(ImpDefReg);
    MIRBuilder.buildInsert(Insert0Reg, ImpDefReg, MergeResultReg, 0);

    unsigned PartDstReg
      = MRI.createGenericVirtualRegister(LLT::scalar(LeftoverBits));
    unsigned Offset = HandledSize / 8;

    MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
        &MMO, Offset, LeftoverBits / 8);

    unsigned SrcReg = 0;
    MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                              Offset);
    MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO);
    MIRBuilder.buildInsert(DstReg, Insert0Reg, PartDstReg, HandledSize);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();

    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSize() * 8 == NarrowSize) {
      // Memory width matches the narrow type: a plain load suffices.
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      // Otherwise keep an extending load of the narrow type.
      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
                              : TargetOpcode::G_SEXTLOAD;
      MIRBuilder.buildInstr(ExtLoad)
          .addDef(TmpReg)
          .addUse(PtrReg)
          .addMemOperand(&MMO);
    }

    // Extend the narrow value up to the original destination width.
    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();

    unsigned SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // Truncating store: narrow the value and emit a single store.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE: re-fetches (and shadows) the same memory operand as above.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    // One narrow store per part, mirroring the G_LOAD split above.
    MachineFunction &MF = MIRBuilder.getMF();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Offset = i * NarrowSize / 8;

      MachineMemOperand *SplitMMO =
          MF.getMachineMemOperand(&MMO, Offset, NarrowSize / 8);

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Offset);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    // Materialize each NarrowSize-bit slice of the constant separately.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Only the shift-amount operand (type index 1) is narrowed here; the
    // instruction itself is updated in place via the observer.
    if (TypeIdx != 1)
      return UnableToLegalize; // TODO
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx != 0)
      return UnableToLegalize; // TODO

    // Narrow only the result; the narrow count is zero-extended back up.
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

// Widen the source operand at OpIdx in place: emit ExtOpcode to WideTy and
// rewrite the operand to use the extended value.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Narrow the register used by operand \p OpIdx of \p MI: insert a G_TRUNC
/// to \p NarrowTy before \p MI and rewrite the operand to use the truncated
/// register.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
                                    {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Widen the register defined by operand \p OpIdx of \p MI: the instruction
/// now defines a fresh \p WideTy register, and a \p TruncOpcode inserted
/// *after* \p MI converts it back into the original destination register.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // Place the conversion after MI so it consumes MI's widened result.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

/// Narrow the register defined by operand \p OpIdx of \p MI: the instruction
/// now defines a fresh \p NarrowTy register, and an extension (\p ExtOpcode)
/// inserted after \p MI rebuilds the original wide register from it.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
  MO.setReg(DstTrunc);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_MERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (!DstTy.isScalar())
      return UnableToLegalize;

    unsigned NumSrc = MI.getNumOperands() - 1;
    unsigned EltSize = DstTy.getSizeInBits() / NumSrc;

    // Combine the sources with zext/shl/or: each source is zero-extended to
    // the destination width, shifted to its bit offset, and or'd into the
    // accumulated result.
    unsigned ResultReg = MRI.createGenericVirtualRegister(DstTy);
    unsigned Offset = 0;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I,
         Offset += EltSize) {
      assert(MRI.getType(MI.getOperand(I).getReg()) == LLT::scalar(EltSize));

      unsigned ShiftAmt = MRI.createGenericVirtualRegister(DstTy);
      unsigned Shl = MRI.createGenericVirtualRegister(DstTy);
      unsigned ZextInput = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildZExt(ZextInput, MI.getOperand(I).getReg());

      if (Offset != 0) {
        // The last piece or's directly into the final destination register.
        unsigned NextResult = I + 1 == E ? DstReg :
          MRI.createGenericVirtualRegister(DstTy);

        MIRBuilder.buildConstant(ShiftAmt, Offset);
        MIRBuilder.buildShl(Shl, ZextInput, ShiftAmt);
        MIRBuilder.buildOr(NextResult, ResultReg, Shl);
        ResultReg = NextResult;
      } else {
        // The first piece needs no shift.
        ResultReg = ZextInput;
      }
    }

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    // Mask keeps only the bits of the original (narrow) result width.
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      widenScalarDst(MI, WideTy, 0);
      return Legalized;
    }

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {WideTy},
          {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
    }
    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }
    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
    // Make the original instruction a trunc now, and update its source.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    unsigned DstReg = MI.getOperand(0).getReg();

    unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
    unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    // Emit the fix-up code after the (now widened) G_BSWAP.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The swapped bytes land in the high part of the wide register; shift
    // them back down before truncating to the original width.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
      .addDef(ShrReg)
      .addUse(DstExt)
      .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed operations need sign-extended inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // The extension of the shifted value must match the shift's signedness
      // so the bits shifted in from the top are correct.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
    // Unsigned operations need zero-extended inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
    // For some types like i24, we might try to widen to i32. To properly handle
    // this we should be using a dedicated extending load, until then avoid
    // trying to legalize.
    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
        WideTy.getSizeInBits())
      return UnableToLegalize;
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // Stored booleans get a well-defined in-memory value via zero-extension;
    // other types can use any-extend.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Sign-extend the immediate to the wide type; widenScalarDst then
    // truncates the wide result back to the original width.
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    // Convert the immediate to the wider IEEE format (this conversion is
    // exact, so the rounding mode does not matter).
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    // The original result is recovered with an fptrunc of the wide constant.
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // The extension must match the signedness of the predicate so the
      // comparison result is unchanged.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, right before that
    // block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The truncation back to the original type is emitted after the PHIs.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      unsigned VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      // Widen the whole source vector's element type to match the widened
      // result element.
      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // Promote every FP source with fpext, narrow the result with fptrunc.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

/// Lower \p MI into a sequence of simpler operations of the same type.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
      .addDef(QuotReg)
      .addUse(MI.getOperand(1).getReg())
      .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned multiply overflows iff the high half is non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR floating-point type matching the LLT width so the -0.0
    // constant can be built.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    // fneg x  ->  (-0.0) - x
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain G_ATOMIC_CMPXCHG; the success flag is recovered by
    // comparing the loaded old value against the expected value.
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then extend to the destination width.
      unsigned TmpReg = MRI.createGenericVirtualRegister(
          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    // Res = LHS + RHS + CarryIn; carry-out iff the result wrapped below LHS.
    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    // Borrow-out iff LHS < RHS (unsigned).
    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned BorrowIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    // Res = LHS - RHS - BorrowIn.
    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    // Borrow-out: LHS < RHS, or LHS == RHS and we also borrowed in.
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Break a vector G_IMPLICIT_DEF into NarrowTy-sized undef pieces and
/// reassemble them into the original destination register.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<unsigned, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
1598 if (Size % NarrowSize != 0) 1599 return UnableToLegalize; 1600 1601 for (int i = 0; i < NumParts; ++i) { 1602 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 1603 MIRBuilder.buildUndef(TmpReg); 1604 DstRegs.push_back(TmpReg); 1605 } 1606 1607 if (NarrowTy.isVector()) 1608 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1609 else 1610 MIRBuilder.buildBuildVector(DstReg, DstRegs); 1611 1612 MI.eraseFromParent(); 1613 return Legalized; 1614 } 1615 1616 LegalizerHelper::LegalizeResult 1617 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, 1618 LLT NarrowTy) { 1619 const unsigned Opc = MI.getOpcode(); 1620 const unsigned NumOps = MI.getNumOperands() - 1; 1621 const unsigned NarrowSize = NarrowTy.getSizeInBits(); 1622 const unsigned DstReg = MI.getOperand(0).getReg(); 1623 const unsigned Flags = MI.getFlags(); 1624 const LLT DstTy = MRI.getType(DstReg); 1625 const unsigned Size = DstTy.getSizeInBits(); 1626 const int NumParts = Size / NarrowSize; 1627 const LLT EltTy = DstTy.getElementType(); 1628 const unsigned EltSize = EltTy.getSizeInBits(); 1629 const unsigned BitsForNumParts = NarrowSize * NumParts; 1630 1631 // Check if we have any leftovers. If we do, then only handle the case where 1632 // the leftover is one element. 1633 if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size) 1634 return UnableToLegalize; 1635 1636 if (BitsForNumParts != Size) { 1637 unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy); 1638 MIRBuilder.buildUndef(AccumDstReg); 1639 1640 // Handle the pieces which evenly divide into the requested type with 1641 // extract/op/insert sequence. 
1642 for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) { 1643 SmallVector<SrcOp, 4> SrcOps; 1644 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1645 unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy); 1646 MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset); 1647 SrcOps.push_back(PartOpReg); 1648 } 1649 1650 unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); 1651 MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1652 1653 unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy); 1654 MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset); 1655 AccumDstReg = PartInsertReg; 1656 Offset += NarrowSize; 1657 } 1658 1659 // Handle the remaining element sized leftover piece. 1660 SmallVector<SrcOp, 4> SrcOps; 1661 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1662 unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy); 1663 MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), 1664 BitsForNumParts); 1665 SrcOps.push_back(PartOpReg); 1666 } 1667 1668 unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy); 1669 MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1670 MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts); 1671 MI.eraseFromParent(); 1672 1673 return Legalized; 1674 } 1675 1676 SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; 1677 1678 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs); 1679 1680 if (NumOps >= 2) 1681 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs); 1682 1683 if (NumOps >= 3) 1684 extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs); 1685 1686 for (int i = 0; i < NumParts; ++i) { 1687 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 1688 1689 if (NumOps == 1) 1690 MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags); 1691 else if (NumOps == 2) { 1692 MIRBuilder.buildInstr(Opc, 
{DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags); 1693 } else if (NumOps == 3) { 1694 MIRBuilder.buildInstr(Opc, {DstReg}, 1695 {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags); 1696 } 1697 1698 DstRegs.push_back(DstReg); 1699 } 1700 1701 if (NarrowTy.isVector()) 1702 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1703 else 1704 MIRBuilder.buildBuildVector(DstReg, DstRegs); 1705 1706 MI.eraseFromParent(); 1707 return Legalized; 1708 } 1709 1710 LegalizerHelper::LegalizeResult 1711 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, 1712 LLT NarrowTy) { 1713 if (TypeIdx != 0) 1714 return UnableToLegalize; 1715 1716 unsigned DstReg = MI.getOperand(0).getReg(); 1717 unsigned SrcReg = MI.getOperand(1).getReg(); 1718 LLT DstTy = MRI.getType(DstReg); 1719 LLT SrcTy = MRI.getType(SrcReg); 1720 1721 LLT NarrowTy0 = NarrowTy; 1722 LLT NarrowTy1; 1723 unsigned NumParts; 1724 1725 if (NarrowTy.isScalar()) { 1726 NumParts = DstTy.getNumElements(); 1727 NarrowTy1 = SrcTy.getElementType(); 1728 } else { 1729 // Uneven breakdown not handled. 
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  }

  SmallVector<unsigned, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
      .addDef(DstReg)
      .addUse(SrcRegs[I]);

    // Preserve e.g. fast-math flags on each piece.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split a vector G_ICMP/G_FCMP into narrower compares and recombine the
/// per-piece results.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    // Narrowing the result type; derive the matching source piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Narrowing the source type; derive the matching result piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      // Preserve e.g. fast-math flags on each piece.
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split a vector G_SELECT into narrower selects and recombine the results.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // Vector condition pieces are not implemented yet.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    // A scalar condition is shared by every piece; a vector condition is
    // split alongside the value operands.
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
1899 /// 1900 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, 1901 /// with any leftover piece as type \p LeftoverTy 1902 /// 1903 /// Returns -1 if the breakdown is not satisfiable. 1904 static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { 1905 assert(!LeftoverTy.isValid() && "this is an out argument"); 1906 1907 unsigned Size = OrigTy.getSizeInBits(); 1908 unsigned NarrowSize = NarrowTy.getSizeInBits(); 1909 unsigned NumParts = Size / NarrowSize; 1910 unsigned LeftoverSize = Size - NumParts * NarrowSize; 1911 assert(Size > NarrowSize); 1912 1913 if (LeftoverSize == 0) 1914 return NumParts; 1915 1916 if (NarrowTy.isVector()) { 1917 unsigned EltSize = OrigTy.getScalarSizeInBits(); 1918 if (LeftoverSize % EltSize != 0) 1919 return -1; 1920 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 1921 } else { 1922 LeftoverTy = LLT::scalar(LeftoverSize); 1923 } 1924 1925 return NumParts; 1926 } 1927 1928 LegalizerHelper::LegalizeResult 1929 LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx, 1930 LLT NarrowTy) { 1931 // FIXME: Don't know how to handle secondary types yet. 1932 if (TypeIdx != 0) 1933 return UnableToLegalize; 1934 1935 MachineMemOperand *MMO = *MI.memoperands_begin(); 1936 1937 // This implementation doesn't work for atomics. Give up instead of doing 1938 // something invalid. 
1939 if (MMO->getOrdering() != AtomicOrdering::NotAtomic || 1940 MMO->getFailureOrdering() != AtomicOrdering::NotAtomic) 1941 return UnableToLegalize; 1942 1943 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; 1944 unsigned ValReg = MI.getOperand(0).getReg(); 1945 unsigned AddrReg = MI.getOperand(1).getReg(); 1946 LLT ValTy = MRI.getType(ValReg); 1947 1948 int NumParts = -1; 1949 LLT LeftoverTy; 1950 SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs; 1951 if (IsLoad) { 1952 NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy); 1953 } else { 1954 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs, 1955 NarrowLeftoverRegs)) 1956 NumParts = NarrowRegs.size(); 1957 } 1958 1959 if (NumParts == -1) 1960 return UnableToLegalize; 1961 1962 const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); 1963 1964 unsigned TotalSize = ValTy.getSizeInBits(); 1965 1966 // Split the load/store into PartTy sized pieces starting at Offset. If this 1967 // is a load, return the new registers in ValRegs. For a store, each elements 1968 // of ValRegs should be PartTy. Returns the next offset that needs to be 1969 // handled. 
1970 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs, 1971 unsigned Offset) -> unsigned { 1972 MachineFunction &MF = MIRBuilder.getMF(); 1973 unsigned PartSize = PartTy.getSizeInBits(); 1974 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; 1975 Offset += PartSize, ++Idx) { 1976 unsigned ByteSize = PartSize / 8; 1977 unsigned ByteOffset = Offset / 8; 1978 unsigned NewAddrReg = 0; 1979 1980 MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset); 1981 1982 MachineMemOperand *NewMMO = 1983 MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); 1984 1985 if (IsLoad) { 1986 unsigned Dst = MRI.createGenericVirtualRegister(PartTy); 1987 ValRegs.push_back(Dst); 1988 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO); 1989 } else { 1990 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO); 1991 } 1992 } 1993 1994 return Offset; 1995 }; 1996 1997 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0); 1998 1999 // Handle the rest of the register if this isn't an even type breakdown. 
2000 if (LeftoverTy.isValid()) 2001 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); 2002 2003 if (IsLoad) { 2004 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, 2005 LeftoverTy, NarrowLeftoverRegs); 2006 } 2007 2008 MI.eraseFromParent(); 2009 return Legalized; 2010 } 2011 2012 LegalizerHelper::LegalizeResult 2013 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 2014 LLT NarrowTy) { 2015 using namespace TargetOpcode; 2016 2017 MIRBuilder.setInstr(MI); 2018 switch (MI.getOpcode()) { 2019 case G_IMPLICIT_DEF: 2020 return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); 2021 case G_AND: 2022 case G_OR: 2023 case G_XOR: 2024 case G_ADD: 2025 case G_SUB: 2026 case G_MUL: 2027 case G_SMULH: 2028 case G_UMULH: 2029 case G_FADD: 2030 case G_FMUL: 2031 case G_FSUB: 2032 case G_FNEG: 2033 case G_FABS: 2034 case G_FDIV: 2035 case G_FREM: 2036 case G_FMA: 2037 case G_FPOW: 2038 case G_FEXP: 2039 case G_FEXP2: 2040 case G_FLOG: 2041 case G_FLOG2: 2042 case G_FLOG10: 2043 case G_FCEIL: 2044 case G_INTRINSIC_ROUND: 2045 case G_INTRINSIC_TRUNC: 2046 case G_FCOS: 2047 case G_FSIN: 2048 case G_FSQRT: 2049 case G_BSWAP: 2050 return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); 2051 case G_ZEXT: 2052 case G_SEXT: 2053 case G_ANYEXT: 2054 case G_FPEXT: 2055 case G_FPTRUNC: 2056 case G_SITOFP: 2057 case G_UITOFP: 2058 case G_FPTOSI: 2059 case G_FPTOUI: 2060 return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); 2061 case G_ICMP: 2062 case G_FCMP: 2063 return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); 2064 case G_SELECT: 2065 return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); 2066 case G_LOAD: 2067 case G_STORE: 2068 return fewerElementsVectorLoadStore(MI, TypeIdx, NarrowTy); 2069 default: 2070 return UnableToLegalize; 2071 } 2072 } 2073 2074 LegalizerHelper::LegalizeResult 2075 LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) { 2076 unsigned DstReg = MI.getOperand(0).getReg(); 2077 unsigned Src0 = 
MI.getOperand(1).getReg(); 2078 unsigned Src1 = MI.getOperand(2).getReg(); 2079 LLT Ty = MRI.getType(DstReg); 2080 if (Ty.isVector()) 2081 return UnableToLegalize; 2082 2083 unsigned Size = Ty.getSizeInBits(); 2084 unsigned NewSize = Size / 2; 2085 if (Size != 2 * NewSize) 2086 return UnableToLegalize; 2087 2088 LLT HalfTy = LLT::scalar(NewSize); 2089 // TODO: if HalfTy != NewTy, handle the breakdown all at once? 2090 2091 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 2092 unsigned Lo = MRI.createGenericVirtualRegister(HalfTy); 2093 unsigned Hi = MRI.createGenericVirtualRegister(HalfTy); 2094 unsigned ExtLo = MRI.createGenericVirtualRegister(Ty); 2095 unsigned ExtHi = MRI.createGenericVirtualRegister(Ty); 2096 unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty); 2097 2098 SmallVector<unsigned, 2> Src0Parts; 2099 SmallVector<unsigned, 2> Src1Parts; 2100 2101 extractParts(Src0, HalfTy, 2, Src0Parts); 2102 extractParts(Src1, HalfTy, 2, Src1Parts); 2103 2104 MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]); 2105 2106 // TODO: Use smulh or umulh depending on what the target has. 
2107 MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]); 2108 2109 MIRBuilder.buildConstant(ShiftAmt, NewSize); 2110 MIRBuilder.buildAnyExt(ExtHi, Hi); 2111 MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt); 2112 2113 MIRBuilder.buildZExt(ExtLo, Lo); 2114 MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi); 2115 MI.eraseFromParent(); 2116 return Legalized; 2117 } 2118 2119 LegalizerHelper::LegalizeResult 2120 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 2121 unsigned Opc = MI.getOpcode(); 2122 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 2123 auto isSupported = [this](const LegalityQuery &Q) { 2124 auto QAction = LI.getAction(Q).Action; 2125 return QAction == Legal || QAction == Libcall || QAction == Custom; 2126 }; 2127 switch (Opc) { 2128 default: 2129 return UnableToLegalize; 2130 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 2131 // This trivially expands to CTLZ. 2132 Observer.changingInstr(MI); 2133 MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); 2134 Observer.changedInstr(MI); 2135 return Legalized; 2136 } 2137 case TargetOpcode::G_CTLZ: { 2138 unsigned SrcReg = MI.getOperand(1).getReg(); 2139 unsigned Len = Ty.getSizeInBits(); 2140 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { 2141 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 2142 auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, 2143 {Ty}, {SrcReg}); 2144 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 2145 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 2146 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 2147 SrcReg, MIBZero); 2148 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 2149 MIBCtlzZU); 2150 MI.eraseFromParent(); 2151 return Legalized; 2152 } 2153 // for now, we do this: 2154 // NewLen = NextPowerOf2(Len); 2155 // x = x | (x >> 1); 2156 // x = x | (x >> 2); 2157 // ... 
2158 // x = x | (x >>16); 2159 // x = x | (x >>32); // for 64-bit input 2160 // Upto NewLen/2 2161 // return Len - popcount(x); 2162 // 2163 // Ref: "Hacker's Delight" by Henry Warren 2164 unsigned Op = SrcReg; 2165 unsigned NewLen = PowerOf2Ceil(Len); 2166 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { 2167 auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); 2168 auto MIBOp = MIRBuilder.buildInstr( 2169 TargetOpcode::G_OR, {Ty}, 2170 {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty}, 2171 {Op, MIBShiftAmt})}); 2172 Op = MIBOp->getOperand(0).getReg(); 2173 } 2174 auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op}); 2175 MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 2176 {MIRBuilder.buildConstant(Ty, Len), MIBPop}); 2177 MI.eraseFromParent(); 2178 return Legalized; 2179 } 2180 case TargetOpcode::G_CTTZ_ZERO_UNDEF: { 2181 // This trivially expands to CTTZ. 2182 Observer.changingInstr(MI); 2183 MI.setDesc(TII.get(TargetOpcode::G_CTTZ)); 2184 Observer.changedInstr(MI); 2185 return Legalized; 2186 } 2187 case TargetOpcode::G_CTTZ: { 2188 unsigned SrcReg = MI.getOperand(1).getReg(); 2189 unsigned Len = Ty.getSizeInBits(); 2190 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { 2191 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with 2192 // zero. 
2193 auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, 2194 {Ty}, {SrcReg}); 2195 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 2196 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 2197 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 2198 SrcReg, MIBZero); 2199 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 2200 MIBCttzZU); 2201 MI.eraseFromParent(); 2202 return Legalized; 2203 } 2204 // for now, we use: { return popcount(~x & (x - 1)); } 2205 // unless the target has ctlz but not ctpop, in which case we use: 2206 // { return 32 - nlz(~x & (x-1)); } 2207 // Ref: "Hacker's Delight" by Henry Warren 2208 auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); 2209 auto MIBNot = 2210 MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1}); 2211 auto MIBTmp = MIRBuilder.buildInstr( 2212 TargetOpcode::G_AND, {Ty}, 2213 {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, 2214 {SrcReg, MIBCstNeg1})}); 2215 if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && 2216 isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { 2217 auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); 2218 MIRBuilder.buildInstr( 2219 TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 2220 {MIBCstLen, 2221 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})}); 2222 MI.eraseFromParent(); 2223 return Legalized; 2224 } 2225 MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); 2226 MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); 2227 return Legalized; 2228 } 2229 } 2230 } 2231