1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file implements the LegalizerHelper class to legalize 10 /// individual instructions and the LegalizeMachineIR wrapper pass for the 11 /// primary legalization. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 16 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/CodeGen/TargetInstrInfo.h" 21 #include "llvm/CodeGen/TargetLowering.h" 22 #include "llvm/CodeGen/TargetSubtargetInfo.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 #define DEBUG_TYPE "legalizer" 28 29 using namespace llvm; 30 using namespace LegalizeActions; 31 32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces. 33 /// 34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, 35 /// with any leftover piece as type \p LeftoverTy 36 /// 37 /// Returns -1 if the breakdown is not satisfiable. 
static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  // Number of whole NarrowTy-sized pieces, plus however many bits remain.
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return NumParts;

  if (NarrowTy.isVector()) {
    // The leftover piece must hold a whole number of the original elements.
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return -1;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  return NumParts;
}

// Construct a helper that queries the MachineFunction's subtarget for its
// LegalizerInfo.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

// Construct a helper with an explicitly supplied LegalizerInfo.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
// Perform one legalization step on \p MI: ask the LegalizerInfo what to do
// and dispatch to the corresponding strategy.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

// Split \p Reg into \p NumParts new virtual registers of type \p Ty via a
// single G_UNMERGE_VALUES; the new registers are appended to \p VRegs.
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

// Split \p Reg into as many MainTy pieces as fit (appended to \p VRegs) plus
// leftover pieces of \p LeftoverTy (appended to \p LeftoverRegs). Returns
// false if no valid leftover type exists. LeftoverTy is an out argument.
bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<unsigned> &VRegs,
                                   SmallVectorImpl<unsigned> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    // The leftover must be a whole number of vector elements.
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // The leftover pieces start after the main parts, at bit offset
  // MainSize * NumParts.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

// Reassemble \p DstReg of type \p ResultTy from \p PartRegs (each of type
// \p PartTy) followed by \p LeftoverRegs (each of type \p LeftoverTy).
// Inverse of the leftover-aware extractParts above.
void LegalizerHelper::insertParts(unsigned DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<unsigned> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<unsigned> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    // No leftover: a single merge / concat / build_vector suffices.
    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Start from an undef value and insert each piece at its bit offset.
  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (unsigned PartReg : PartRegs) {
    unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    unsigned NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}

// Map a generic opcode plus a scalar bit width to the runtime library call
// that implements it. Asserts on widths with no corresponding libcall.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FEXP:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ?
RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  case TargetOpcode::G_FSIN:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::SIN_F128
                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
  case TargetOpcode::G_FCOS:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::COS_F128
                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
  case TargetOpcode::G_FLOG10:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG10_F128
                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
  case TargetOpcode::G_FLOG:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG_F128
                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
  case TargetOpcode::G_FLOG2:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG2_F128
                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

// Emit a call to the given runtime library function via the target's
// CallLowering. Marks the function as having calls so frame lowering knows.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // Operand 0 is the result; all remaining operands become call arguments.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

// Map a generic conversion opcode plus its IR source/destination types to the
// corresponding runtime library call.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

// Emit a one-argument conversion libcall for \p MI (operand 1 -> operand 0).
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

// Legalize \p MI by replacing it with a runtime library call, then erase it.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP: {
    // NOTE(review): only f32/f64 handled here even though getRTLibDesc knows
    // f128 for some of these opcodes — larger sizes bail out.
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ?
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // The original instruction has been fully replaced by the libcall sequence.
  MI.eraseFromParent();
  return Legalized;
}

// Legalize \p MI by splitting its scalar type (at type index \p TypeIdx) into
// pieces of \p NarrowTy.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // An undef of the wide type is just a merge of narrow undefs.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the carry chain with 0 so the low part is a plain add.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Low part: borrow-setting G_USUBO; remaining parts consume the borrow
    // via G_USUBE.
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    unsigned BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
    return narrowScalarMul(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: Don't know how to handle secondary types yet.
    if (TypeIdx != 0)
      return UnableToLegalize;

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    const auto &MMO = **MI.memoperands_begin();
    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    // Extending load (memory narrower than the result): load at NarrowTy and
    // any-extend to the destination.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();

    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSize() * 8 == NarrowSize) {
      // Memory width matches NarrowTy exactly: plain load, then extend.
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      unsigned ExtLoad = ZExt ?
TargetOpcode::G_ZEXTLOAD : TargetOpcode::G_SEXTLOAD;
      MIRBuilder.buildInstr(ExtLoad)
        .addDef(TmpReg)
        .addUse(PtrReg)
        .addMemOperand(&MMO);
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    unsigned SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    // NOTE(review): unreachable given the isVector() bail-out above.
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Truncating store (memory narrower than the value): truncate to
    // NarrowTy and store that.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    // Materialize each NarrowSize-bit slice of the constant separately.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx != 0)
      return UnableToLegalize; // TODO

    // Only the result is narrowed; the bit-count result always fits, so a
    // zero-extended narrow destination is sufficient.
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

// Widen operand \p OpIdx of \p MI by inserting an extension (\p ExtOpcode)
// before \p MI and rewriting the operand to use the extended value.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

// Narrow operand \p OpIdx of \p MI by inserting a G_TRUNC before \p MI and
// rewriting the operand to use the truncated value.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
                                    {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

// Widen def operand \p OpIdx: \p MI now defines a WideTy register, and a
// \p TruncOpcode inserted after \p MI converts back to the original register.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  // Insert the conversion after MI so it consumes MI's new result.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

// Narrow def operand \p OpIdx: \p MI now defines a NarrowTy register, and an
// \p ExtOpcode inserted after \p MI rebuilds the original wide register.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
  MO.setReg(DstTrunc);
}

// Grow def operand \p OpIdx to a wider vector: \p MI defines a WideTy
// register and a G_EXTRACT after \p MI recovers the original elements.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
  MO.setReg(DstExt);
}

// Widen G_MERGE_VALUES sources by rebuilding the scalar result with
// zext + shift + or of each source part.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  // Start with the low part zero-extended into the full destination width.
  unsigned Src1 = MI.getOperand(1).getReg();
  unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg();

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    unsigned SrcReg = MI.getOperand(I).getReg();
    assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

    auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);

    // Use the original destination register for the last or, avoiding a copy.
    unsigned NextResult = I + 1 == NumOps ? DstReg :
      MRI.createGenericVirtualRegister(DstTy);

    auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
    auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  MI.eraseFromParent();
  return Legalized;
}

// Widen G_UNMERGE_VALUES results by zero-extending the source into a wider
// scalar and re-unmerging at the wide type.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned NumDst = MI.getNumOperands() - 1;
  unsigned SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  unsigned Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // NewSrcTy is wide enough for NumDst results of WideTy each.
  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // Spread the original parts so each lands at a WideTy-aligned slot.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}

// Legalize \p MI by widening its scalar type (at type index \p TypeIdx) to
// \p WideTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    unsigned SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (!SrcTy.isVector())
      return UnableToLegalize;

    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != SrcTy.getElementType())
      return UnableToLegalize;

    // Only element-aligned extracts can be rescaled to the wide element type.
    unsigned Offset = MI.getOperand(2).getImm();
    if (Offset % SrcTy.getScalarSizeInBits() != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    // Scale the bit offset by the ratio of the widened and original widths.
    MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                            Offset);
    widenScalarDst(MI, WideTy.getScalarType(), 0);
    Observer.changedInstr(MI);

    return Legalized;
  }
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() ==
                          TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    // Mask of the original result width; NOTE getZExtValue() limits this path
    // to original types of at most 64 bits.
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // Widening the result only requires a wider def; the count still fits.
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    unsigned SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    // bswap in the wide type leaves the interesting bytes at the top; shift
    // them back down before truncating to the original width.
    Observer.changingInstr(MI);
    unsigned DstReg = MI.getOperand(0).getReg();

    unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
    unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    // Emit the fixup sequence after the (now wide) bswap itself.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
      .addDef(ShrReg)
      .addUse(DstExt)
      .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // Widening the value: low bits are preserved by a left shift, so any
      // extension of the shifted value is fine.
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
    // Signed div/rem need sign-correct high bits in the wide type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // Right shifts consume the high bits, so the extension must match the
      // shift's signedness.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
    // Unsigned div/rem need zeroed high bits in the wide type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      // TypeIdx == 1: widening the condition.
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    // Only widening the integer result is handled here.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // Signed source: sign-extend to preserve the value.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // Unsigned source: zero-extend to preserve the value.
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    // The memory operand keeps its size; only the register def widens, so the
    // (possibly extending) load semantics are unchanged.
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // An s1 store must be zero-extended so a well-defined value is written;
    // otherwise the extra bits are never read back and anyext suffices.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // Rewrite the immediate in the wider type (sign-extended) and widen the
    // def; the trunc added by widenScalarDst restores the original value.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    // Convert the FP immediate to the wider format. Widening to a larger IEEE
    // type is exact, so the rounding mode should be immaterial here.
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
      break;
    default:
      llvm_unreachable("Unhandled fp widen type");
    }
    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    // The def is converted back with G_FPTRUNC rather than G_TRUNC.
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    // Only the low bit of the condition is tested, so any extension works.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return
Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // The comparison's signedness dictates how the operands must be
      // extended for the result to be unchanged.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    // Pointer offsets are signed.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Each incoming value is extended at the end of its predecessor block,
    // before that block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The trunc of the widened def must be emitted after all PHIs in this
    // block; point the builder just before the first non-PHI so
    // widenScalarDst's increment lands there.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      unsigned VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      // Widen the vector's element type to match the widened result.
      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // TypeIdx 2 is the index operand.
    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case
       TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP ops: extend every source and fptrunc the widened result back.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

/// Legalize \p MI by expanding it into simpler, already-legalizable
/// instructions of type \p Ty.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y
    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ?
                          G_SDIV : G_UDIV)
      .addDef(QuotReg)
      .addUse(MI.getOperand(1).getReg())
      .addUse(MI.getOperand(2).getReg());

    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned Overflow = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned multiply overflows iff the high half is non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR type matching the scalar FP width so we can materialize the
    // correctly-signed zero for negation.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    // Lower fneg(x) to fsub(-0.0, x) so signed zeros are handled correctly.
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
        .addDef(Res)
        .addUse(Zero->getOperand(0).getReg())
        .addUse(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    unsigned Res = MI.getOperand(0).getReg();
    unsigned LHS = MI.getOperand(1).getReg();
    unsigned RHS = MI.getOperand(2).getReg();
    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
        .addDef(Res)
        .addUse(LHS)
        .addUse(Neg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain cmpxchg plus an explicit compare of the returned old
    // value against the expected one to compute the success flag.
    unsigned OldValRes = MI.getOperand(0).getReg();
    unsigned SuccessRes = MI.getOperand(1).getReg();
    unsigned Addr = MI.getOperand(2).getReg();
    unsigned CmpVal = MI.getOperand(3).getReg();
    unsigned NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
      // In the case of G_LOAD, this was a non-extending load already and we're
      // about to lower to the same instruction.
      if (MI.getOpcode() == TargetOpcode::G_LOAD)
        return UnableToLegalize;
      // Full-width SEXTLOAD/ZEXTLOAD degenerates to a plain load.
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then extend to the register width.
      unsigned TmpReg = MRI.createGenericVirtualRegister(
          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDE: {
    // res = lhs + rhs + carry_in; carry_out = res < lhs (unsigned).
    // NOTE(review): the ULT check misses the carry when rhs+carry_in wraps to
    // exactly 2^N (rhs == ~0 with carry_in set) — matches the historical
    // upstream lowering; confirm before relying on it for exotic targets.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned CarryOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned CarryIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    // res = lhs - rhs; borrow_out = lhs < rhs (unsigned).
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS,
                        RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    // res = lhs - rhs - borrow_in.
    // borrow_out = (lhs == rhs) ? borrow_in : (lhs < rhs): when the operands
    // are equal the whole subtraction borrows exactly when borrow_in is set.
    unsigned Res = MI.getOperand(0).getReg();
    unsigned BorrowOut = MI.getOperand(1).getReg();
    unsigned LHS = MI.getOperand(2).getReg();
    unsigned RHS = MI.getOperand(3).getReg();
    unsigned BorrowIn = MI.getOperand(4).getReg();

    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
    unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Split a G_IMPLICIT_DEF of a vector into NarrowTy-sized undef pieces and
/// reassemble the original register from them.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<unsigned, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  for (int i = 0; i < NumParts; ++i) {
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(TmpReg);
    DstRegs.push_back(TmpReg);
  }

  // Vector pieces are concatenated; scalar pieces form a build_vector.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split an elementwise operation (1-3 sources, all of the result type) into
/// NarrowTy-sized pieces, applying the opcode piecewise and reassembling the
/// result. Handles an odd single-element leftover via extract/insert.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const unsigned DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
1696 for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) { 1697 SmallVector<SrcOp, 4> SrcOps; 1698 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1699 unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy); 1700 MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset); 1701 SrcOps.push_back(PartOpReg); 1702 } 1703 1704 unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); 1705 MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1706 1707 unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy); 1708 MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset); 1709 AccumDstReg = PartInsertReg; 1710 Offset += NarrowSize; 1711 } 1712 1713 // Handle the remaining element sized leftover piece. 1714 SmallVector<SrcOp, 4> SrcOps; 1715 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1716 unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy); 1717 MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), 1718 BitsForNumParts); 1719 SrcOps.push_back(PartOpReg); 1720 } 1721 1722 unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy); 1723 MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1724 MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts); 1725 MI.eraseFromParent(); 1726 1727 return Legalized; 1728 } 1729 1730 SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; 1731 1732 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs); 1733 1734 if (NumOps >= 2) 1735 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs); 1736 1737 if (NumOps >= 3) 1738 extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs); 1739 1740 for (int i = 0; i < NumParts; ++i) { 1741 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 1742 1743 if (NumOps == 1) 1744 MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags); 1745 else if (NumOps == 2) { 1746 MIRBuilder.buildInstr(Opc, 
                            {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  // Reassemble the full-width result from the piecewise results.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//      <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//      <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//      <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
1793 int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0); 1794 if (NumParts < 0) 1795 return UnableToLegalize; 1796 1797 SmallVector<MachineInstrBuilder, 4> NewInsts; 1798 1799 SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs; 1800 SmallVector<unsigned, 4> PartRegs, LeftoverRegs; 1801 1802 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1803 LLT LeftoverTy; 1804 unsigned SrcReg = MI.getOperand(I).getReg(); 1805 LLT SrcTyI = MRI.getType(SrcReg); 1806 LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType()); 1807 LLT LeftoverTyI; 1808 1809 // Split this operand into the requested typed registers, and any leftover 1810 // required to reproduce the original type. 1811 if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, 1812 LeftoverRegs)) 1813 return UnableToLegalize; 1814 1815 if (I == 1) { 1816 // For the first operand, create an instruction for each part and setup 1817 // the result. 1818 for (unsigned PartReg : PartRegs) { 1819 unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); 1820 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) 1821 .addDef(PartDstReg) 1822 .addUse(PartReg)); 1823 DstRegs.push_back(PartDstReg); 1824 } 1825 1826 for (unsigned LeftoverReg : LeftoverRegs) { 1827 unsigned PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); 1828 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) 1829 .addDef(PartDstReg) 1830 .addUse(LeftoverReg)); 1831 LeftoverDstRegs.push_back(PartDstReg); 1832 } 1833 } else { 1834 assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); 1835 1836 // Add the newly created operand splits to the existing instructions. The 1837 // odd-sized pieces are ordered after the requested NarrowTyArg sized 1838 // pieces. 
1839 unsigned InstCount = 0; 1840 for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) 1841 NewInsts[InstCount++].addUse(PartRegs[J]); 1842 for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) 1843 NewInsts[InstCount++].addUse(LeftoverRegs[J]); 1844 } 1845 1846 PartRegs.clear(); 1847 LeftoverRegs.clear(); 1848 } 1849 1850 // Insert the newly built operations and rebuild the result register. 1851 for (auto &MIB : NewInsts) 1852 MIRBuilder.insertInstr(MIB); 1853 1854 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); 1855 1856 MI.eraseFromParent(); 1857 return Legalized; 1858 } 1859 1860 LegalizerHelper::LegalizeResult 1861 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, 1862 LLT NarrowTy) { 1863 if (TypeIdx != 0) 1864 return UnableToLegalize; 1865 1866 unsigned DstReg = MI.getOperand(0).getReg(); 1867 unsigned SrcReg = MI.getOperand(1).getReg(); 1868 LLT DstTy = MRI.getType(DstReg); 1869 LLT SrcTy = MRI.getType(SrcReg); 1870 1871 LLT NarrowTy0 = NarrowTy; 1872 LLT NarrowTy1; 1873 unsigned NumParts; 1874 1875 if (NarrowTy.isVector()) { 1876 // Uneven breakdown not handled. 
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // NOTE(review): the element count here is NumParts, not
    // NarrowTy.getNumElements(); these only agree when the breakdown is
    // "square" (e.g. <4 x sN> into two <2 x sN> pieces) — verify for other
    // factorizations.
    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Fully scalarize: one part per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<unsigned, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
                                .addDef(DstReg)
                                .addUse(SrcRegs[I]);

    // Preserve any fast-math/exact flags from the original instruction.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split G_ICMP/G_FCMP into narrower compares. TypeIdx 0 narrows the boolean
// result vector; TypeIdx 1 narrows the compared operands. The other type's
// narrow form is derived so both sides keep matching element counts.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  // Operand 1 is the predicate; operand 2 is the first compared value.
  unsigned Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ?
      (OldElts / NewElts) : NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      // Only FP compares carry fast-math flags.
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split G_SELECT into narrower selects. The condition may be a scalar (shared
// by all pieces) or a vector (split alongside the values).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  // TypeIdx 1 (the condition) only makes sense for a vector condition.
  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // Splitting a vector condition with a vector NarrowTy is not
      // implemented yet, so bail out unconditionally (which makes the check
      // above dead for now).
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // A scalar condition is reused whole for every piece; a vector condition is
  // split like the data operands.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  unsigned ValReg = MI.getOperand(0).getReg();
  unsigned AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;
  LLT LeftoverTy;
  SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For a load only the type breakdown is needed here; the piece registers
    // are created inside splitTypePieces below.
    NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store the value must be split into pieces up front.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs))
      NumParts = NarrowRegs.size();
  }

  // NumParts stayed -1 if the breakdown was unsatisfiable.
  if (NumParts == -1)
    return UnableToLegalize;

  // Address offsets are computed in the pointer's index-sized scalar type.
  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      unsigned NewAddrReg = 0;

      // Address of this piece: base pointer + ByteOffset.
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a memory operand for the sub-access from the original one.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // Loads additionally need the loaded pieces merged back into the original
  // wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}

// Top-level dispatcher for FewerElements: route each opcode to the splitting
// strategy that matches its operand structure.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FCEIL:
  case G_FFLOOR:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
    // All operands and the result share one type.
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
    // Same element count per operand, but possibly different element types.
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return
UnableToLegalize;
  }
}

// Narrow a 2*N-bit shift with a compile-time-known amount \p Amt. The input
// is unmerged into two N-bit halves (InL, InH) and the result halves are
// computed directly, choosing among the special cases below based on how Amt
// compares to the half width N:
//   Amt == 0     : just re-merge the input halves.
//   Amt >  2*N   : all bits come from outside the value (zero, or the sign
//                  for G_ASHR).
//   Amt >  N     : only one input half contributes to the result.
//   Amt == N     : the halves move across unchanged.
//   0 < Amt < N  : one result half combines bits from both input halves.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
  unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Shift by zero: the result is the re-merged input.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(0), Hi(0);
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only the low input half reaches the high result half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exact half-width shift: halves move across unchanged.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // Hi = (InH << Amt) | (InL >> (N - Amt)); Lo = InL << Amt.
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only the high input half reaches the low result half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Exact half-width shift: halves move across unchanged.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // Lo = (InL >> Amt) | (InH << (N - Amt)); Hi = InH >> Amt.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR but shifted-in bits replicate the sign.
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; both halves become the sign fill.
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      // Only the high input half reaches the low result half.
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      // Exact half-width shift: Lo takes InH; Hi becomes the sign fill.
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      // Lo = (InL >> Amt) | (InH << (N - Amt)); Hi = InH a>> Amt.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}

// TODO: Optimize if constant shift amount.
2292 LegalizerHelper::LegalizeResult 2293 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, 2294 LLT RequestedTy) { 2295 if (TypeIdx == 1) { 2296 Observer.changingInstr(MI); 2297 narrowScalarSrc(MI, RequestedTy, 2); 2298 Observer.changedInstr(MI); 2299 return Legalized; 2300 } 2301 2302 unsigned DstReg = MI.getOperand(0).getReg(); 2303 LLT DstTy = MRI.getType(DstReg); 2304 if (DstTy.isVector()) 2305 return UnableToLegalize; 2306 2307 unsigned Amt = MI.getOperand(2).getReg(); 2308 LLT ShiftAmtTy = MRI.getType(Amt); 2309 const unsigned DstEltSize = DstTy.getScalarSizeInBits(); 2310 if (DstEltSize % 2 != 0) 2311 return UnableToLegalize; 2312 2313 // Ignore the input type. We can only go to exactly half the size of the 2314 // input. If that isn't small enough, the resulting pieces will be further 2315 // legalized. 2316 const unsigned NewBitSize = DstEltSize / 2; 2317 const LLT HalfTy = LLT::scalar(NewBitSize); 2318 const LLT CondTy = LLT::scalar(1); 2319 2320 if (const MachineInstr *KShiftAmt = 2321 getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { 2322 return narrowScalarShiftByConstant( 2323 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); 2324 } 2325 2326 // TODO: Expand with known bits. 2327 2328 // Handle the fully general expansion by an unknown amount. 
2329 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize); 2330 2331 unsigned InL = MRI.createGenericVirtualRegister(HalfTy); 2332 unsigned InH = MRI.createGenericVirtualRegister(HalfTy); 2333 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); 2334 2335 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits); 2336 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt); 2337 2338 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0); 2339 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits); 2340 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero); 2341 2342 unsigned ResultRegs[2]; 2343 switch (MI.getOpcode()) { 2344 case TargetOpcode::G_SHL: { 2345 // Short: ShAmt < NewBitSize 2346 auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2347 2348 auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); 2349 auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); 2350 auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2351 2352 // Long: ShAmt >= NewBitSize 2353 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. 2354 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part. 2355 2356 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL); 2357 auto Hi = MIRBuilder.buildSelect( 2358 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL)); 2359 2360 ResultRegs[0] = Lo.getReg(0); 2361 ResultRegs[1] = Hi.getReg(0); 2362 break; 2363 } 2364 case TargetOpcode::G_LSHR: { 2365 // Short: ShAmt < NewBitSize 2366 auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); 2367 2368 auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2369 auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); 2370 auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2371 2372 // Long: ShAmt >= NewBitSize 2373 auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. 2374 auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 
2375 2376 auto Lo = MIRBuilder.buildSelect( 2377 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2378 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2379 2380 ResultRegs[0] = Lo.getReg(0); 2381 ResultRegs[1] = Hi.getReg(0); 2382 break; 2383 } 2384 case TargetOpcode::G_ASHR: { 2385 // Short: ShAmt < NewBitSize 2386 auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); 2387 2388 auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); 2389 auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); 2390 auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); 2391 2392 // Long: ShAmt >= NewBitSize 2393 2394 // Sign of Hi part. 2395 auto HiL = MIRBuilder.buildAShr( 2396 HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); 2397 2398 auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 2399 2400 auto Lo = MIRBuilder.buildSelect( 2401 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 2402 2403 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 2404 2405 ResultRegs[0] = Lo.getReg(0); 2406 ResultRegs[1] = Hi.getReg(0); 2407 break; 2408 } 2409 default: 2410 llvm_unreachable("not a shift"); 2411 } 2412 2413 MIRBuilder.buildMerge(DstReg, ResultRegs); 2414 MI.eraseFromParent(); 2415 return Legalized; 2416 } 2417 2418 LegalizerHelper::LegalizeResult 2419 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, 2420 LLT MoreTy) { 2421 MIRBuilder.setInstr(MI); 2422 unsigned Opc = MI.getOpcode(); 2423 switch (Opc) { 2424 case TargetOpcode::G_IMPLICIT_DEF: { 2425 Observer.changingInstr(MI); 2426 moreElementsVectorDst(MI, MoreTy, 0); 2427 Observer.changedInstr(MI); 2428 return Legalized; 2429 } 2430 default: 2431 return UnableToLegalize; 2432 } 2433 } 2434 2435 LegalizerHelper::LegalizeResult 2436 LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) { 2437 unsigned DstReg = MI.getOperand(0).getReg(); 2438 unsigned Src0 = 
MI.getOperand(1).getReg(); 2439 unsigned Src1 = MI.getOperand(2).getReg(); 2440 LLT Ty = MRI.getType(DstReg); 2441 if (Ty.isVector()) 2442 return UnableToLegalize; 2443 2444 unsigned Size = Ty.getSizeInBits(); 2445 unsigned NewSize = Size / 2; 2446 if (Size != 2 * NewSize) 2447 return UnableToLegalize; 2448 2449 LLT HalfTy = LLT::scalar(NewSize); 2450 // TODO: if HalfTy != NewTy, handle the breakdown all at once? 2451 2452 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 2453 unsigned Lo = MRI.createGenericVirtualRegister(HalfTy); 2454 unsigned Hi = MRI.createGenericVirtualRegister(HalfTy); 2455 unsigned ExtLo = MRI.createGenericVirtualRegister(Ty); 2456 unsigned ExtHi = MRI.createGenericVirtualRegister(Ty); 2457 unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty); 2458 2459 SmallVector<unsigned, 2> Src0Parts; 2460 SmallVector<unsigned, 2> Src1Parts; 2461 2462 extractParts(Src0, HalfTy, 2, Src0Parts); 2463 extractParts(Src1, HalfTy, 2, Src1Parts); 2464 2465 MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]); 2466 2467 // TODO: Use smulh or umulh depending on what the target has. 
2468 MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]); 2469 2470 MIRBuilder.buildConstant(ShiftAmt, NewSize); 2471 MIRBuilder.buildAnyExt(ExtHi, Hi); 2472 MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt); 2473 2474 MIRBuilder.buildZExt(ExtLo, Lo); 2475 MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi); 2476 MI.eraseFromParent(); 2477 return Legalized; 2478 } 2479 2480 LegalizerHelper::LegalizeResult 2481 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, 2482 LLT NarrowTy) { 2483 if (TypeIdx != 0) 2484 return UnableToLegalize; 2485 2486 unsigned CondReg = MI.getOperand(1).getReg(); 2487 LLT CondTy = MRI.getType(CondReg); 2488 if (CondTy.isVector()) // TODO: Handle vselect 2489 return UnableToLegalize; 2490 2491 unsigned DstReg = MI.getOperand(0).getReg(); 2492 LLT DstTy = MRI.getType(DstReg); 2493 2494 SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs; 2495 SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs; 2496 SmallVector<unsigned, 4> Src2Regs, Src2LeftoverRegs; 2497 LLT LeftoverTy; 2498 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy, 2499 Src1Regs, Src1LeftoverRegs)) 2500 return UnableToLegalize; 2501 2502 LLT Unused; 2503 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused, 2504 Src2Regs, Src2LeftoverRegs)) 2505 llvm_unreachable("inconsistent extractParts result"); 2506 2507 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { 2508 auto Select = MIRBuilder.buildSelect(NarrowTy, 2509 CondReg, Src1Regs[I], Src2Regs[I]); 2510 DstRegs.push_back(Select->getOperand(0).getReg()); 2511 } 2512 2513 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { 2514 auto Select = MIRBuilder.buildSelect( 2515 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); 2516 DstLeftoverRegs.push_back(Select->getOperand(0).getReg()); 2517 } 2518 2519 insertParts(DstReg, DstTy, NarrowTy, DstRegs, 2520 LeftoverTy, DstLeftoverRegs); 2521 2522 MI.eraseFromParent(); 2523 return Legalized; 2524 } 2525 2526 
// Lower G_CTLZ/G_CTTZ and their ZERO_UNDEF variants into operations the
// target can handle, preferring the ZERO_UNDEF form plus a zero-input select
// when the target supports it, and falling back to the bit-trick expansions
// from "Hacker's Delight" otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An opcode is usable as a lowering target if the legalizer will accept it
  // directly or knows how to handle it via libcall/custom action.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      // Src == 0 ? bitwidth : ctlz_zero_undef(Src).
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    unsigned Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into every lower position, doubling the shift
    // each round.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    // ctlz(x) == Len - popcount(smeared x).
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    unsigned SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      // Src == 0 ? bitwidth : cttz_zero_undef(Src).
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    // ~x via x ^ -1.
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    // ~x & (x - 1): a mask of the trailing zeros of x.
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Otherwise reuse this instruction as a CTPOP of the trailing-zero mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}