//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;

/// Construct a helper that queries the subtarget's own LegalizerInfo.
/// The builder is retargeted at \p MF and wired to \p Observer so every
/// instruction created during legalization is reported.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

/// Construct a helper with an explicitly supplied LegalizerInfo instead of
/// the subtarget's default one (useful for testing).
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

/// Perform one legalization step on \p MI: ask the target's LegalizerInfo
/// which action applies and dispatch to the matching strategy.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

/// Split \p Reg into \p NumParts new virtual registers of type \p Ty via a
/// single G_UNMERGE_VALUES. Assumes Reg's size is exactly NumParts * size(Ty).
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<unsigned> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

/// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit,
/// plus an optional leftover piece whose type is returned in \p LeftoverTy
/// (an out argument; must be passed in invalid). Returns false if the
/// leftover cannot be represented (e.g. not a whole number of vector
/// elements).
bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<unsigned> &VRegs,
                                   SmallVectorImpl<unsigned> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

/// Reassemble \p DstReg (of type \p ResultTy) from \p PartRegs of type
/// \p PartTy followed by \p LeftoverRegs of type \p LeftoverTy. The inverse
/// of the irregular-size extractParts above. When there is no leftover, a
/// single concat/build-vector is emitted; otherwise pieces are inserted into
/// an undef value one at a time.
void LegalizerHelper::insertParts(unsigned DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<unsigned> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<unsigned> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (unsigned PartReg : PartRegs) {
    unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    unsigned NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}

/// Map a generic opcode plus a scalar size to the runtime library call that
/// implements it. Asserting on unsupported sizes (rather than returning a
/// sentinel) matches how callers use this: the size is pre-checked.
// NOTE(review): G_FREM and G_FPOW lack the (Size == 32 || Size == 64) assert
// that every sibling case has — confirm whether that is intentional.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FEXP:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  case TargetOpcode::G_FSIN:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::SIN_F128
                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
  case TargetOpcode::G_FCOS:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::COS_F128
                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
  case TargetOpcode::G_FLOG10:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG10_F128
                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
  case TargetOpcode::G_FLOG:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG_F128
                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
  case TargetOpcode::G_FLOG2:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG2_F128
                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

/// Emit a call to the runtime library function \p Libcall with the given
/// result and argument descriptions, using the target's CallLowering. Also
/// marks the frame as having calls.
// NOTE(review): getLibcallName may return null if the target provides no
// implementation for this libcall — confirm callers guarantee availability.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
                     MachineOperand::CreateES(Name), Result, Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // Operand 0 is the def; every remaining operand becomes a call argument
  // of the same IR type.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

/// Map a conversion opcode and its source/destination IR types to the
/// corresponding RTLIB conversion libcall.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

/// Emit the libcall for a unary conversion instruction (one source operand,
/// distinct source and destination types).
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

/// Replace \p MI with an equivalent runtime library call. On success the
/// original instruction is erased; on failure the function returns before
/// erasing so the instruction is left intact.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops: all operands share an iN type of the result width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP: {
    // Only float (32) and double (64) are handled here.
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getInt32Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        Type::getInt32Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Legalize \p MI by splitting the type at \p TypeIdx into pieces of
/// \p NarrowTy. Each opcode gets a bespoke expansion; opcodes without one
/// return UnableToLegalize.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // One undef per part, then merge them back together.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Seed the carry chain with 0 so the low part is a plain add.
    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildConstant(CarryIn, 0);

    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                            Src2Regs[i], CarryIn);

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Low part sets the borrow (G_USUBO); the rest consume and propagate it
    // (G_USUBE).
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    unsigned BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
    return narrowScalarMul(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_EXTRACT: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    // FIXME: add support for when SizeOp1 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp1 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp1 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(0).getReg();
    uint64_t OpStart = MI.getOperand(2).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned SrcStart = i * NarrowSize;

      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
        // No part of the extract uses this subregister, ignore it.
        continue;
      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is extracted, forward the value.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset;
      uint64_t SegSize;
      if (OpStart < SrcStart) {
        ExtractOffset = 0;
        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      } else {
        ExtractOffset = OpStart - SrcStart;
        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
      }

      unsigned SegReg = SrcRegs[i];
      if (ExtractOffset != 0 || SegSize != NarrowSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
      }

      DstRegs.push_back(SegReg);
    }

    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INSERT: {
    // FIXME: Don't know how to handle secondary types yet.
    if (TypeIdx != 0)
      return UnableToLegalize;

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<unsigned, 2> SrcRegs, DstRegs;
    SmallVector<uint64_t, 2> Indexes;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    unsigned OpReg = MI.getOperand(2).getReg();
    uint64_t OpStart = MI.getOperand(3).getImm();
    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstStart = i * NarrowSize;

      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
        // No part of the insert affects this subregister, forward the original.
        DstRegs.push_back(SrcRegs[i]);
        continue;
      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
        // The entire subregister is defined by this insert, forward the new
        // value.
        DstRegs.push_back(OpReg);
        continue;
      }

      // OpSegStart is where this destination segment would start in OpReg if it
      // extended infinitely in both directions.
      int64_t ExtractOffset, InsertOffset;
      uint64_t SegSize;
      if (OpStart < DstStart) {
        InsertOffset = 0;
        ExtractOffset = DstStart - OpStart;
        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      } else {
        InsertOffset = OpStart - DstStart;
        ExtractOffset = 0;
        SegSize =
            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
      }

      unsigned SegReg = OpReg;
      if (ExtractOffset != 0 || SegSize != OpSize) {
        // A genuine extract is needed.
        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
      }

      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
      DstRegs.push_back(DstReg);
    }

    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD: {
    const auto &MMO = **MI.memoperands_begin();
    unsigned DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = DstTy.getSizeInBits() - HandledSize;

    if (DstTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Extending load (memory narrower than the register): load at NarrowTy
    // and anyext up to the destination.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    // Emit one narrow load per part, each through its own GEP'd pointer and
    // split memory operand.
    MachineFunction &MF = MIRBuilder.getMF();
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg = 0;
      unsigned Offset = i * NarrowSize / 8;

      MachineMemOperand *SplitMMO =
          MF.getMachineMemOperand(&MMO, Offset, NarrowSize / 8);

      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                                Offset);

      MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO);

      DstRegs.push_back(PartDstReg);
    }

    unsigned MergeResultReg = LeftoverBits == 0 ? DstReg :
      MRI.createGenericVirtualRegister(LLT::scalar(HandledSize));

    // For the leftover piece, still create the merge and insert it.
    // TODO: Would it be better to directly insert the intermediate pieces?
    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(MergeResultReg, DstRegs);
    else
      MIRBuilder.buildMerge(MergeResultReg, DstRegs);

    if (LeftoverBits == 0) {
      MI.eraseFromParent();
      return Legalized;
    }

    // Build the final value from undef: insert the merged main pieces at 0,
    // then load and insert the leftover tail.
    unsigned ImpDefReg = MRI.createGenericVirtualRegister(DstTy);
    unsigned Insert0Reg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(ImpDefReg);
    MIRBuilder.buildInsert(Insert0Reg, ImpDefReg, MergeResultReg, 0);

    unsigned PartDstReg
      = MRI.createGenericVirtualRegister(LLT::scalar(LeftoverBits));
    unsigned Offset = HandledSize / 8;

    MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
        &MMO, Offset, LeftoverBits / 8);

    unsigned SrcReg = 0;
    MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                              Offset);
    MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO);
    MIRBuilder.buildInsert(DstReg, Insert0Reg, PartDstReg, HandledSize);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned PtrReg = MI.getOperand(1).getReg();

    // Load at the narrow type: a plain load if memory exactly matches
    // NarrowTy, otherwise keep it an extending load into NarrowTy.
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSize() * 8 == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
        : TargetOpcode::G_SEXTLOAD;
      MIRBuilder.buildInstr(ExtLoad)
        .addDef(TmpReg)
        .addUse(PtrReg)
        .addMemOperand(&MMO);
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    const auto &MMO = **MI.memoperands_begin();

    unsigned SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // Truncating store (memory narrower than the register): trunc to
    // NarrowTy and store that.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    // This implementation doesn't work for atomics. Give up instead of doing
    // something invalid.
    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    LLT OffsetTy = LLT::scalar(
        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

    SmallVector<unsigned, 2> SrcRegs;
    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);

    // Mirror of the G_LOAD expansion: one narrow store per part.
    MachineFunction &MF = MIRBuilder.getMF();
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = 0;
      unsigned Offset = i * NarrowSize / 8;

      MachineMemOperand *SplitMMO =
          MF.getMachineMemOperand(&MMO, Offset, NarrowSize / 8);

      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
                                Offset);

      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

    // Part i gets bits [i*NarrowSize, (i+1)*NarrowSize) of the constant.
    SmallVector<unsigned, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      ConstantInt *CI =
          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
      MIRBuilder.buildConstant(DstReg, *CI);
      DstRegs.push_back(DstReg);
    }
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN

    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<unsigned, 2> DstRegs;
    // List the registers where the first argument will be split.
    SmallVector<unsigned, 2> SrcsReg1;
    // List the registers where the second argument will be split.
    SmallVector<unsigned, 2> SrcsReg2;
    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);

      DstRegs.push_back(DstReg);
      SrcsReg1.push_back(SrcReg1);
      SrcsReg2.push_back(SrcReg2);
    }
    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i)
      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
                            {SrcsReg1[i], SrcsReg2[i]});

    // Gather the destination registers into the final destination.
    unsigned DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Only the shift-amount operand (type index 1) can be narrowed here;
    // it is truncated in place.
    if (TypeIdx != 1)
      return UnableToLegalize; // TODO
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    // Narrow only the result; the count is zero-extended back up.
    if (TypeIdx != 0)
      return UnableToLegalize; // TODO

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

/// Widen source operand \p OpIdx in place: extend its register to \p WideTy
/// with \p ExtOpcode and rewrite the operand to use the extended value.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Narrow source operand \p OpIdx in place by truncating it to \p NarrowTy.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
                                    {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Widen def operand \p OpIdx in place: the instruction now defines a fresh
/// \p WideTy register, and a \p TruncOpcode emitted *after* MI narrows it
/// back into the original register (hence the insert-point bump).
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

/// Narrow def operand \p OpIdx in place: the instruction now defines a fresh
/// \p NarrowTy register, and an \p ExtOpcode emitted after MI extends it back
/// into the original register.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
  MO.setReg(DstTrunc);
}

// NOTE(review): the definition below is truncated in this chunk; it continues
// past the visible source and is left exactly as found.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize =
DstTy.getSizeInBits() / NumSrc; 952 953 unsigned Src1 = MI.getOperand(1).getReg(); 954 unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg(); 955 956 for (unsigned I = 2; I != NumOps; ++I) { 957 const unsigned Offset = (I - 1) * PartSize; 958 959 unsigned SrcReg = MI.getOperand(I).getReg(); 960 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); 961 962 auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg); 963 964 unsigned NextResult = I + 1 == NumOps ? DstReg : 965 MRI.createGenericVirtualRegister(DstTy); 966 967 auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset); 968 auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt); 969 MIRBuilder.buildOr(NextResult, ResultReg, Shl); 970 ResultReg = NextResult; 971 } 972 973 MI.eraseFromParent(); 974 return Legalized; 975 } 976 977 LegalizerHelper::LegalizeResult 978 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, 979 LLT WideTy) { 980 if (TypeIdx != 0) 981 return UnableToLegalize; 982 983 unsigned NumDst = MI.getNumOperands() - 1; 984 unsigned SrcReg = MI.getOperand(NumDst).getReg(); 985 LLT SrcTy = MRI.getType(SrcReg); 986 if (!SrcTy.isScalar()) 987 return UnableToLegalize; 988 989 unsigned Dst0Reg = MI.getOperand(0).getReg(); 990 LLT DstTy = MRI.getType(Dst0Reg); 991 if (!DstTy.isScalar()) 992 return UnableToLegalize; 993 994 unsigned NewSrcSize = NumDst * WideTy.getSizeInBits(); 995 LLT NewSrcTy = LLT::scalar(NewSrcSize); 996 unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits(); 997 998 auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg); 999 1000 for (unsigned I = 1; I != NumDst; ++I) { 1001 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I); 1002 auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt); 1003 WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl); 1004 } 1005 1006 Observer.changingInstr(MI); 1007 1008 MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg()); 1009 for (unsigned I = 0; I != NumDst; ++I) 
1010 widenScalarDst(MI, WideTy, I); 1011 1012 Observer.changedInstr(MI); 1013 1014 return Legalized; 1015 } 1016 1017 LegalizerHelper::LegalizeResult 1018 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { 1019 MIRBuilder.setInstr(MI); 1020 1021 switch (MI.getOpcode()) { 1022 default: 1023 return UnableToLegalize; 1024 case TargetOpcode::G_EXTRACT: { 1025 if (TypeIdx != 1) 1026 return UnableToLegalize; 1027 1028 unsigned SrcReg = MI.getOperand(1).getReg(); 1029 LLT SrcTy = MRI.getType(SrcReg); 1030 if (!SrcTy.isVector()) 1031 return UnableToLegalize; 1032 1033 unsigned DstReg = MI.getOperand(0).getReg(); 1034 LLT DstTy = MRI.getType(DstReg); 1035 if (DstTy != SrcTy.getElementType()) 1036 return UnableToLegalize; 1037 1038 unsigned Offset = MI.getOperand(2).getImm(); 1039 if (Offset % SrcTy.getScalarSizeInBits() != 0) 1040 return UnableToLegalize; 1041 1042 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1043 1044 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * 1045 Offset); 1046 widenScalarDst(MI, WideTy.getScalarType(), 0); 1047 1048 return Legalized; 1049 } 1050 case TargetOpcode::G_MERGE_VALUES: 1051 return widenScalarMergeValues(MI, TypeIdx, WideTy); 1052 case TargetOpcode::G_UNMERGE_VALUES: 1053 return widenScalarUnmergeValues(MI, TypeIdx, WideTy); 1054 case TargetOpcode::G_UADDO: 1055 case TargetOpcode::G_USUBO: { 1056 if (TypeIdx == 1) 1057 return UnableToLegalize; // TODO 1058 auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, 1059 {MI.getOperand(2).getReg()}); 1060 auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy}, 1061 {MI.getOperand(3).getReg()}); 1062 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO 1063 ? TargetOpcode::G_ADD 1064 : TargetOpcode::G_SUB; 1065 // Do the arithmetic in the larger type. 
1066 auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext}); 1067 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); 1068 APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits()); 1069 auto AndOp = MIRBuilder.buildInstr( 1070 TargetOpcode::G_AND, {WideTy}, 1071 {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())}); 1072 // There is no overflow if the AndOp is the same as NewOp. 1073 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp, 1074 AndOp); 1075 // Now trunc the NewOp to the original result. 1076 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp); 1077 MI.eraseFromParent(); 1078 return Legalized; 1079 } 1080 case TargetOpcode::G_CTTZ: 1081 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1082 case TargetOpcode::G_CTLZ: 1083 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1084 case TargetOpcode::G_CTPOP: { 1085 if (TypeIdx == 0) { 1086 widenScalarDst(MI, WideTy, 0); 1087 return Legalized; 1088 } 1089 1090 // First ZEXT the input. 1091 auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg()); 1092 LLT CurTy = MRI.getType(MI.getOperand(0).getReg()); 1093 if (MI.getOpcode() == TargetOpcode::G_CTTZ) { 1094 // The count is the same in the larger type except if the original 1095 // value was zero. This can be handled by setting the bit just off 1096 // the top of the original type. 1097 auto TopBit = 1098 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); 1099 MIBSrc = MIRBuilder.buildInstr( 1100 TargetOpcode::G_OR, {WideTy}, 1101 {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())}); 1102 } 1103 // Perform the operation at the larger size. 1104 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); 1105 // This is already the correct result for CTPOP and CTTZs 1106 if (MI.getOpcode() == TargetOpcode::G_CTLZ || 1107 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { 1108 // The correct result is NewOp - (Difference in widety and current ty). 
1109 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); 1110 MIBNewOp = MIRBuilder.buildInstr( 1111 TargetOpcode::G_SUB, {WideTy}, 1112 {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)}); 1113 } 1114 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 1115 // Make the original instruction a trunc now, and update its source. 1116 Observer.changingInstr(MI); 1117 MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); 1118 MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg()); 1119 Observer.changedInstr(MI); 1120 return Legalized; 1121 } 1122 case TargetOpcode::G_BSWAP: { 1123 Observer.changingInstr(MI); 1124 unsigned DstReg = MI.getOperand(0).getReg(); 1125 1126 unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy); 1127 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); 1128 unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy); 1129 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1130 1131 MI.getOperand(0).setReg(DstExt); 1132 1133 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 1134 1135 LLT Ty = MRI.getType(DstReg); 1136 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); 1137 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits); 1138 MIRBuilder.buildInstr(TargetOpcode::G_LSHR) 1139 .addDef(ShrReg) 1140 .addUse(DstExt) 1141 .addUse(ShiftAmtReg); 1142 1143 MIRBuilder.buildTrunc(DstReg, ShrReg); 1144 Observer.changedInstr(MI); 1145 return Legalized; 1146 } 1147 case TargetOpcode::G_ADD: 1148 case TargetOpcode::G_AND: 1149 case TargetOpcode::G_MUL: 1150 case TargetOpcode::G_OR: 1151 case TargetOpcode::G_XOR: 1152 case TargetOpcode::G_SUB: 1153 // Perform operation at larger width (any extension is fine here, high bits 1154 // don't affect the result) and then truncate the result back to the 1155 // original type. 
1156 Observer.changingInstr(MI); 1157 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1158 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 1159 widenScalarDst(MI, WideTy); 1160 Observer.changedInstr(MI); 1161 return Legalized; 1162 1163 case TargetOpcode::G_SHL: 1164 Observer.changingInstr(MI); 1165 1166 if (TypeIdx == 0) { 1167 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1168 widenScalarDst(MI, WideTy); 1169 } else { 1170 assert(TypeIdx == 1); 1171 // The "number of bits to shift" operand must preserve its value as an 1172 // unsigned integer: 1173 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1174 } 1175 1176 Observer.changedInstr(MI); 1177 return Legalized; 1178 1179 case TargetOpcode::G_SDIV: 1180 case TargetOpcode::G_SREM: 1181 Observer.changingInstr(MI); 1182 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 1183 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1184 widenScalarDst(MI, WideTy); 1185 Observer.changedInstr(MI); 1186 return Legalized; 1187 1188 case TargetOpcode::G_ASHR: 1189 case TargetOpcode::G_LSHR: 1190 Observer.changingInstr(MI); 1191 1192 if (TypeIdx == 0) { 1193 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ? 
1194 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; 1195 1196 widenScalarSrc(MI, WideTy, 1, CvtOp); 1197 widenScalarDst(MI, WideTy); 1198 } else { 1199 assert(TypeIdx == 1); 1200 // The "number of bits to shift" operand must preserve its value as an 1201 // unsigned integer: 1202 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1203 } 1204 1205 Observer.changedInstr(MI); 1206 return Legalized; 1207 case TargetOpcode::G_UDIV: 1208 case TargetOpcode::G_UREM: 1209 Observer.changingInstr(MI); 1210 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1211 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1212 widenScalarDst(MI, WideTy); 1213 Observer.changedInstr(MI); 1214 return Legalized; 1215 1216 case TargetOpcode::G_SELECT: 1217 Observer.changingInstr(MI); 1218 if (TypeIdx == 0) { 1219 // Perform operation at larger width (any extension is fine here, high 1220 // bits don't affect the result) and then truncate the result back to the 1221 // original type. 1222 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 1223 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); 1224 widenScalarDst(MI, WideTy); 1225 } else { 1226 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector(); 1227 // Explicit extension is required here since high bits affect the result. 
1228 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false)); 1229 } 1230 Observer.changedInstr(MI); 1231 return Legalized; 1232 1233 case TargetOpcode::G_FPTOSI: 1234 case TargetOpcode::G_FPTOUI: 1235 if (TypeIdx != 0) 1236 return UnableToLegalize; 1237 Observer.changingInstr(MI); 1238 widenScalarDst(MI, WideTy); 1239 Observer.changedInstr(MI); 1240 return Legalized; 1241 1242 case TargetOpcode::G_SITOFP: 1243 if (TypeIdx != 1) 1244 return UnableToLegalize; 1245 Observer.changingInstr(MI); 1246 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 1247 Observer.changedInstr(MI); 1248 return Legalized; 1249 1250 case TargetOpcode::G_UITOFP: 1251 if (TypeIdx != 1) 1252 return UnableToLegalize; 1253 Observer.changingInstr(MI); 1254 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1255 Observer.changedInstr(MI); 1256 return Legalized; 1257 1258 case TargetOpcode::G_INSERT: 1259 if (TypeIdx != 0) 1260 return UnableToLegalize; 1261 Observer.changingInstr(MI); 1262 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1263 widenScalarDst(MI, WideTy); 1264 Observer.changedInstr(MI); 1265 return Legalized; 1266 1267 case TargetOpcode::G_LOAD: 1268 // For some types like i24, we might try to widen to i32. To properly handle 1269 // this we should be using a dedicated extending load, until then avoid 1270 // trying to legalize. 
1271 if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) != 1272 WideTy.getSizeInBits()) 1273 return UnableToLegalize; 1274 LLVM_FALLTHROUGH; 1275 case TargetOpcode::G_SEXTLOAD: 1276 case TargetOpcode::G_ZEXTLOAD: 1277 Observer.changingInstr(MI); 1278 widenScalarDst(MI, WideTy); 1279 Observer.changedInstr(MI); 1280 return Legalized; 1281 1282 case TargetOpcode::G_STORE: { 1283 if (TypeIdx != 0) 1284 return UnableToLegalize; 1285 1286 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 1287 if (!isPowerOf2_32(Ty.getSizeInBits())) 1288 return UnableToLegalize; 1289 1290 Observer.changingInstr(MI); 1291 1292 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? 1293 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT; 1294 widenScalarSrc(MI, WideTy, 0, ExtType); 1295 1296 Observer.changedInstr(MI); 1297 return Legalized; 1298 } 1299 case TargetOpcode::G_CONSTANT: { 1300 MachineOperand &SrcMO = MI.getOperand(1); 1301 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1302 const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); 1303 Observer.changingInstr(MI); 1304 SrcMO.setCImm(ConstantInt::get(Ctx, Val)); 1305 1306 widenScalarDst(MI, WideTy); 1307 Observer.changedInstr(MI); 1308 return Legalized; 1309 } 1310 case TargetOpcode::G_FCONSTANT: { 1311 MachineOperand &SrcMO = MI.getOperand(1); 1312 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1313 APFloat Val = SrcMO.getFPImm()->getValueAPF(); 1314 bool LosesInfo; 1315 switch (WideTy.getSizeInBits()) { 1316 case 32: 1317 Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo); 1318 break; 1319 case 64: 1320 Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); 1321 break; 1322 default: 1323 llvm_unreachable("Unhandled fp widen type"); 1324 } 1325 Observer.changingInstr(MI); 1326 SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); 1327 1328 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); 1329 Observer.changedInstr(MI); 1330 return 
Legalized; 1331 } 1332 case TargetOpcode::G_IMPLICIT_DEF: { 1333 Observer.changingInstr(MI); 1334 widenScalarDst(MI, WideTy); 1335 Observer.changedInstr(MI); 1336 return Legalized; 1337 } 1338 case TargetOpcode::G_BRCOND: 1339 Observer.changingInstr(MI); 1340 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); 1341 Observer.changedInstr(MI); 1342 return Legalized; 1343 1344 case TargetOpcode::G_FCMP: 1345 Observer.changingInstr(MI); 1346 if (TypeIdx == 0) 1347 widenScalarDst(MI, WideTy); 1348 else { 1349 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); 1350 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT); 1351 } 1352 Observer.changedInstr(MI); 1353 return Legalized; 1354 1355 case TargetOpcode::G_ICMP: 1356 Observer.changingInstr(MI); 1357 if (TypeIdx == 0) 1358 widenScalarDst(MI, WideTy); 1359 else { 1360 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>( 1361 MI.getOperand(1).getPredicate())) 1362 ? TargetOpcode::G_SEXT 1363 : TargetOpcode::G_ZEXT; 1364 widenScalarSrc(MI, WideTy, 2, ExtOpcode); 1365 widenScalarSrc(MI, WideTy, 3, ExtOpcode); 1366 } 1367 Observer.changedInstr(MI); 1368 return Legalized; 1369 1370 case TargetOpcode::G_GEP: 1371 assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); 1372 Observer.changingInstr(MI); 1373 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1374 Observer.changedInstr(MI); 1375 return Legalized; 1376 1377 case TargetOpcode::G_PHI: { 1378 assert(TypeIdx == 0 && "Expecting only Idx 0"); 1379 1380 Observer.changingInstr(MI); 1381 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { 1382 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 1383 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 1384 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT); 1385 } 1386 1387 MachineBasicBlock &MBB = *MI.getParent(); 1388 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); 1389 widenScalarDst(MI, WideTy); 1390 Observer.changedInstr(MI); 1391 return Legalized; 1392 } 1393 
case TargetOpcode::G_EXTRACT_VECTOR_ELT: { 1394 if (TypeIdx == 0) { 1395 unsigned VecReg = MI.getOperand(1).getReg(); 1396 LLT VecTy = MRI.getType(VecReg); 1397 Observer.changingInstr(MI); 1398 1399 widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), 1400 WideTy.getSizeInBits()), 1401 1, TargetOpcode::G_SEXT); 1402 1403 widenScalarDst(MI, WideTy, 0); 1404 Observer.changedInstr(MI); 1405 return Legalized; 1406 } 1407 1408 if (TypeIdx != 2) 1409 return UnableToLegalize; 1410 Observer.changingInstr(MI); 1411 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1412 Observer.changedInstr(MI); 1413 return Legalized; 1414 } 1415 case TargetOpcode::G_FADD: 1416 case TargetOpcode::G_FMUL: 1417 case TargetOpcode::G_FSUB: 1418 case TargetOpcode::G_FMA: 1419 case TargetOpcode::G_FNEG: 1420 case TargetOpcode::G_FABS: 1421 case TargetOpcode::G_FDIV: 1422 case TargetOpcode::G_FREM: 1423 case TargetOpcode::G_FCEIL: 1424 case TargetOpcode::G_FCOS: 1425 case TargetOpcode::G_FSIN: 1426 case TargetOpcode::G_FLOG10: 1427 case TargetOpcode::G_FLOG: 1428 case TargetOpcode::G_FLOG2: 1429 case TargetOpcode::G_FSQRT: 1430 case TargetOpcode::G_FEXP: 1431 assert(TypeIdx == 0); 1432 Observer.changingInstr(MI); 1433 1434 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) 1435 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT); 1436 1437 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); 1438 Observer.changedInstr(MI); 1439 return Legalized; 1440 case TargetOpcode::G_INTTOPTR: 1441 if (TypeIdx != 1) 1442 return UnableToLegalize; 1443 1444 Observer.changingInstr(MI); 1445 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1446 Observer.changedInstr(MI); 1447 return Legalized; 1448 case TargetOpcode::G_PTRTOINT: 1449 if (TypeIdx != 0) 1450 return UnableToLegalize; 1451 1452 Observer.changingInstr(MI); 1453 widenScalarDst(MI, WideTy, 0); 1454 Observer.changedInstr(MI); 1455 return Legalized; 1456 } 1457 } 1458 1459 LegalizerHelper::LegalizeResult 1460 
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 1461 using namespace TargetOpcode; 1462 MIRBuilder.setInstr(MI); 1463 1464 switch(MI.getOpcode()) { 1465 default: 1466 return UnableToLegalize; 1467 case TargetOpcode::G_SREM: 1468 case TargetOpcode::G_UREM: { 1469 unsigned QuotReg = MRI.createGenericVirtualRegister(Ty); 1470 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) 1471 .addDef(QuotReg) 1472 .addUse(MI.getOperand(1).getReg()) 1473 .addUse(MI.getOperand(2).getReg()); 1474 1475 unsigned ProdReg = MRI.createGenericVirtualRegister(Ty); 1476 MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); 1477 MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), 1478 ProdReg); 1479 MI.eraseFromParent(); 1480 return Legalized; 1481 } 1482 case TargetOpcode::G_SMULO: 1483 case TargetOpcode::G_UMULO: { 1484 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the 1485 // result. 1486 unsigned Res = MI.getOperand(0).getReg(); 1487 unsigned Overflow = MI.getOperand(1).getReg(); 1488 unsigned LHS = MI.getOperand(2).getReg(); 1489 unsigned RHS = MI.getOperand(3).getReg(); 1490 1491 MIRBuilder.buildMul(Res, LHS, RHS); 1492 1493 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO 1494 ? 
TargetOpcode::G_SMULH 1495 : TargetOpcode::G_UMULH; 1496 1497 unsigned HiPart = MRI.createGenericVirtualRegister(Ty); 1498 MIRBuilder.buildInstr(Opcode) 1499 .addDef(HiPart) 1500 .addUse(LHS) 1501 .addUse(RHS); 1502 1503 unsigned Zero = MRI.createGenericVirtualRegister(Ty); 1504 MIRBuilder.buildConstant(Zero, 0); 1505 1506 // For *signed* multiply, overflow is detected by checking: 1507 // (hi != (lo >> bitwidth-1)) 1508 if (Opcode == TargetOpcode::G_SMULH) { 1509 unsigned Shifted = MRI.createGenericVirtualRegister(Ty); 1510 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 1511 MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); 1512 MIRBuilder.buildInstr(TargetOpcode::G_ASHR) 1513 .addDef(Shifted) 1514 .addUse(Res) 1515 .addUse(ShiftAmt); 1516 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted); 1517 } else { 1518 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); 1519 } 1520 MI.eraseFromParent(); 1521 return Legalized; 1522 } 1523 case TargetOpcode::G_FNEG: { 1524 // TODO: Handle vector types once we are able to 1525 // represent them. 
1526 if (Ty.isVector()) 1527 return UnableToLegalize; 1528 unsigned Res = MI.getOperand(0).getReg(); 1529 Type *ZeroTy; 1530 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1531 switch (Ty.getSizeInBits()) { 1532 case 16: 1533 ZeroTy = Type::getHalfTy(Ctx); 1534 break; 1535 case 32: 1536 ZeroTy = Type::getFloatTy(Ctx); 1537 break; 1538 case 64: 1539 ZeroTy = Type::getDoubleTy(Ctx); 1540 break; 1541 case 128: 1542 ZeroTy = Type::getFP128Ty(Ctx); 1543 break; 1544 default: 1545 llvm_unreachable("unexpected floating-point type"); 1546 } 1547 ConstantFP &ZeroForNegation = 1548 *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); 1549 auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); 1550 MIRBuilder.buildInstr(TargetOpcode::G_FSUB) 1551 .addDef(Res) 1552 .addUse(Zero->getOperand(0).getReg()) 1553 .addUse(MI.getOperand(1).getReg()); 1554 MI.eraseFromParent(); 1555 return Legalized; 1556 } 1557 case TargetOpcode::G_FSUB: { 1558 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). 1559 // First, check if G_FNEG is marked as Lower. If so, we may 1560 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. 
1561 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) 1562 return UnableToLegalize; 1563 unsigned Res = MI.getOperand(0).getReg(); 1564 unsigned LHS = MI.getOperand(1).getReg(); 1565 unsigned RHS = MI.getOperand(2).getReg(); 1566 unsigned Neg = MRI.createGenericVirtualRegister(Ty); 1567 MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); 1568 MIRBuilder.buildInstr(TargetOpcode::G_FADD) 1569 .addDef(Res) 1570 .addUse(LHS) 1571 .addUse(Neg); 1572 MI.eraseFromParent(); 1573 return Legalized; 1574 } 1575 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { 1576 unsigned OldValRes = MI.getOperand(0).getReg(); 1577 unsigned SuccessRes = MI.getOperand(1).getReg(); 1578 unsigned Addr = MI.getOperand(2).getReg(); 1579 unsigned CmpVal = MI.getOperand(3).getReg(); 1580 unsigned NewVal = MI.getOperand(4).getReg(); 1581 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, 1582 **MI.memoperands_begin()); 1583 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); 1584 MI.eraseFromParent(); 1585 return Legalized; 1586 } 1587 case TargetOpcode::G_LOAD: 1588 case TargetOpcode::G_SEXTLOAD: 1589 case TargetOpcode::G_ZEXTLOAD: { 1590 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT 1591 unsigned DstReg = MI.getOperand(0).getReg(); 1592 unsigned PtrReg = MI.getOperand(1).getReg(); 1593 LLT DstTy = MRI.getType(DstReg); 1594 auto &MMO = **MI.memoperands_begin(); 1595 1596 if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { 1597 // In the case of G_LOAD, this was a non-extending load already and we're 1598 // about to lower to the same instruction. 
1599 if (MI.getOpcode() == TargetOpcode::G_LOAD) 1600 return UnableToLegalize; 1601 MIRBuilder.buildLoad(DstReg, PtrReg, MMO); 1602 MI.eraseFromParent(); 1603 return Legalized; 1604 } 1605 1606 if (DstTy.isScalar()) { 1607 unsigned TmpReg = MRI.createGenericVirtualRegister( 1608 LLT::scalar(MMO.getSize() /* in bytes */ * 8)); 1609 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 1610 switch (MI.getOpcode()) { 1611 default: 1612 llvm_unreachable("Unexpected opcode"); 1613 case TargetOpcode::G_LOAD: 1614 MIRBuilder.buildAnyExt(DstReg, TmpReg); 1615 break; 1616 case TargetOpcode::G_SEXTLOAD: 1617 MIRBuilder.buildSExt(DstReg, TmpReg); 1618 break; 1619 case TargetOpcode::G_ZEXTLOAD: 1620 MIRBuilder.buildZExt(DstReg, TmpReg); 1621 break; 1622 } 1623 MI.eraseFromParent(); 1624 return Legalized; 1625 } 1626 1627 return UnableToLegalize; 1628 } 1629 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1630 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1631 case TargetOpcode::G_CTLZ: 1632 case TargetOpcode::G_CTTZ: 1633 case TargetOpcode::G_CTPOP: 1634 return lowerBitCount(MI, TypeIdx, Ty); 1635 case G_UADDE: { 1636 unsigned Res = MI.getOperand(0).getReg(); 1637 unsigned CarryOut = MI.getOperand(1).getReg(); 1638 unsigned LHS = MI.getOperand(2).getReg(); 1639 unsigned RHS = MI.getOperand(3).getReg(); 1640 unsigned CarryIn = MI.getOperand(4).getReg(); 1641 1642 unsigned TmpRes = MRI.createGenericVirtualRegister(Ty); 1643 unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); 1644 1645 MIRBuilder.buildAdd(TmpRes, LHS, RHS); 1646 MIRBuilder.buildZExt(ZExtCarryIn, CarryIn); 1647 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn); 1648 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS); 1649 1650 MI.eraseFromParent(); 1651 return Legalized; 1652 } 1653 case G_USUBO: { 1654 unsigned Res = MI.getOperand(0).getReg(); 1655 unsigned BorrowOut = MI.getOperand(1).getReg(); 1656 unsigned LHS = MI.getOperand(2).getReg(); 1657 unsigned RHS = MI.getOperand(3).getReg(); 1658 1659 MIRBuilder.buildSub(Res, LHS, 
RHS); 1660 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS); 1661 1662 MI.eraseFromParent(); 1663 return Legalized; 1664 } 1665 case G_USUBE: { 1666 unsigned Res = MI.getOperand(0).getReg(); 1667 unsigned BorrowOut = MI.getOperand(1).getReg(); 1668 unsigned LHS = MI.getOperand(2).getReg(); 1669 unsigned RHS = MI.getOperand(3).getReg(); 1670 unsigned BorrowIn = MI.getOperand(4).getReg(); 1671 1672 unsigned TmpRes = MRI.createGenericVirtualRegister(Ty); 1673 unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty); 1674 unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); 1675 unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); 1676 1677 MIRBuilder.buildSub(TmpRes, LHS, RHS); 1678 MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn); 1679 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); 1680 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS); 1681 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS); 1682 MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); 1683 1684 MI.eraseFromParent(); 1685 return Legalized; 1686 } 1687 } 1688 } 1689 1690 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( 1691 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { 1692 SmallVector<unsigned, 2> DstRegs; 1693 1694 unsigned NarrowSize = NarrowTy.getSizeInBits(); 1695 unsigned DstReg = MI.getOperand(0).getReg(); 1696 unsigned Size = MRI.getType(DstReg).getSizeInBits(); 1697 int NumParts = Size / NarrowSize; 1698 // FIXME: Don't know how to handle the situation where the small vectors 1699 // aren't all the same size yet. 
1700 if (Size % NarrowSize != 0) 1701 return UnableToLegalize; 1702 1703 for (int i = 0; i < NumParts; ++i) { 1704 unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 1705 MIRBuilder.buildUndef(TmpReg); 1706 DstRegs.push_back(TmpReg); 1707 } 1708 1709 if (NarrowTy.isVector()) 1710 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1711 else 1712 MIRBuilder.buildBuildVector(DstReg, DstRegs); 1713 1714 MI.eraseFromParent(); 1715 return Legalized; 1716 } 1717 1718 LegalizerHelper::LegalizeResult 1719 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, 1720 LLT NarrowTy) { 1721 const unsigned Opc = MI.getOpcode(); 1722 const unsigned NumOps = MI.getNumOperands() - 1; 1723 const unsigned NarrowSize = NarrowTy.getSizeInBits(); 1724 const unsigned DstReg = MI.getOperand(0).getReg(); 1725 const unsigned Flags = MI.getFlags(); 1726 const LLT DstTy = MRI.getType(DstReg); 1727 const unsigned Size = DstTy.getSizeInBits(); 1728 const int NumParts = Size / NarrowSize; 1729 const LLT EltTy = DstTy.getElementType(); 1730 const unsigned EltSize = EltTy.getSizeInBits(); 1731 const unsigned BitsForNumParts = NarrowSize * NumParts; 1732 1733 // Check if we have any leftovers. If we do, then only handle the case where 1734 // the leftover is one element. 1735 if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size) 1736 return UnableToLegalize; 1737 1738 if (BitsForNumParts != Size) { 1739 unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy); 1740 MIRBuilder.buildUndef(AccumDstReg); 1741 1742 // Handle the pieces which evenly divide into the requested type with 1743 // extract/op/insert sequence. 
1744 for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) { 1745 SmallVector<SrcOp, 4> SrcOps; 1746 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1747 unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy); 1748 MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset); 1749 SrcOps.push_back(PartOpReg); 1750 } 1751 1752 unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); 1753 MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1754 1755 unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy); 1756 MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset); 1757 AccumDstReg = PartInsertReg; 1758 Offset += NarrowSize; 1759 } 1760 1761 // Handle the remaining element sized leftover piece. 1762 SmallVector<SrcOp, 4> SrcOps; 1763 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 1764 unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy); 1765 MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), 1766 BitsForNumParts); 1767 SrcOps.push_back(PartOpReg); 1768 } 1769 1770 unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy); 1771 MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); 1772 MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts); 1773 MI.eraseFromParent(); 1774 1775 return Legalized; 1776 } 1777 1778 SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; 1779 1780 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs); 1781 1782 if (NumOps >= 2) 1783 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs); 1784 1785 if (NumOps >= 3) 1786 extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs); 1787 1788 for (int i = 0; i < NumParts; ++i) { 1789 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); 1790 1791 if (NumOps == 1) 1792 MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags); 1793 else if (NumOps == 2) { 1794 MIRBuilder.buildInstr(Opc, 
{DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags); 1795 } else if (NumOps == 3) { 1796 MIRBuilder.buildInstr(Opc, {DstReg}, 1797 {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags); 1798 } 1799 1800 DstRegs.push_back(DstReg); 1801 } 1802 1803 if (NarrowTy.isVector()) 1804 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1805 else 1806 MIRBuilder.buildBuildVector(DstReg, DstRegs); 1807 1808 MI.eraseFromParent(); 1809 return Legalized; 1810 } 1811 1812 LegalizerHelper::LegalizeResult 1813 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, 1814 LLT NarrowTy) { 1815 if (TypeIdx != 0) 1816 return UnableToLegalize; 1817 1818 unsigned DstReg = MI.getOperand(0).getReg(); 1819 unsigned SrcReg = MI.getOperand(1).getReg(); 1820 LLT DstTy = MRI.getType(DstReg); 1821 LLT SrcTy = MRI.getType(SrcReg); 1822 1823 LLT NarrowTy0 = NarrowTy; 1824 LLT NarrowTy1; 1825 unsigned NumParts; 1826 1827 if (NarrowTy.isVector()) { 1828 // Uneven breakdown not handled. 1829 NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); 1830 if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) 1831 return UnableToLegalize; 1832 1833 NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); 1834 } else { 1835 NumParts = DstTy.getNumElements(); 1836 NarrowTy1 = SrcTy.getElementType(); 1837 } 1838 1839 SmallVector<unsigned, 4> SrcRegs, DstRegs; 1840 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); 1841 1842 for (unsigned I = 0; I < NumParts; ++I) { 1843 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0); 1844 MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode()) 1845 .addDef(DstReg) 1846 .addUse(SrcRegs[I]); 1847 1848 NewInst->setFlags(MI.getFlags()); 1849 DstRegs.push_back(DstReg); 1850 } 1851 1852 if (NarrowTy.isVector()) 1853 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 1854 else 1855 MIRBuilder.buildBuildVector(DstReg, DstRegs); 1856 1857 MI.eraseFromParent(); 1858 return Legalized; 1859 } 1860 1861 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  // Split a vector G_ICMP/G_FCMP into NumParts narrower compares, then
  // reassemble the per-part results into the original result type with
  // G_CONCAT_VECTORS / G_BUILD_VECTOR.
  //
  // Operand layout: 0 = result, 1 = predicate, 2 and 3 = compare sources.
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  // NarrowTy0 is the per-part result type, NarrowTy1 the per-part source type.
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    // Narrowing was requested on the result type; derive the matching source
    // piece type from the source's scalar size.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Narrowing was requested on the source type; derive the matching result
    // piece type from the destination's scalar size.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  // Emit one compare per part. FP compares carry over the original
  // instruction's flags; integer compares have none to propagate.
  for (unsigned I = 0; I < NumParts; ++I) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  // Vector pieces are concatenated; scalar pieces form a new vector.
  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  // Split a vector G_SELECT into NumParts narrower selects. The condition may
  // be a scalar (one condition for the whole vector) or a vector (one
  // condition element per result element).
  //
  // Operand layout: 0 = result, 1 = condition, 2 = true value, 3 = false
  // value.
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  // NarrowTy0 is the per-part value type, NarrowTy1 the per-part condition
  // type.
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  // Narrowing the condition (TypeIdx != 0) only makes sense for a vector
  // condition.
  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // NOTE(review): the vector-condition-piece case is not implemented yet;
      // this bails out unconditionally even when the breakdown is even.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // A scalar condition is shared by all parts, so only a vector condition is
  // split.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  // Reassemble the per-part results into the original result register.
  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy
///
/// Returns -1 if the breakdown is not satisfiable.
2006 static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { 2007 assert(!LeftoverTy.isValid() && "this is an out argument"); 2008 2009 unsigned Size = OrigTy.getSizeInBits(); 2010 unsigned NarrowSize = NarrowTy.getSizeInBits(); 2011 unsigned NumParts = Size / NarrowSize; 2012 unsigned LeftoverSize = Size - NumParts * NarrowSize; 2013 assert(Size > NarrowSize); 2014 2015 if (LeftoverSize == 0) 2016 return NumParts; 2017 2018 if (NarrowTy.isVector()) { 2019 unsigned EltSize = OrigTy.getScalarSizeInBits(); 2020 if (LeftoverSize % EltSize != 0) 2021 return -1; 2022 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); 2023 } else { 2024 LeftoverTy = LLT::scalar(LeftoverSize); 2025 } 2026 2027 return NumParts; 2028 } 2029 2030 LegalizerHelper::LegalizeResult 2031 LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx, 2032 LLT NarrowTy) { 2033 // FIXME: Don't know how to handle secondary types yet. 2034 if (TypeIdx != 0) 2035 return UnableToLegalize; 2036 2037 MachineMemOperand *MMO = *MI.memoperands_begin(); 2038 2039 // This implementation doesn't work for atomics. Give up instead of doing 2040 // something invalid. 
2041 if (MMO->getOrdering() != AtomicOrdering::NotAtomic || 2042 MMO->getFailureOrdering() != AtomicOrdering::NotAtomic) 2043 return UnableToLegalize; 2044 2045 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; 2046 unsigned ValReg = MI.getOperand(0).getReg(); 2047 unsigned AddrReg = MI.getOperand(1).getReg(); 2048 LLT ValTy = MRI.getType(ValReg); 2049 2050 int NumParts = -1; 2051 LLT LeftoverTy; 2052 SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs; 2053 if (IsLoad) { 2054 NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy); 2055 } else { 2056 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs, 2057 NarrowLeftoverRegs)) 2058 NumParts = NarrowRegs.size(); 2059 } 2060 2061 if (NumParts == -1) 2062 return UnableToLegalize; 2063 2064 const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); 2065 2066 unsigned TotalSize = ValTy.getSizeInBits(); 2067 2068 // Split the load/store into PartTy sized pieces starting at Offset. If this 2069 // is a load, return the new registers in ValRegs. For a store, each elements 2070 // of ValRegs should be PartTy. Returns the next offset that needs to be 2071 // handled. 
2072 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs, 2073 unsigned Offset) -> unsigned { 2074 MachineFunction &MF = MIRBuilder.getMF(); 2075 unsigned PartSize = PartTy.getSizeInBits(); 2076 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; 2077 Offset += PartSize, ++Idx) { 2078 unsigned ByteSize = PartSize / 8; 2079 unsigned ByteOffset = Offset / 8; 2080 unsigned NewAddrReg = 0; 2081 2082 MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset); 2083 2084 MachineMemOperand *NewMMO = 2085 MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); 2086 2087 if (IsLoad) { 2088 unsigned Dst = MRI.createGenericVirtualRegister(PartTy); 2089 ValRegs.push_back(Dst); 2090 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO); 2091 } else { 2092 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO); 2093 } 2094 } 2095 2096 return Offset; 2097 }; 2098 2099 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0); 2100 2101 // Handle the rest of the register if this isn't an even type breakdown. 
2102 if (LeftoverTy.isValid()) 2103 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); 2104 2105 if (IsLoad) { 2106 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, 2107 LeftoverTy, NarrowLeftoverRegs); 2108 } 2109 2110 MI.eraseFromParent(); 2111 return Legalized; 2112 } 2113 2114 LegalizerHelper::LegalizeResult 2115 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 2116 LLT NarrowTy) { 2117 using namespace TargetOpcode; 2118 2119 MIRBuilder.setInstr(MI); 2120 switch (MI.getOpcode()) { 2121 case G_IMPLICIT_DEF: 2122 return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); 2123 case G_AND: 2124 case G_OR: 2125 case G_XOR: 2126 case G_ADD: 2127 case G_SUB: 2128 case G_MUL: 2129 case G_SMULH: 2130 case G_UMULH: 2131 case G_FADD: 2132 case G_FMUL: 2133 case G_FSUB: 2134 case G_FNEG: 2135 case G_FABS: 2136 case G_FDIV: 2137 case G_FREM: 2138 case G_FMA: 2139 case G_FPOW: 2140 case G_FEXP: 2141 case G_FEXP2: 2142 case G_FLOG: 2143 case G_FLOG2: 2144 case G_FLOG10: 2145 case G_FCEIL: 2146 case G_INTRINSIC_ROUND: 2147 case G_INTRINSIC_TRUNC: 2148 case G_FCOS: 2149 case G_FSIN: 2150 case G_FSQRT: 2151 case G_BSWAP: 2152 return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); 2153 case G_ZEXT: 2154 case G_SEXT: 2155 case G_ANYEXT: 2156 case G_FPEXT: 2157 case G_FPTRUNC: 2158 case G_SITOFP: 2159 case G_UITOFP: 2160 case G_FPTOSI: 2161 case G_FPTOUI: 2162 case G_INTTOPTR: 2163 case G_PTRTOINT: 2164 return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); 2165 case G_ICMP: 2166 case G_FCMP: 2167 return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); 2168 case G_SELECT: 2169 return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); 2170 case G_LOAD: 2171 case G_STORE: 2172 return fewerElementsVectorLoadStore(MI, TypeIdx, NarrowTy); 2173 default: 2174 return UnableToLegalize; 2175 } 2176 } 2177 2178 LegalizerHelper::LegalizeResult 2179 LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) { 2180 unsigned DstReg = 
MI.getOperand(0).getReg(); 2181 unsigned Src0 = MI.getOperand(1).getReg(); 2182 unsigned Src1 = MI.getOperand(2).getReg(); 2183 LLT Ty = MRI.getType(DstReg); 2184 if (Ty.isVector()) 2185 return UnableToLegalize; 2186 2187 unsigned Size = Ty.getSizeInBits(); 2188 unsigned NewSize = Size / 2; 2189 if (Size != 2 * NewSize) 2190 return UnableToLegalize; 2191 2192 LLT HalfTy = LLT::scalar(NewSize); 2193 // TODO: if HalfTy != NewTy, handle the breakdown all at once? 2194 2195 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); 2196 unsigned Lo = MRI.createGenericVirtualRegister(HalfTy); 2197 unsigned Hi = MRI.createGenericVirtualRegister(HalfTy); 2198 unsigned ExtLo = MRI.createGenericVirtualRegister(Ty); 2199 unsigned ExtHi = MRI.createGenericVirtualRegister(Ty); 2200 unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty); 2201 2202 SmallVector<unsigned, 2> Src0Parts; 2203 SmallVector<unsigned, 2> Src1Parts; 2204 2205 extractParts(Src0, HalfTy, 2, Src0Parts); 2206 extractParts(Src1, HalfTy, 2, Src1Parts); 2207 2208 MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]); 2209 2210 // TODO: Use smulh or umulh depending on what the target has. 
2211 MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]); 2212 2213 MIRBuilder.buildConstant(ShiftAmt, NewSize); 2214 MIRBuilder.buildAnyExt(ExtHi, Hi); 2215 MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt); 2216 2217 MIRBuilder.buildZExt(ExtLo, Lo); 2218 MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi); 2219 MI.eraseFromParent(); 2220 return Legalized; 2221 } 2222 2223 LegalizerHelper::LegalizeResult 2224 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 2225 unsigned Opc = MI.getOpcode(); 2226 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 2227 auto isSupported = [this](const LegalityQuery &Q) { 2228 auto QAction = LI.getAction(Q).Action; 2229 return QAction == Legal || QAction == Libcall || QAction == Custom; 2230 }; 2231 switch (Opc) { 2232 default: 2233 return UnableToLegalize; 2234 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 2235 // This trivially expands to CTLZ. 2236 Observer.changingInstr(MI); 2237 MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); 2238 Observer.changedInstr(MI); 2239 return Legalized; 2240 } 2241 case TargetOpcode::G_CTLZ: { 2242 unsigned SrcReg = MI.getOperand(1).getReg(); 2243 unsigned Len = Ty.getSizeInBits(); 2244 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { 2245 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 2246 auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, 2247 {Ty}, {SrcReg}); 2248 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 2249 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 2250 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 2251 SrcReg, MIBZero); 2252 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 2253 MIBCtlzZU); 2254 MI.eraseFromParent(); 2255 return Legalized; 2256 } 2257 // for now, we do this: 2258 // NewLen = NextPowerOf2(Len); 2259 // x = x | (x >> 1); 2260 // x = x | (x >> 2); 2261 // ... 
2262 // x = x | (x >>16); 2263 // x = x | (x >>32); // for 64-bit input 2264 // Upto NewLen/2 2265 // return Len - popcount(x); 2266 // 2267 // Ref: "Hacker's Delight" by Henry Warren 2268 unsigned Op = SrcReg; 2269 unsigned NewLen = PowerOf2Ceil(Len); 2270 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { 2271 auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); 2272 auto MIBOp = MIRBuilder.buildInstr( 2273 TargetOpcode::G_OR, {Ty}, 2274 {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty}, 2275 {Op, MIBShiftAmt})}); 2276 Op = MIBOp->getOperand(0).getReg(); 2277 } 2278 auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op}); 2279 MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 2280 {MIRBuilder.buildConstant(Ty, Len), MIBPop}); 2281 MI.eraseFromParent(); 2282 return Legalized; 2283 } 2284 case TargetOpcode::G_CTTZ_ZERO_UNDEF: { 2285 // This trivially expands to CTTZ. 2286 Observer.changingInstr(MI); 2287 MI.setDesc(TII.get(TargetOpcode::G_CTTZ)); 2288 Observer.changedInstr(MI); 2289 return Legalized; 2290 } 2291 case TargetOpcode::G_CTTZ: { 2292 unsigned SrcReg = MI.getOperand(1).getReg(); 2293 unsigned Len = Ty.getSizeInBits(); 2294 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { 2295 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with 2296 // zero. 
2297 auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, 2298 {Ty}, {SrcReg}); 2299 auto MIBZero = MIRBuilder.buildConstant(Ty, 0); 2300 auto MIBLen = MIRBuilder.buildConstant(Ty, Len); 2301 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 2302 SrcReg, MIBZero); 2303 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen, 2304 MIBCttzZU); 2305 MI.eraseFromParent(); 2306 return Legalized; 2307 } 2308 // for now, we use: { return popcount(~x & (x - 1)); } 2309 // unless the target has ctlz but not ctpop, in which case we use: 2310 // { return 32 - nlz(~x & (x-1)); } 2311 // Ref: "Hacker's Delight" by Henry Warren 2312 auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1); 2313 auto MIBNot = 2314 MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1}); 2315 auto MIBTmp = MIRBuilder.buildInstr( 2316 TargetOpcode::G_AND, {Ty}, 2317 {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, 2318 {SrcReg, MIBCstNeg1})}); 2319 if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && 2320 isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { 2321 auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); 2322 MIRBuilder.buildInstr( 2323 TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, 2324 {MIBCstLen, 2325 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})}); 2326 MI.eraseFromParent(); 2327 return Legalized; 2328 } 2329 MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); 2330 MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg()); 2331 return Legalized; 2332 } 2333 } 2334 } 2335