1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file implements the LegalizerHelper class to legalize 10 /// individual instructions and the LegalizeMachineIR wrapper pass for the 11 /// primary legalization. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 16 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/CodeGen/TargetFrameLowering.h" 21 #include "llvm/CodeGen/TargetInstrInfo.h" 22 #include "llvm/CodeGen/TargetLowering.h" 23 #include "llvm/CodeGen/TargetSubtargetInfo.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #define DEBUG_TYPE "legalizer" 29 30 using namespace llvm; 31 using namespace LegalizeActions; 32 33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces. 34 /// 35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, 36 /// with any leftover piece as type \p LeftoverTy 37 /// 38 /// Returns -1 in the first element of the pair if the breakdown is not 39 /// satisfiable. 
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  // Exact multiple: no leftover piece, LeftoverTy stays invalid.
  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    // The leftover must be expressible as a whole number of elements of the
    // original element type; otherwise the breakdown is unsatisfiable.
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}

// Construct a helper that pulls the LegalizerInfo from the function's
// subtarget. The builder is (re)pointed at \p MF and wired to the observer so
// every instruction it creates is reported.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

// Same as above, but with an explicitly supplied LegalizerInfo.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

// Perform one legalization step on \p MI: query the LegalizerInfo for the
// required action and dispatch to the matching strategy. Intrinsics bypass
// the action table and go straight to the target hook.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
                                                     : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

// Split \p Reg into exactly \p NumParts pieces of type \p Ty via a single
// G_UNMERGE_VALUES. Requires the split to be exact (no leftover).
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit, plus
// leftover pieces of a type computed into \p LeftoverTy (an out argument).
// Returns false if the leftover cannot be represented (see the element-size
// check below).
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    // Leftover must be a whole number of elements of the main vector type.
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  // Extract the remaining bits, one leftover-sized piece at a time.
  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

// Return the largest type that evenly divides both \p OrigTy and
// \p TargetTy. For two vectors with a common element type this is a vector
// (or scalar) with gcd(#elements); for two scalars it is a scalar of
// gcd(sizes).
static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
  if (OrigTy.isVector() && TargetTy.isVector()) {
    assert(OrigTy.getElementType() == TargetTy.getElementType());
    int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
                                    TargetTy.getNumElements());
    return LLT::scalarOrVector(GCD, OrigTy.getElementType());
  }

  if (OrigTy.isVector() && !TargetTy.isVector()) {
    assert(OrigTy.getElementType() == TargetTy);
    return TargetTy;
  }

  assert(!OrigTy.isVector() && !TargetTy.isVector());

  int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
                                  TargetTy.getSizeInBits());
  return LLT::scalar(GCD);
}

// Inverse of the leftover-aware extractParts: reassemble \p DstReg of type
// \p ResultTy from \p PartRegs (each of \p PartTy) followed by
// \p LeftoverRegs (each of \p LeftoverTy).
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    // Uniform parts: a single merge / concat / build_vector suffices.
    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Mixed sizes: start from undef and insert each piece at its bit offset.
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}

// Map a generic opcode plus the scalar size (in bits) of its result to the
// corresponding runtime-library call enumerator.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    switch (Size) {
    case 32:
      return RTLIB::SDIV_I32;
    case 64:
      return RTLIB::SDIV_I64;
    case 128:
      return RTLIB::SDIV_I128;
    default:
      llvm_unreachable("unexpected size");
    }
  case TargetOpcode::G_UDIV:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    switch (Size) {
    case 32:
      return RTLIB::UDIV_I32;
    case 64:
      return RTLIB::UDIV_I64;
    case 128:
      return RTLIB::UDIV_I128;
    default:
      llvm_unreachable("unexpected size");
    }
  case TargetOpcode::G_SREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
  case TargetOpcode::G_UREM:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    assert(Size == 32 && "Unsupported size");
    return RTLIB::CTLZ_I32;
  case TargetOpcode::G_FADD:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
  case TargetOpcode::G_FSUB:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
  case TargetOpcode::G_FMUL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
  case TargetOpcode::G_FDIV:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
  case TargetOpcode::G_FEXP:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
  case TargetOpcode::G_FEXP2:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
  // NOTE(review): G_FREM and G_FPOW lack the size assert present on every
  // neighboring case — an unexpected Size silently maps to the F32 variant.
  case TargetOpcode::G_FREM:
    return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
  case TargetOpcode::G_FPOW:
    return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
  case TargetOpcode::G_FMA:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
  case TargetOpcode::G_FSIN:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::SIN_F128
                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
  case TargetOpcode::G_FCOS:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::COS_F128
                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
  case TargetOpcode::G_FLOG10:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG10_F128
                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
  case TargetOpcode::G_FLOG:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG_F128
                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
  case TargetOpcode::G_FLOG2:
    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
    return Size == 128 ? RTLIB::LOG2_F128
                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
  case TargetOpcode::G_FCEIL:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
  case TargetOpcode::G_FFLOOR:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
  }
  llvm_unreachable("Unknown libcall function");
}

// Emit a call to the runtime function described by \p Libcall, using the
// target's CallLowering to produce the actual call sequence. Also flags the
// function as containing calls so frame lowering is aware.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  // NOTE(review): getLibcallName may return null if the target provides no
  // implementation — presumably callers only request supported libcalls;
  // confirm, since a null name is passed straight into CreateES below.
  const char *Name = TLI.getLibcallName(Libcall);

  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
// Lower \p MI to a libcall where the result and every source operand share
// the single IR type \p OpType (see comment above).
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Operand 0 is the def; every remaining operand becomes a call argument.
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

// Lower a memcpy/memset/memmove intrinsic to the corresponding libcall.
// Returns UnableToLegalize for any other intrinsic ID.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering: pointers become i8* in
    // the operand's address space, everything else an integer of equal width.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return UnableToLegalize;
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  MIRBuilder.setInstr(MI);
  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  // These mem routines are lowered as returning void here.
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Map a conversion opcode plus its source/destination IR types to the
// matching runtime-library call.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

// Lower a single-source conversion instruction (operand 1 -> operand 0) to
// the corresponding conversion libcall.
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

// Libcall legalization entry point: pick the IR types from the instruction's
// register types, emit the call, and erase the original instruction on
// success.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Integer ops use an integer type matching the result width.
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR: {
    // FP ops only support float/double here; wider types bail out.
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

// Narrow-scalar legalization: rewrite \p MI so the type at \p TypeIdx is
// expressed in terms of the narrower \p NarrowTy, splitting values into
// parts (and, for some opcodes, a leftover piece) as needed.
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // An undef of each part merged back together is still undef.
    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(
          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());

    Register DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    // Materialize each NarrowSize-bit slice of the constant separately.
    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      // Any remaining bits become one extra constant of the leftover width.
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
        LeftoverTy,
        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // FIXME: support the general case where the requested NarrowTy may not be
    // the same as the source type. E.g. s128 = sext(s32)
    if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
        SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
      LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    // Shift the sign bit of the low register through the high register.
    auto ShiftAmt =
        MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
    auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ZEXT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    uint64_t SizeOp1 = SrcTy.getSizeInBits();
    if (SizeOp0 % SizeOp1 != 0)
      return UnableToLegalize;

    // Generate a merge where the bottom bits are taken from the source, and
    // zero everything else.
    Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
    unsigned NumParts = SizeOp0 / SizeOp1;
    SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
    for (unsigned Part = 1; Part < NumParts; ++Part)
      Srcs.push_back(ZeroReg);
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Only handle the exact-halving case: unmerge and keep the low half.
    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
    MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_ADDE instructions.
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    Register CarryIn;
    for (int i = 0; i < NumParts; ++i) {
      Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      // Lowest part has no carry in; subsequent parts chain the carry.
      if (i == 0)
        MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
      else {
        MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                              Src2Regs[i], CarryIn);
      }

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    Register DstReg = MI.getOperand(0).getReg();
    if(MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    // Chain borrow through the parts: USUBO for the lowest, USUBE above it.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    Register BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    const auto &MMO = **MI.memoperands_begin();
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    // Extending load (memory narrower than the result): load NarrowTy and
    // anyext into the destination.
    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE(review): this inner MMO shadows the outer const reference —
      // harmless here (same memoperand), but worth cleaning up.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSizeInBits() == NarrowSize) {
      // Memory exactly matches NarrowTy: a plain load suffices.
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      // Still an extending load at the narrower width.
      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
        : TargetOpcode::G_SEXTLOAD;
      MIRBuilder.buildInstr(ExtLoad)
        .addDef(TmpReg)
        .addUse(PtrReg)
        .addMemOperand(&MMO);
    }

    // Extend the narrow result up to the original destination width.
    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    Register SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    // NOTE(review): unreachable — vector sources already bailed out above.
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    // Truncating store (memory narrower than the source): trunc then store.
    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      // NOTE(review): shadows the outer const MMO, as in the G_LOAD case.
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C2
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx != 0)
      return UnableToLegalize; // TODO

    // Only the result is narrowed; the count is zero-extended back up.
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs;
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
    DstRegs.resize(NumParts);
    SrcRegs.resize(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    // Split each incoming value in its predecessor block, before that
    // block's terminator.
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    // Build one narrow PHI per part, reusing the original incoming-block
    // operands.
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
        MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    // The merge must come after all PHIs in the block.
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    // Only the index operand is narrowed here (operand 2 for extract,
    // operand 3 for insert).
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    // Only handles splitting the sources exactly in half.
    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    if (NarrowSize * 2 != SrcSize)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());

    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());

    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());

    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
      // Equality: xor the halves and compare the or of the differences
      // against zero.
      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
    } else {
      // Ordering: compare high halves; if they are equal, the (unsigned)
      // comparison of the low halves decides.
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
    }
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (!MI.getOperand(2).isImm())
      return UnableToLegalize;
    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
      MO1.setReg(TruncMIB->getOperand(0).getReg());

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      // Insert the widening SEXT after the (now narrowed) SEXT_INREG.
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
982 SmallVector<Register, 2> SrcRegs; 983 984 // Create all the temporary registers. 985 for (int i = 0; i < NumParts; ++i) { 986 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy); 987 988 SrcRegs.push_back(SrcReg); 989 } 990 991 // Explode the big arguments into smaller chunks. 992 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg()); 993 994 Register AshrCstReg = 995 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) 996 ->getOperand(0) 997 .getReg(); 998 Register FullExtensionReg = 0; 999 Register PartialExtensionReg = 0; 1000 1001 // Do the operation on each small part. 1002 for (int i = 0; i < NumParts; ++i) { 1003 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) 1004 DstRegs.push_back(SrcRegs[i]); 1005 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { 1006 assert(PartialExtensionReg && 1007 "Expected to visit partial extension before full"); 1008 if (FullExtensionReg) { 1009 DstRegs.push_back(FullExtensionReg); 1010 continue; 1011 } 1012 DstRegs.push_back(MIRBuilder 1013 .buildInstr(TargetOpcode::G_ASHR, {NarrowTy}, 1014 {PartialExtensionReg, AshrCstReg}) 1015 ->getOperand(0) 1016 .getReg()); 1017 FullExtensionReg = DstRegs.back(); 1018 } else { 1019 DstRegs.push_back( 1020 MIRBuilder 1021 .buildInstr( 1022 TargetOpcode::G_SEXT_INREG, {NarrowTy}, 1023 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()}) 1024 ->getOperand(0) 1025 .getReg()); 1026 PartialExtensionReg = DstRegs.back(); 1027 } 1028 } 1029 1030 // Gather the destination registers into the final destination. 
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Replace the use at operand \p OpIdx with an \p ExtOpcode (e.g. G_ANYEXT)
/// of the original value to \p WideTy, inserted before \p MI. \p MI is
/// modified in place to read the extended register instead.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Replace the use at operand \p OpIdx with a G_TRUNC of the original value
/// to \p NarrowTy, inserted before \p MI. \p MI is modified in place to read
/// the truncated register instead.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
                                    {MO.getReg()});
  MO.setReg(ExtB->getOperand(0).getReg());
}

/// Redirect the def at operand \p OpIdx to a fresh \p WideTy register and
/// emit \p TruncOpcode (default G_TRUNC) after \p MI to narrow the result
/// back into the original destination register.
/// Note: advances the builder's insert point to just past \p MI.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  // Insert the conversion after MI so it consumes MI's (widened) def.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}

/// Redirect the def at operand \p OpIdx to a fresh \p NarrowTy register and
/// emit \p ExtOpcode (e.g. G_ZEXT) after \p MI to widen the result back into
/// the original destination register.
/// Note: advances the builder's insert point to just past \p MI.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  // Insert the conversion after MI so it consumes MI's (narrowed) def.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
  MO.setReg(DstTrunc);
}

/// Redirect the vector def at operand \p OpIdx to a fresh \p WideTy register
/// and emit a G_EXTRACT (at offset 0) after \p MI to recover the original,
/// narrower vector value into the original destination register.
/// Note: advances the builder's insert point to just past \p MI.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
  MO.setReg(DstExt);
}

void
LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                       unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);

  LLT OldTy = MRI.getType(MO.getReg());
  unsigned OldElts = OldTy.getNumElements();
  unsigned NewElts = MoreTy.getNumElements();

  unsigned NumParts = NewElts / OldElts;

  // Use concat_vectors if the result is a multiple of the number of elements.
  if (NumParts * OldElts == NewElts) {
    SmallVector<Register, 8> Parts;
    Parts.push_back(MO.getReg());

    // Pad out to MoreTy with undef copies of the original vector type.
    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
    for (unsigned I = 1; I != NumParts; ++I)
      Parts.push_back(ImpDef);

    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
    MO.setReg(Concat.getReg(0));
    return;
  }

  // Otherwise insert the original value at offset 0 of a MoreTy undef.
  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
  MO.setReg(MoreReg);
}

/// Widen a G_MERGE_VALUES whose *source* type (TypeIdx 1) is being widened to
/// \p WideTy. Scalar destinations only. Either packs the sources directly
/// into one wide register with zext/shl/or (when WideTy covers the whole
/// result), or re-slices everything through the GCD type and rebuilds the
/// result from WideTy-sized merges.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy pieces needed to cover the destination (rounded up).
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    // Start with the low part zero-extended into the wide register.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // Write the final OR directly into DstReg when types already match.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      // OR each successive part into place at its bit offset.
      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  // NOTE(review): 'Parts' is never used in this function — candidate for
  // removal in a follow-up cleanup.
  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      // All operands but the last (the source) are defs of GCDTy pieces.
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  // NOTE(review): the bound compares a part count (Unmerges.size()) against
  // NumMerge * WideSize, which is a bit count — this over-pads when GCD > 1.
  // Harmless for correctness since the merge loop below consumes exactly
  // NumMerge * PartsPerGCD entries, but worth confirming the intent.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Widen a G_UNMERGE_VALUES whose *destination* type (TypeIdx 0) is being
/// widened to \p WideTy. Scalar source and destinations only. The source is
/// zero-extended to NumDst * WideSize bits and shift/or'd so each result can
/// be taken from a WideTy-aligned slot; the defs are then rewritten via
/// widenScalarDst.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // NOTE(review): each iteration shifts the already-accumulated OR result
  // (not the original zext) by SizeDiff*I and ORs it back in — verify this
  // produces the intended per-chunk layout for NumDst > 2.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  // Point the unmerge at the widened source and widen every def.
  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}

/// Widen a G_EXTRACT. For TypeIdx 0 (scalar result), lowers to an anyext +
/// lshr + trunc sequence; pointer sources are handled by first casting to an
/// integer (only in integral address spaces). For TypeIdx 1 with a vector
/// source, widens the element type and rescales the byte offset.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    // NOTE(review): this condition duplicates the one above, so the branch is
    // unreachable dead code. Presumably '<' was intended (bail out when the
    // requested type is narrower than the source) — confirm before changing,
    // since the current fall-through (shift in SrcTy, then trunc) may be
    // relied upon.
    } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
      return UnableToLegalize;

    auto LShr = MIRBuilder.buildLShr(
      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  // TypeIdx == 1: scalar source — just widen the use.
  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Only element extracts from vectors are supported below.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the offset for the widened element size.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}

/// Widen a G_INSERT's result (TypeIdx 0 only): anyext the aggregate source
/// and widen the destination; the inserted value and offset are unchanged.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}

/// Legalize \p MI by widening the scalar type at \p TypeIdx to \p WideTy.
/// Dispatches per opcode; most cases extend the sources, perform the
/// operation at the wider width, and truncate the result back.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case
TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    // Zero-extend both operands so the carry/borrow is observable in the
    // high bits of the wide result.
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    // Mask of the original width; getZExtValue() assumes OrigTy <= 64 bits.
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in WideTy and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    // Emit the fixup sequence after the (now wide) bswap.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The swapped bytes land in the high part of the wide result; shift them
    // back down before truncating.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
        .addDef(ShrReg)
        .addUse(DstExt)
        .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // The reversed bits land in the high part; shift them back down.
    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need sign-extended inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // ASHR needs the sign preserved in the high bits; LSHR needs zeros.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations need zero-extended inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    // The memory operand is unchanged; only the register type is widened.
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // i1 stores must be zero-extended so the stored byte has a defined value.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Sign-extend the immediate to the wider type.
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    // Convert the immediate to the wider FP semantics; only f32/f64 targets
    // are supported here.
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    // The branch condition is a boolean; use the target's boolean extension.
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Extension matching the predicate's signedness preserves the compare.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    // Pointer offsets are signed.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, before the
    // terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // Truncate the widened PHI result after the PHI group.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      // Widen the vector's element type to match the widened result.
      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP ops: fpext every source, do the op wide, fptrunc the result.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy =
/// Lower \p MI — an instruction the target marked as \c Lower — into an
/// equivalent sequence of simpler generic opcodes. \p TypeIdx and \p Ty
/// identify the type index and type the legalization rule matched on.
///
/// On success the original \p MI is erased and \c Legalized is returned;
/// opcodes with no known lowering return \c UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y, using the matching signed/unsigned division.
    Register QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    Register ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Register HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    Register Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      Register Shifted = MRI.createGenericVirtualRegister(Ty);
      Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned overflow iff the high half is nonzero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // Lower fneg to (G_FSUB -0.0, x).
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Pick the IR FP type matching the scalar bit width so the correct
    // negative-zero constant can be materialized.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    Register SubByReg = MI.getOperand(1).getReg();
    Register ZeroReg = Zero->getOperand(0).getReg();
    // Preserve fast-math flags from the original instruction.
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
                          MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain cmpxchg plus an explicit comparison producing the
    // success flag.
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
        // This load needs splitting into power of 2 sized loads.
        if (DstTy.isVector())
          return UnableToLegalize;
        if (isPowerOf2_32(DstTy.getSizeInBits()))
          return UnableToLegalize; // Don't know what we're being asked to do.

        // Our strategy here is to generate anyextending loads for the smaller
        // types up to next power-2 result type, and then combine the two larger
        // result values together, before truncating back down to the non-pow-2
        // type.
        // E.g. v1 = i24 load =>
        // v2 = i32 load (2 byte)
        // v3 = i32 load (1 byte)
        // v4 = i32 shl v3, 16
        // v5 = i32 or v4, v2
        // v1 = i24 trunc v5
        // By doing this we generate the correct truncate which should get
        // combined away as an artifact with a matching extend.
        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

        MachineFunction &MF = MIRBuilder.getMF();
        MachineMemOperand *LargeMMO =
            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
        MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
            &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

        LLT PtrTy = MRI.getType(PtrReg);
        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
        LLT AnyExtTy = LLT::scalar(AnyExtSize);
        Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        auto LargeLoad =
            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);

        // The small piece lives LargeSplitSize/8 bytes past the base pointer.
        auto OffsetCst =
            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
        Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                              *SmallMMO);

        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
        MI.eraseFromParent();
        return Legalized;
      }

      // Extending load whose result width equals the memory width: a plain
      // load is equivalent.
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at memory width, then extend to the requested result width.
      Register TmpReg =
          MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_STORE: {
    // Lower a non-power of 2 store into multiple pow-2 stores.
    // E.g. split an i24 store into an i16 store + i8 store.
    // We do this by first extending the stored value to the next largest power
    // of 2 type, and then using truncating stores to store the components.
    // By doing this, likewise with G_LOAD, generate an extend that can be
    // artifact-combined away instead of leaving behind extracts.
    Register SrcReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    MachineMemOperand &MMO = **MI.memoperands_begin();
    if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
      return UnableToLegalize;
    if (SrcTy.isVector())
      return UnableToLegalize;
    if (isPowerOf2_32(SrcTy.getSizeInBits()))
      return UnableToLegalize; // Don't know what we're being asked to do.

    // Extend to the next pow-2.
    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

    // Obtain the smaller value by shifting away the larger value.
    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

    // Generate the GEP and truncating stores.
    LLT PtrTy = MRI.getType(PtrReg);
    auto OffsetCst =
        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
    Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));

    MachineFunction &MF = MIRBuilder.getMF();
    MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
    MachineMemOperand *SmallMMO =
        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDO: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    // Unsigned add overflowed iff the result wrapped below an operand.
    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    // Res = LHS + RHS + zext(CarryIn); carry out on unsigned wrap.
    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    // Unsigned subtract borrows iff LHS < RHS.
    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    // Res = LHS - RHS - zext(BorrowIn). Borrow out is LHS < RHS, except when
    // LHS == RHS, where the incoming borrow decides.
    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UITOFP:
    return lowerUITOFP(MI, TypeIdx, Ty);
  case G_SITOFP:
    return lowerSITOFP(MI, TypeIdx, Ty);
  case G_FPTOUI:
    return lowerFPTOUI(MI, TypeIdx, Ty);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI, TypeIdx, Ty);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI, TypeIdx, Ty);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    // Sign-extend in register via shl + ashr by (width - SizeInBits).
    auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
    MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
    MI.eraseFromParent();
    return Legalized;
  }
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  }
}
case G_UMAX: 2177 return lowerMinMax(MI, TypeIdx, Ty); 2178 case G_FCOPYSIGN: 2179 return lowerFCopySign(MI, TypeIdx, Ty); 2180 case G_FMINNUM: 2181 case G_FMAXNUM: 2182 return lowerFMinNumMaxNum(MI); 2183 case G_UNMERGE_VALUES: 2184 return lowerUnmergeValues(MI); 2185 case TargetOpcode::G_SEXT_INREG: { 2186 assert(MI.getOperand(2).isImm() && "Expected immediate"); 2187 int64_t SizeInBits = MI.getOperand(2).getImm(); 2188 2189 Register DstReg = MI.getOperand(0).getReg(); 2190 Register SrcReg = MI.getOperand(1).getReg(); 2191 LLT DstTy = MRI.getType(DstReg); 2192 Register TmpRes = MRI.createGenericVirtualRegister(DstTy); 2193 2194 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits); 2195 MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()}); 2196 MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()}); 2197 MI.eraseFromParent(); 2198 return Legalized; 2199 } 2200 case G_SHUFFLE_VECTOR: 2201 return lowerShuffleVector(MI); 2202 case G_DYN_STACKALLOC: 2203 return lowerDynStackAlloc(MI); 2204 } 2205 } 2206 2207 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( 2208 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { 2209 SmallVector<Register, 2> DstRegs; 2210 2211 unsigned NarrowSize = NarrowTy.getSizeInBits(); 2212 Register DstReg = MI.getOperand(0).getReg(); 2213 unsigned Size = MRI.getType(DstReg).getSizeInBits(); 2214 int NumParts = Size / NarrowSize; 2215 // FIXME: Don't know how to handle the situation where the small vectors 2216 // aren't all the same size yet. 
/// Split a simple (1- to 3-source, same-type) vector operation into NarrowTy
/// sized pieces, with at most a single element-sized leftover piece.
///
/// The evenly-dividing case extracts NarrowTy parts of every operand, applies
/// the opcode per part, and reassembles the result; the uneven case handles
/// the leftover element via an extract/op/insert sequence into an accumulator.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    // Uneven breakdown: accumulate pieces into an undef-initialized register.
    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    // Final insert defines the original destination directly.
    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even breakdown: split each source operand, apply the opcode part-wise.
  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  for (int i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
Src1Regs[i]}, Flags); 2311 } else if (NumOps == 3) { 2312 MIRBuilder.buildInstr(Opc, {DstReg}, 2313 {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags); 2314 } 2315 2316 DstRegs.push_back(DstReg); 2317 } 2318 2319 if (NarrowTy.isVector()) 2320 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 2321 else 2322 MIRBuilder.buildBuildVector(DstReg, DstRegs); 2323 2324 MI.eraseFromParent(); 2325 return Legalized; 2326 } 2327 2328 // Handle splitting vector operations which need to have the same number of 2329 // elements in each type index, but each type index may have a different element 2330 // type. 2331 // 2332 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> -> 2333 // <2 x s64> = G_SHL <2 x s64>, <2 x s32> 2334 // <2 x s64> = G_SHL <2 x s64>, <2 x s32> 2335 // 2336 // Also handles some irregular breakdown cases, e.g. 2337 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> -> 2338 // <2 x s64> = G_SHL <2 x s64>, <2 x s32> 2339 // s64 = G_SHL s64, s32 2340 LegalizerHelper::LegalizeResult 2341 LegalizerHelper::fewerElementsVectorMultiEltType( 2342 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) { 2343 if (TypeIdx != 0) 2344 return UnableToLegalize; 2345 2346 const LLT NarrowTy0 = NarrowTyArg; 2347 const unsigned NewNumElts = 2348 NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1; 2349 2350 const Register DstReg = MI.getOperand(0).getReg(); 2351 LLT DstTy = MRI.getType(DstReg); 2352 LLT LeftoverTy0; 2353 2354 // All of the operands need to have the same number of elements, so if we can 2355 // determine a type breakdown for the result type, we can for all of the 2356 // source types. 
2357 int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first; 2358 if (NumParts < 0) 2359 return UnableToLegalize; 2360 2361 SmallVector<MachineInstrBuilder, 4> NewInsts; 2362 2363 SmallVector<Register, 4> DstRegs, LeftoverDstRegs; 2364 SmallVector<Register, 4> PartRegs, LeftoverRegs; 2365 2366 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { 2367 LLT LeftoverTy; 2368 Register SrcReg = MI.getOperand(I).getReg(); 2369 LLT SrcTyI = MRI.getType(SrcReg); 2370 LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType()); 2371 LLT LeftoverTyI; 2372 2373 // Split this operand into the requested typed registers, and any leftover 2374 // required to reproduce the original type. 2375 if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, 2376 LeftoverRegs)) 2377 return UnableToLegalize; 2378 2379 if (I == 1) { 2380 // For the first operand, create an instruction for each part and setup 2381 // the result. 2382 for (Register PartReg : PartRegs) { 2383 Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); 2384 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) 2385 .addDef(PartDstReg) 2386 .addUse(PartReg)); 2387 DstRegs.push_back(PartDstReg); 2388 } 2389 2390 for (Register LeftoverReg : LeftoverRegs) { 2391 Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); 2392 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) 2393 .addDef(PartDstReg) 2394 .addUse(LeftoverReg)); 2395 LeftoverDstRegs.push_back(PartDstReg); 2396 } 2397 } else { 2398 assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); 2399 2400 // Add the newly created operand splits to the existing instructions. The 2401 // odd-sized pieces are ordered after the requested NarrowTyArg sized 2402 // pieces. 
2403 unsigned InstCount = 0; 2404 for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) 2405 NewInsts[InstCount++].addUse(PartRegs[J]); 2406 for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) 2407 NewInsts[InstCount++].addUse(LeftoverRegs[J]); 2408 } 2409 2410 PartRegs.clear(); 2411 LeftoverRegs.clear(); 2412 } 2413 2414 // Insert the newly built operations and rebuild the result register. 2415 for (auto &MIB : NewInsts) 2416 MIRBuilder.insertInstr(MIB); 2417 2418 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); 2419 2420 MI.eraseFromParent(); 2421 return Legalized; 2422 } 2423 2424 LegalizerHelper::LegalizeResult 2425 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, 2426 LLT NarrowTy) { 2427 if (TypeIdx != 0) 2428 return UnableToLegalize; 2429 2430 Register DstReg = MI.getOperand(0).getReg(); 2431 Register SrcReg = MI.getOperand(1).getReg(); 2432 LLT DstTy = MRI.getType(DstReg); 2433 LLT SrcTy = MRI.getType(SrcReg); 2434 2435 LLT NarrowTy0 = NarrowTy; 2436 LLT NarrowTy1; 2437 unsigned NumParts; 2438 2439 if (NarrowTy.isVector()) { 2440 // Uneven breakdown not handled. 
2441 NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); 2442 if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) 2443 return UnableToLegalize; 2444 2445 NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); 2446 } else { 2447 NumParts = DstTy.getNumElements(); 2448 NarrowTy1 = SrcTy.getElementType(); 2449 } 2450 2451 SmallVector<Register, 4> SrcRegs, DstRegs; 2452 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); 2453 2454 for (unsigned I = 0; I < NumParts; ++I) { 2455 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); 2456 MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode()) 2457 .addDef(DstReg) 2458 .addUse(SrcRegs[I]); 2459 2460 NewInst->setFlags(MI.getFlags()); 2461 DstRegs.push_back(DstReg); 2462 } 2463 2464 if (NarrowTy.isVector()) 2465 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 2466 else 2467 MIRBuilder.buildBuildVector(DstReg, DstRegs); 2468 2469 MI.eraseFromParent(); 2470 return Legalized; 2471 } 2472 2473 LegalizerHelper::LegalizeResult 2474 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, 2475 LLT NarrowTy) { 2476 Register DstReg = MI.getOperand(0).getReg(); 2477 Register Src0Reg = MI.getOperand(2).getReg(); 2478 LLT DstTy = MRI.getType(DstReg); 2479 LLT SrcTy = MRI.getType(Src0Reg); 2480 2481 unsigned NumParts; 2482 LLT NarrowTy0, NarrowTy1; 2483 2484 if (TypeIdx == 0) { 2485 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; 2486 unsigned OldElts = DstTy.getNumElements(); 2487 2488 NarrowTy0 = NarrowTy; 2489 NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); 2490 NarrowTy1 = NarrowTy.isVector() ? 2491 LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) : 2492 SrcTy.getElementType(); 2493 2494 } else { 2495 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; 2496 unsigned OldElts = SrcTy.getNumElements(); 2497 2498 NumParts = NarrowTy.isVector() ? 
/// Split a G_SELECT into selects on narrower pieces. TypeIdx 0 splits the
/// result (and, if vector, the condition to match); TypeIdx 1 splits a vector
/// condition while keeping scalar result elements.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  // TypeIdx 1 only makes sense for a vector condition.
  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // NOTE: vector NarrowTy on TypeIdx 1 is unimplemented; this second
      // return is a deliberate placeholder for the TODO above.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // A scalar condition is shared by all pieces; only a vector condition is
  // split.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
/// Split a G_PHI into narrower phis. New phis are created in the result
/// block; each predecessor's incoming value is split at that block's
/// terminator and wired into the corresponding new phi.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    // NarrowTy pieces come first, any leftover piece last.
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Rebuild the original wide value after all phis in this block.
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2652 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 2653 PartRegs.clear(); 2654 LeftoverRegs.clear(); 2655 2656 Register SrcReg = MI.getOperand(I).getReg(); 2657 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 2658 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 2659 2660 LLT Unused; 2661 if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs, 2662 LeftoverRegs)) 2663 return UnableToLegalize; 2664 2665 // Add the newly created operand splits to the existing instructions. The 2666 // odd-sized pieces are ordered after the requested NarrowTyArg sized 2667 // pieces. 2668 for (int J = 0; J != TotalNumParts; ++J) { 2669 MachineInstrBuilder MIB = NewInsts[J]; 2670 MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]); 2671 MIB.addMBB(&OpMBB); 2672 } 2673 } 2674 2675 MI.eraseFromParent(); 2676 return Legalized; 2677 } 2678 2679 LegalizerHelper::LegalizeResult 2680 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, 2681 unsigned TypeIdx, 2682 LLT NarrowTy) { 2683 if (TypeIdx != 1) 2684 return UnableToLegalize; 2685 2686 const int NumDst = MI.getNumOperands() - 1; 2687 const Register SrcReg = MI.getOperand(NumDst).getReg(); 2688 LLT SrcTy = MRI.getType(SrcReg); 2689 2690 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 2691 2692 // TODO: Create sequence of extracts. 2693 if (DstTy == NarrowTy) 2694 return UnableToLegalize; 2695 2696 LLT GCDTy = getGCDType(SrcTy, NarrowTy); 2697 if (DstTy == GCDTy) { 2698 // This would just be a copy of the same unmerge. 2699 // TODO: Create extracts, pad with undef and create intermediate merges. 
    return UnableToLegalize;
  }

  // Two-level unmerge: source -> GCD-typed pieces -> original destinations.
  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    // Each second-level unmerge defines a contiguous run of the original
    // destination registers.
    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}

// Narrow a G_LOAD or G_STORE by splitting it into NarrowTy sized memory
// accesses, plus one leftover-typed access if the breakdown is uneven.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store, split the stored value up front; the pieces feed the
    // component stores emitted below.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Compute the address of this piece from the base pointer.
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For loads, reassemble the loaded pieces into the original wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}

// Top-level dispatch for the FewerElements legalization action.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}

// Expand a wide shift whose amount \p Amt is a known constant into operations
// on two half-width (HalfTy) registers, selecting one of four cases per
// opcode: amount > full width, amount > half width, amount == half width, and
// amount < half width.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Shift by zero is the identity; just re-merge the two halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Shifting out everything: result is zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only the low half contributes, shifted into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exactly one half: low half moves to the high position.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
        MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
        NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      // Low result combines bits from both halves.
      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR, but the vacated high bits are sign bits.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}

// TODO: Optimize if constant shift amount.
// Narrow a scalar shift to half the destination width. The shift amount
// operand (TypeIdx 1) is simply truncated; the shifted value (TypeIdx 0) is
// split into two halves and expanded with selects on the runtime amount,
// unless the amount is a known constant.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // Known-constant amount: use the cheaper constant expansion.
  if (const MachineInstr *KShiftAmt =
      getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
      MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // AmtExcess = Amt - NewBitSize (used when the shift crosses halves);
  // AmtLack = NewBitSize - Amt (bits carried between halves otherwise).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
      HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
      HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}

// Widen a G_PHI: pad each incoming value in its predecessor block, then
// narrow the widened result back down in the phi's own block.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}

// Top-level dispatch for the MoreElements (vector widening) action.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case
       TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    // Binary ops: widen both sources and the destination.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions (vselect) are not handled here.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    // Rebuild the unmerge with extra destinations covering the widened
    // source; the original destinations keep their registers.
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}

// Multi-precision schoolbook multiplication: multiplies the values split into
// the NarrowTy pieces Src1Regs and Src2Regs, writing the product pieces
// (low part first) into the pre-sized DstRegs. Partial products for each
// result piece are summed together with the carries from the previous piece.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}

// Narrow a scalar G_MUL / G_UMULH by splitting the operands into NarrowTy
// pieces and doing a multi-precision multiply via multiplyRegisters. For
// G_UMULH, twice as many temporary result pieces are computed and only the
// high half is kept.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
  unsigned DstSize = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumDstParts = DstSize / NarrowSize;
  unsigned NumSrcParts = SrcSize / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumDstParts * (IsMulHigh ?
                                                   2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
  DstTmpRegs.resize(DstTmpParts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only high half of registers if this is high mul.
  ArrayRef<Register> DstRegs(
      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
  MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

// Narrow the source (TypeIdx 1) of a G_EXTRACT: split the source into
// NarrowTy pieces and extract the needed segment from each piece that
// overlaps the requested bit range, then merge the segments.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

// Narrow the destination (TypeIdx 0) of a G_INSERT: split the big source into
// NarrowTy pieces, and for each piece overlapped by the inserted value,
// extract the corresponding segment and insert it into that piece.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

// Narrow a simple two-source, one-destination operation (TypeIdx 0) by
// splitting all operands into NarrowTy pieces (plus leftovers) and emitting
// the operation piecewise.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  // Both sources have the same type, so the second split cannot fail.
  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst->getOperand(0).getReg());
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
      MI.getOpcode(),
      {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
  }

insertParts(DstReg, DstTy, NarrowTy, DstRegs, 3483 LeftoverTy, DstLeftoverRegs); 3484 3485 MI.eraseFromParent(); 3486 return Legalized; 3487 } 3488 3489 LegalizerHelper::LegalizeResult 3490 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, 3491 LLT NarrowTy) { 3492 if (TypeIdx != 0) 3493 return UnableToLegalize; 3494 3495 Register CondReg = MI.getOperand(1).getReg(); 3496 LLT CondTy = MRI.getType(CondReg); 3497 if (CondTy.isVector()) // TODO: Handle vselect 3498 return UnableToLegalize; 3499 3500 Register DstReg = MI.getOperand(0).getReg(); 3501 LLT DstTy = MRI.getType(DstReg); 3502 3503 SmallVector<Register, 4> DstRegs, DstLeftoverRegs; 3504 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; 3505 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs; 3506 LLT LeftoverTy; 3507 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy, 3508 Src1Regs, Src1LeftoverRegs)) 3509 return UnableToLegalize; 3510 3511 LLT Unused; 3512 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused, 3513 Src2Regs, Src2LeftoverRegs)) 3514 llvm_unreachable("inconsistent extractParts result"); 3515 3516 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { 3517 auto Select = MIRBuilder.buildSelect(NarrowTy, 3518 CondReg, Src1Regs[I], Src2Regs[I]); 3519 DstRegs.push_back(Select->getOperand(0).getReg()); 3520 } 3521 3522 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { 3523 auto Select = MIRBuilder.buildSelect( 3524 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); 3525 DstLeftoverRegs.push_back(Select->getOperand(0).getReg()); 3526 } 3527 3528 insertParts(DstReg, DstTy, NarrowTy, DstRegs, 3529 LeftoverTy, DstLeftoverRegs); 3530 3531 MI.eraseFromParent(); 3532 return Legalized; 3533 } 3534 3535 LegalizerHelper::LegalizeResult 3536 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3537 unsigned Opc = MI.getOpcode(); 3538 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); 3539 
  // An expansion may use an opcode if the target handles it directly, via
  // libcall, or via custom legalization.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the set bits rightward with doubling shift amounts, so the
    // result has ones in every position at or below the leading one.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    // ~x & (x - 1) has a one in every trailing-zero position of x and
    // nowhere else.
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse MI in place as the G_CTPOP of the computed mask.
    // NOTE(review): unlike the ZERO_UNDEF cases above, this mutation is not
    // bracketed by Observer.changingInstr/changedInstr — confirm whether the
    // observer needs to be notified here.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
3650 LegalizerHelper::LegalizeResult 3651 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { 3652 Register Dst = MI.getOperand(0).getReg(); 3653 Register Src = MI.getOperand(1).getReg(); 3654 const LLT S64 = LLT::scalar(64); 3655 const LLT S32 = LLT::scalar(32); 3656 const LLT S1 = LLT::scalar(1); 3657 3658 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); 3659 3660 // unsigned cul2f(ulong u) { 3661 // uint lz = clz(u); 3662 // uint e = (u != 0) ? 127U + 63U - lz : 0; 3663 // u = (u << lz) & 0x7fffffffffffffffUL; 3664 // ulong t = u & 0xffffffffffUL; 3665 // uint v = (e << 23) | (uint)(u >> 40); 3666 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U); 3667 // return as_float(v + r); 3668 // } 3669 3670 auto Zero32 = MIRBuilder.buildConstant(S32, 0); 3671 auto Zero64 = MIRBuilder.buildConstant(S64, 0); 3672 3673 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src); 3674 3675 auto K = MIRBuilder.buildConstant(S32, 127U + 63U); 3676 auto Sub = MIRBuilder.buildSub(S32, K, LZ); 3677 3678 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64); 3679 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32); 3680 3681 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1); 3682 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ); 3683 3684 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0); 3685 3686 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL); 3687 auto T = MIRBuilder.buildAnd(S64, U, Mask1); 3688 3689 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40)); 3690 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23)); 3691 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl)); 3692 3693 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL); 3694 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C); 3695 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C); 3696 auto One = MIRBuilder.buildConstant(S32, 1); 3697 3698 auto VTrunc1 = 
MIRBuilder.buildAnd(S32, V, One); 3699 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32); 3700 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0); 3701 MIRBuilder.buildAdd(Dst, V, R); 3702 3703 return Legalized; 3704 } 3705 3706 LegalizerHelper::LegalizeResult 3707 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3708 Register Dst = MI.getOperand(0).getReg(); 3709 Register Src = MI.getOperand(1).getReg(); 3710 LLT DstTy = MRI.getType(Dst); 3711 LLT SrcTy = MRI.getType(Src); 3712 3713 if (SrcTy != LLT::scalar(64)) 3714 return UnableToLegalize; 3715 3716 if (DstTy == LLT::scalar(32)) { 3717 // TODO: SelectionDAG has several alternative expansions to port which may 3718 // be more reasonble depending on the available instructions. If a target 3719 // has sitofp, does not have CTLZ, or can efficiently use f64 as an 3720 // intermediate type, this is probably worse. 3721 return lowerU64ToF32BitOps(MI); 3722 } 3723 3724 return UnableToLegalize; 3725 } 3726 3727 LegalizerHelper::LegalizeResult 3728 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3729 Register Dst = MI.getOperand(0).getReg(); 3730 Register Src = MI.getOperand(1).getReg(); 3731 LLT DstTy = MRI.getType(Dst); 3732 LLT SrcTy = MRI.getType(Src); 3733 3734 const LLT S64 = LLT::scalar(64); 3735 const LLT S32 = LLT::scalar(32); 3736 const LLT S1 = LLT::scalar(1); 3737 3738 if (SrcTy != S64) 3739 return UnableToLegalize; 3740 3741 if (DstTy == S32) { 3742 // signed cl2f(long l) { 3743 // long s = l >> 63; 3744 // float r = cul2f((l + s) ^ s); 3745 // return s ? 
-r : r; 3746 // } 3747 Register L = Src; 3748 auto SignBit = MIRBuilder.buildConstant(S64, 63); 3749 auto S = MIRBuilder.buildAShr(S64, L, SignBit); 3750 3751 auto LPlusS = MIRBuilder.buildAdd(S64, L, S); 3752 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S); 3753 auto R = MIRBuilder.buildUITOFP(S32, Xor); 3754 3755 auto RNeg = MIRBuilder.buildFNeg(S32, R); 3756 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S, 3757 MIRBuilder.buildConstant(S64, 0)); 3758 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R); 3759 return Legalized; 3760 } 3761 3762 return UnableToLegalize; 3763 } 3764 3765 LegalizerHelper::LegalizeResult 3766 LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3767 Register Dst = MI.getOperand(0).getReg(); 3768 Register Src = MI.getOperand(1).getReg(); 3769 LLT DstTy = MRI.getType(Dst); 3770 LLT SrcTy = MRI.getType(Src); 3771 const LLT S64 = LLT::scalar(64); 3772 const LLT S32 = LLT::scalar(32); 3773 3774 if (SrcTy != S64 && SrcTy != S32) 3775 return UnableToLegalize; 3776 if (DstTy != S32 && DstTy != S64) 3777 return UnableToLegalize; 3778 3779 // FPTOSI gives same result as FPTOUI for positive signed integers. 3780 // FPTOUI needs to deal with fp values that convert to unsigned integers 3781 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp. 3782 3783 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits()); 3784 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle() 3785 : APFloat::IEEEdouble(), 3786 APInt::getNullValue(SrcTy.getSizeInBits())); 3787 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven); 3788 3789 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); 3790 3791 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP); 3792 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on 3793 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1. 
3794 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold); 3795 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub); 3796 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); 3797 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); 3798 3799 MachineInstrBuilder FCMP = 3800 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold); 3801 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); 3802 3803 MI.eraseFromParent(); 3804 return Legalized; 3805 } 3806 3807 static CmpInst::Predicate minMaxToCompare(unsigned Opc) { 3808 switch (Opc) { 3809 case TargetOpcode::G_SMIN: 3810 return CmpInst::ICMP_SLT; 3811 case TargetOpcode::G_SMAX: 3812 return CmpInst::ICMP_SGT; 3813 case TargetOpcode::G_UMIN: 3814 return CmpInst::ICMP_ULT; 3815 case TargetOpcode::G_UMAX: 3816 return CmpInst::ICMP_UGT; 3817 default: 3818 llvm_unreachable("not in integer min/max"); 3819 } 3820 } 3821 3822 LegalizerHelper::LegalizeResult 3823 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3824 Register Dst = MI.getOperand(0).getReg(); 3825 Register Src0 = MI.getOperand(1).getReg(); 3826 Register Src1 = MI.getOperand(2).getReg(); 3827 3828 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); 3829 LLT CmpType = MRI.getType(Dst).changeElementSize(1); 3830 3831 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1); 3832 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1); 3833 3834 MI.eraseFromParent(); 3835 return Legalized; 3836 } 3837 3838 LegalizerHelper::LegalizeResult 3839 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 3840 Register Dst = MI.getOperand(0).getReg(); 3841 Register Src0 = MI.getOperand(1).getReg(); 3842 Register Src1 = MI.getOperand(2).getReg(); 3843 3844 const LLT Src0Ty = MRI.getType(Src0); 3845 const LLT Src1Ty = MRI.getType(Src1); 3846 3847 const int Src0Size = Src0Ty.getScalarSizeInBits(); 3848 const int Src1Size = 
Src1Ty.getScalarSizeInBits(); 3849 3850 auto SignBitMask = MIRBuilder.buildConstant( 3851 Src0Ty, APInt::getSignMask(Src0Size)); 3852 3853 auto NotSignBitMask = MIRBuilder.buildConstant( 3854 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1)); 3855 3856 auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask); 3857 MachineInstr *Or; 3858 3859 if (Src0Ty == Src1Ty) { 3860 auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask); 3861 Or = MIRBuilder.buildOr(Dst, And0, And1); 3862 } else if (Src0Size > Src1Size) { 3863 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size); 3864 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1); 3865 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt); 3866 auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask); 3867 Or = MIRBuilder.buildOr(Dst, And0, And1); 3868 } else { 3869 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size); 3870 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt); 3871 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift); 3872 auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask); 3873 Or = MIRBuilder.buildOr(Dst, And0, And1); 3874 } 3875 3876 // Be careful about setting nsz/nnan/ninf on every instruction, since the 3877 // constants are a nan and -0.0, but the final result should preserve 3878 // everything. 3879 if (unsigned Flags = MI.getFlags()) 3880 Or->setFlags(Flags); 3881 3882 MI.eraseFromParent(); 3883 return Legalized; 3884 } 3885 3886 LegalizerHelper::LegalizeResult 3887 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { 3888 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ? 
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicate quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_UNMERGE_VALUES of a vector into scalar elements by bitcasting to
/// one wide integer and truncating/shifting out each piece.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  // The source is the last operand; all the earlier operands are results.
  const unsigned NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);


  // Expand scalarizing unmerge as bitcast to integer and shift.
  if (!DstTy.isVector() && SrcTy.isVector() &&
      SrcTy.getElementType() == DstTy) {
    LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
    Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);

    // The first result is the low DstSize bits of the cast; each subsequent
    // result is the next DstSize-bit chunk, shifted down before truncating.
    MIRBuilder.buildTrunc(Dst0Reg, Cast);

    const unsigned DstSize = DstTy.getSizeInBits();
    unsigned Offset = DstSize;
    for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
      auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
      auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Lower G_SHUFFLE_VECTOR to either a copy (scalar result) or a
/// G_BUILD_VECTOR of individually extracted (or undef) elements.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  const Constant *ShufMask = MI.getOperand(3).getShuffleMask();

  SmallVector<int, 32> Mask;
  ShuffleVectorInst::getShuffleMask(ShufMask, Mask);

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    // A negative (undef) or out-of-range index yields an undef result;
    // otherwise pick whichever scalar source the index names.
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ?
          Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      // Undef mask element: materialize a single shared undef value.
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      // Scalar sources: index 0 names the first source, anything else the
      // second.
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Vector sources: indices [0, NumElts) address Src0, the rest address
      // Src1 after rebasing.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_DYN_STACKALLOC: copy the stack pointer, subtract the allocation
/// size, round down to the requested alignment, then write the new value
/// back to SP and to the result.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  unsigned Align = MI.getOperand(2).getImm();

  const auto &MF = *MI.getMF();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  LLT PtrTy = MRI.getType(Dst);
  // Integer type of pointer width for the arithmetic below.
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_GEP to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Align) {
    // Masking with -Align rounds the new SP down to the alignment boundary.
    APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}