//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy pieces needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
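///
/// For example (a worked case implied by the code below): breaking s88 into
/// s32 pieces yields {2, 1}, with \p LeftoverTy set to s24 (two full s32
/// parts plus one s24 leftover part).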
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}

static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {

  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) { }

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) { }

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}

/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}

void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}

LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't
  // evenly cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can
  // reuse the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
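    // (Illustrative note: that first merge already consists entirely of
    // copies of the sign bit, so later all-padding parts can simply reuse
    // the same register instead of emitting another merge.)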
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}

void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits
  // into the result.

  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}

/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("unsupported opcode");
    case TargetOpcode::G_BZERO:
      return false;
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      break;
    }

    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}

// Useful for libcalls where all operands have the same type.
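// For example (illustrative, target-dependent): an s64 G_FREM maps to
// RTLIB::REM_F64, which on most targets is the C runtime's
// fmod(double, double), with all operands and the result passed as double.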
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args);
}

LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // We need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
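  // Note: the int<->fp conversion cases below pick the libcall from the
  // concrete source/destination types; e.g. an s32 G_FPTOSI of an s64 input
  // uses RTLIB::getFPTOSINT(f64, i32), i.e. __fixdfsi on most targets.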
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    // a lot of regressions in the emitted code (superfluous COPYs, artifact
    // combines not being hit). This seems to be a problem related to the
    // artifact combiner.
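    // For example, narrowing an s88 G_IMPLICIT_DEF with an s32 NarrowTy takes
    // the path below and emits (register names illustrative):
    //   %tmp:_(s32) = G_IMPLICIT_DEF
    //   %dst:_(s88) = G_ANYEXT %tmp(s32)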
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy,
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_FREEZE:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      LoadMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (isa<GZExtLoad>(LoadMI))
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
      StoreMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    Register LHS = MI.getOperand(2).getReg();
    LLT SrcTy = MRI.getType(LHS);
    uint64_t SrcSize = SrcTy.getSizeInBits();
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

    // TODO: Handle the non-equality case for weird sizes.
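    // Sketch of the equality path below for an s128 compare split into two
    // s64 parts (register names illustrative):
    //   %xlo:_(s64) = G_XOR %lhs_lo, %rhs_lo
    //   %xhi:_(s64) = G_XOR %lhs_hi, %rhs_hi
    //   %or:_(s64) = G_OR %xlo, %xhi
    //   %dst:_(s1) = G_ICMP eq/ne, %or(s64), 0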
    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
      return UnableToLegalize;

    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
                      LHSLeftoverRegs))
      return UnableToLegalize;

    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs))
      return UnableToLegalize;

    // We now have the LHS and RHS of the compare split into narrow-type
    // registers, plus potentially some leftover type.
    Register Dst = MI.getOperand(0).getReg();
    LLT ResTy = MRI.getType(Dst);
    if (ICmpInst::isEquality(Pred)) {
      // For each part on the LHS and RHS, keep track of the result of XOR-ing
      // them together. For each equal part, the result should be all 0s. For
      // each non-equal part, we'll get at least one 1.
      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      SmallVector<Register, 4> Xors;
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
        Xors.push_back(Xor);
      }

      // Build a G_XOR for each leftover register. Each G_XOR must be widened
      // to the desired narrow type so that we can OR them together later.
      SmallVector<Register, 4> WidenedXors;
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            /* PadStrategy = */ TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      }

      // Now, for each part we broke up, we know if they are equal/not equal
      // based off the G_XOR. We can OR these all together and compare against
      // 0 to get the result.
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
    } else {
      // TODO: Handle non-power-of-two types.
      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
      Register LHSL = LHSPartRegs[0];
      Register LHSH = LHSPartRegs[1];
      Register RHSL = RHSPartRegs[0];
      Register RHSH = RHSPartRegs[1];
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified.
    // The component containing the extension point becomes a narrower
    // SEXT_INREG. Components above it are ashr'd from the component
    // containing the extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
    return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

Register LegalizerHelper::coerceToScalar(Register Val) {
  LLT Ty = MRI.getType(Val);
  if (Ty.isScalar())
    return Val;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
  if (Ty.isPointer()) {
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
  }

  Register NewVal = Val;

  assert(Ty.isVector());
  LLT EltTy = Ty.getElementType();
  if (EltTy.isPointer())
    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
}

void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}

void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
}

void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);

  LLT OldTy = MRI.getType(MO.getReg());
  unsigned OldElts = OldTy.getNumElements();
  unsigned NewElts = MoreTy.getNumElements();

  unsigned NumParts = NewElts / OldElts;

  // Use concat_vectors if the result is a multiple of the number of elements.
  if (NumParts * OldElts == NewElts) {
    SmallVector<Register, 8> Parts;
    Parts.push_back(MO.getReg());

    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
    for (unsigned I = 1; I != NumParts; ++I)
      Parts.push_back(ImpDef);

    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
    MO.setReg(Concat.getReg(0));
    return;
  }

  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
  MO.setReg(MoreReg);
}

void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Op = MI.getOperand(OpIdx);
  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
}

void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildBitcast(MO, CastDst);
  MO.setReg(CastDst);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

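  // When the requested type is at least as wide as the destination, the
  // sources can be packed directly with zext/shl/or; e.g. merging two s16
  // pieces into an s32 destination via a wider s64 type (register names
  // illustrative):
  //   %z0:_(s64) = G_ZEXT %src0(s16)
  //   %z1:_(s64) = G_ZEXT %src1(s16)
  //   %sh:_(s64) = G_SHL %z1, 16
  //   %or:_(s64) = G_OR %z0, %sh
  //   %dst:_(s32) = G_TRUNC %or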
  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide
  // the original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}

Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
  LLT OrigTy = MRI.getType(OrigReg);
  LLT LCMTy = getLCMType(WideTy, OrigTy);

  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();

  Register UnmergeSrc = WideReg;

  // Create a merge to the LCM type, padding with undef
  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
  // =>
  // %1:_(<4 x s32>) = G_FOO
  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
  if (NumMergeParts > 1) {
    Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
    SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
    MergeParts[0] = WideReg;
    UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
  }

  // Unmerge to the original register and pad with dead defs.
  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
  UnmergeResults[0] = OrigReg;
  for (int I = 1; I != NumUnmergeParts; ++I)
    UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);

  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
  return WideReg;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since
  // we may have widened the source, we will need to pad the results with dead
  // defs to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible.
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create a dead def for each excess component.
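          // (These cover the widened portion of the source that has no
          // corresponding original result; the defs have no users and are
          // expected to be deleted as dead code.)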
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
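    // For example (a sketch; register names are illustrative):
    //   %dst:_(s16) = G_EXTRACT %src:_(s64), 32
    // becomes
    //   %shr:_(s64) = G_LSHR %src, 32
    //   %dst:_(s16) = G_TRUNC %shr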
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize; // TODO

  unsigned Opcode;
  unsigned ExtOpcode;
  Optional<Register> CarryIn = None;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
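  // For example, widening s8 G_SADDO to s32 (a sketch; register names are
  // illustrative):
  //   %lhs32:_(s32) = G_SEXT %lhs:_(s8)
  //   %rhs32:_(s32) = G_SEXT %rhs:_(s8)
  //   %sum32:_(s32) = G_ADD %lhs32, %rhs32
  //   %low:_(s8)    = G_TRUNC %sum32
  //   %ext:_(s32)   = G_SEXT %low
  //   %ovf:_(s1)    = G_ICMP intpred(ne), %sum32, %ext
  //   %res:_(s8)    = G_TRUNC %sum32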
  Register NewOp;
  if (CarryIn) {
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
  Register DstReg = MI.getOperand(0).getReg();

  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  Register Result = MI.getOperand(0).getReg();
  Register OriginalOverflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(LHS);
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();

  // To determine if the result overflowed in the larger type, we extend the
  // input to the larger type, do the multiply (checking if it overflows),
  // then also check the high bits of the result to see if overflow happened
  // there.
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});

  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                    {LeftOperand, RightOperand});
  auto Mul = Mulo->getOperand(0);
  MIRBuilder.buildTrunc(Result, Mul);

  MachineInstrBuilder ExtResult;
  // Overflow occurred if it occurred in the larger type, or if the high part
  // of the result does not zero/sign-extend the low part. Check this second
  // possibility first.
  if (IsSigned) {
    // For signed, overflow occurred when the high part does not sign-extend
    // the low part.
    ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
  } else {
    // Unsigned overflow occurred when the high part does not zero-extend the
    // low part.
    ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
  }

  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
  // so we don't need to check the overflow result of larger type Mulo.
  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
    auto Overflow =
        MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
    // Finally check if the multiplication in the larger type itself
    // overflowed.
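    // (The overall overflow flag below is thus the OR of the wide multiply's
    // own overflow bit and the high-part check computed above.)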
    MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
  } else {
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }
    assert(TypeIdx == 1 &&
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First extend the input.
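    // A zero-extend is needed for CTLZ/CTPOP so the new high bits do not
    // perturb the count; CTTZ only counts from the low end (and plain G_CTTZ
    // additionally marks the bit just above the original width below), so an
    // any-extend suffices there.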
    unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
                              MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;
    auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
    LLT CurTy = MRI.getType(SrcReg);
    unsigned NewOpc = MI.getOpcode();
    if (NewOpc == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
      // Now we know the operand is non-zero, use the more relaxed opcode.
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs.
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (difference between WideTy and CurTy).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ABS:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high
    // bits don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIVREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR
                           ? TargetOpcode::G_SEXT
                           : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UDIVREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to
      // the original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!Ty.isScalar())
      return UnableToLegalize;

    Observer.changingInstr(MI);

    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? TargetOpcode::G_ZEXT
                                                     : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      unsigned ExtOpcode =
          CmpInst::isSigned(
              static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()))
              ? TargetOpcode::G_SEXT
              : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(
          MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
          TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}

static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to
      // match the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Figure out the bit offset into a register when coercing a vector index for
/// the wide element type. This is only for the case when promoting a vector
/// to one with larger elements.
///
/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}

/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If
/// this is casting to a vector with a smaller element size, perform multiple
/// element extracts and merge the results. If this is coercing to a vector
/// with larger elements, index the bitcasted vector and extract the target
/// element with bit operations. This is intended to force the indexing in the
/// native register size for architectures that can dynamically index the
/// register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt =
          MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure
    // out the bit offset we need to shift to get the target element. A
    // general expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits
    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
/// preserving other bits in \p TargetReg.
///
/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert.
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position.
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element.
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}

/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file
/// and want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure
    // out the bit offset we need to shift to get the target element. A
    // general expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
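    // (A sketch: viewing <8 x s16> as <4 x s32>, original element %idx lives
    // in wide element %idx >> 1.)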
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
          CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = LoadMI.getDstReg();
  Register PtrReg = LoadMI.getPointerReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineMemOperand &MMO = LoadMI.getMMO();
  LLT MemTy = MMO.getMemoryType();
  MachineFunction &MF = MIRBuilder.getMF();

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (MemSizeInBits != MemStoreSizeInBits) {
    if (MemTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized load if not loading an integral number of
    // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);

    Register LoadReg = DstReg;
    LLT LoadTy = DstTy;

    // If this wasn't already an extending load, we need to widen the result
    // register to avoid creating a load with a narrower result than the
    // source.
    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
      LoadTy = WideMemTy;
      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
    }

    if (isa<GSExtLoad>(LoadMI)) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      // The extra bits are guaranteed to be zero, since we stored them that
      // way. A zext load from Wide thus automatically gives zext from MemVT.
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
    } else {
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
    }

    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }

  // Big endian lowering not implemented.
  if (MIRBuilder.getDataLayout().isBigEndian())
    return UnableToLegalize;

  // This load needs splitting into power of 2 sized loads.
  //
  // Our strategy here is to generate anyextending loads for the smaller
  // types up to next power-2 result type, and then combine the two larger
  // result values together, before truncating back down to the non-pow-2
  // type.
  // E.g. v1 = i24 load =>
  // v2 = i32 zextload (2 byte)
  // v3 = i32 load (1 byte)
  // v4 = i32 shl v3, 16
  // v5 = i32 or v4, v2
  // v1 = i24 trunc v5
  // By doing this we generate the correct truncate which should get
  // combined away as an artifact with a matching extend.

  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // This load needs splitting into power of 2 sized loads.
    LargeSplitSize = PowerOf2Floor(MemSizeInBits);
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    // This is already a power of 2, but we still need to split this in half.
    //
    // Assume we're being asked to decompose an unaligned load.
    // TODO: If this requires multiple splits, handle them all at once.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize;

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  if (MemTy.isVector()) {
    // TODO: Handle vector extloads
    if (MemTy != DstTy)
      return UnableToLegalize;

    // TODO: We can do better than scalarizing the vector and at least split
    // it in half.
    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
  }

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);

  LLT PtrTy = MRI.getType(PtrReg);
  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
  LLT AnyExtTy = LLT::scalar(AnyExtSize);
  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
                                             PtrReg, *LargeMMO);

  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
                                            LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);

  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);

  if (AnyExtTy == DstTy)
    MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    MIRBuilder.buildTrunc(DstReg, {Or});
  } else {
    assert(DstTy.isPointer() && "expected pointer");
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);

    // FIXME: We currently consider this to be illegal for non-integral
    // address spaces, but we still need a way to reinterpret the bits.
    MIRBuilder.buildIntToPtr(DstReg, Or);
  }

  LoadMI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, we generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
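  // For example (a sketch; offsets in bytes, register names illustrative):
  //   G_STORE %val:_(s24), %ptr
  // =>
  //   %ext:_(s32) = G_ANYEXT %val
  //   G_STORE %ext, %ptr               ; truncating store of the low 16 bits
  //   %hi:_(s32) = G_LSHR %ext, 16
  //   %ptr2:_(p0) = G_PTR_ADD %ptr, 2
  //   G_STORE %hi, %ptr2               ; truncating store of the next 8 bits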
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();

  unsigned StoreWidth = MemTy.getSizeInBits();
  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (StoreWidth != StoreSizeInBits) {
    if (SrcTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized store with upper bits zero if not
    // storing an integral number of bytes. For example, promote
    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
    LLT WideTy = LLT::scalar(StoreSizeInBits);

    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      // Avoid creating a store with a narrower source than result.
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
      SrcTy = WideTy;
    }

    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);

    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  if (MemTy.isVector()) {
    // TODO: Handle vector trunc stores
    if (MemTy != SrcTy)
      return UnableToLegalize;

    // TODO: We can do better than scalarizing the vector and at least split
    // it in half.
    return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
  }

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize; // Don't know what we're being asked to do.

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  // Extend to the next pow-2. If this store was itself the result of
  // lowering, e.g. an s56 store being broken into s32 + s24, we might have a
  // stored type that's wider than the stored size.
  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
  const LLT NewSrcTy = LLT::scalar(AnyExtSize);

  if (SrcTy.isPointer()) {
    const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
  }

  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
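  // (The second, smaller store lands LargeSplitSize / 8 bytes past the base
  // pointer.)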
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
      LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  auto SmallPtr =
      MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  StoreMI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of an extending load.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    MMO.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of a truncating store.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    MMO.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}

// Legalize an instruction by changing the opcode in place.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  Observer.changingInstr(MI);
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  Observer.changedInstr(MI);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  using namespace TargetOpcode;

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
                              {Ty}, {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
    MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
    return lowerSMULH_UMULH(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    LLT Ty = MRI.getType(Res);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.RemoveOperand(1);
    Observer.changedInstr(MI);

    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // Move insert point forward so we can use the Res register if needed.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    auto SignMask =
        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
    Register SubByReg = MI.getOperand(1).getReg();
    MIRBuilder.buildXor(Res, SubByReg, SignMask);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFNeg(Neg, RHS);
    MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // Since round even is the assumed rounding mode for unconstrained FP
    // operations, rint and roundeven are the same operation.
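    // e.g. under the default environment both take 2.5 -> 2.0 and 3.5 -> 4.0,
    // so rewriting the opcode in place is enough.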
    changeOpcode(MI, TargetOpcode::G_FRINT);
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    return lowerLoad(cast<GAnyLoad>(MI));
  case TargetOpcode::G_STORE:
    return lowerStore(cast<GStore>(MI));
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI);
  case G_UADDO: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(CarryOut);
    LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
    auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);

    // A single unsigned compare of the result against LHS misses the case
    // where RHS is all ones and the carry-in is set (the sum wraps back to
    // exactly LHS). Check each of the two adds for wrap-around instead; at
    // most one of them can wrap, so OR-ing the conditions is safe.
    auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
    auto Carry2 = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, Res, TmpRes);
    MIRBuilder.buildOr(CarryOut, Carry, Carry2);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(BorrowOut);
    const LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
    auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);

    // Borrow out iff LHS < RHS + BorrowIn: when LHS == RHS that reduces to
    // the incoming borrow, otherwise to the plain LHS < RHS compare.
    auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
    auto LHS_ULT_RHS =
        MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UITOFP:
    return lowerUITOFP(MI);
  case G_SITOFP:
    return lowerSITOFP(MI);
  case G_FPTOUI:
    return lowerFPTOUI(MI);
  case G_FPTOSI:
    return lowerFPTOSI(MI);
  case G_FPTRUNC:
    return lowerFPTRUNC(MI);
  case G_FPOWI:
    return lowerFPOWI(MI);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_MERGE_VALUES:
    return lowerMergeValues(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    // Shift the field's sign bit up to the top, then arithmetic-shift it back
    // down so it is replicated across the upper bits.
    auto MIBSz = MIRBuilder.buildConstant(
        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
    MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
    MI.eraseFromParent();
    return Legalized;
  }
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return lowerExtractInsertVectorElt(MI);
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  case G_EXTRACT:
    return lowerExtract(MI);
  case G_INSERT:
    return lowerInsert(MI);
  case G_BSWAP:
    return lowerBswap(MI);
  case G_BITREVERSE:
    return lowerBitreverse(MI);
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    return lowerReadWriteRegister(MI);
  case G_UADDSAT:
  case G_USUBSAT: {
    // Try to make a reasonable guess about which lowering strategy to use. The
    // target can override this with custom lowering and calling the
    // implementation functions.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (LI.isLegalOrCustom({G_UMIN, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SADDSAT:
  case G_SSUBSAT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());

    // FIXME: It would probably make more sense to see if G_SADDO is preferred,
    // since it's a shorter expansion. However, we would need to figure out the
    // preferred boolean type for the carry out for the query.
    if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SSHLSAT:
  case G_USHLSAT:
    return lowerShlSat(MI);
  case G_ABS:
    return lowerAbsToAddXor(MI);
  case G_SELECT:
    return lowerSelect(MI);
  case G_SDIVREM:
  case G_UDIVREM:
    return lowerDIVREM(MI);
  case G_FSHL:
  case G_FSHR:
    return lowerFunnelShift(MI);
  case G_ROTL:
  case G_ROTR:
    return lowerRotate(MI);
  case G_ISNAN:
    return lowerIsNaN(MI);
  }
}

Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
                                                  Align MinAlign) const {
  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
  // datalayout for the preferred alignment. Also there should be a target hook
  // for this to allow targets to reduce the alignment and ignore the
  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
  // the type.
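  // e.g. an s96 (12-byte) temporary is rounded up to a 16-byte alignment
  // here, even when the datalayout would be satisfied with less.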
  return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
}

MachineInstrBuilder
LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
                                      MachinePointerInfo &PtrInfo) {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);

  unsigned AddrSpace = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));

  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
}

static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
                                        LLT VecTy) {
  int64_t IdxVal;
  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
    return IdxReg;

  LLT IdxTy = B.getMRI()->getType(IdxReg);
  unsigned NElts = VecTy.getNumElements();
  if (isPowerOf2_32(NElts)) {
    // For a power-of-2 element count, masking the low bits is cheaper than a
    // compare-based clamp.
    APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  }

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
      .getReg(0);
}

Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
                                                  Register Index) {
  LLT EltTy = VecTy.getElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltTy.getSizeInBits() &&
         "Converting bits to bytes lost precision");

  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);

  LLT IdxTy = MRI.getType(Index);
  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
                                 MIRBuilder.buildConstant(IdxTy, EltSize));

  LLT PtrTy = MRI.getType(VecPtr);
  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();

  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));

  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
  MI.eraseFromParent();
  return Legalized;
}

// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different
// element type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
//      <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
                                            : ElementCount::getFixed(1);
    LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and set up
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
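  // The new instructions were deliberately built un-inserted so that all of
  // the per-operand unmerges emitted above come first; only now do they enter
  // the block.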
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    NarrowTy1 =
        LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
  } else {
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst =
        MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});

    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts =
        NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
                                                  SrcTy.getScalarSizeInBits())
                                    : SrcTy.getElementType();
  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts =
        NarrowTy.isVector() ? (OldElts / NewElts) : NarrowTy.getNumElements();
    NarrowTy0 =
        LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
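  // e.g. breaking a <3 x s32> compare into <2 x s32> pieces would leave a
  // stray element behind, so only exact breakdowns are accepted.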
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred =
      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp =
          MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 =
            LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
                        CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle breaking down a vector condition to NarrowTy pieces,
      // including the uneven case; always bail for now.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg,
                           CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                           .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy sized pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
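    // e.g. unmerging <6 x s16> into three <2 x s16> defs with NarrowTy
    // <4 x s16>: the GCD type is <2 x s16>, the destination type itself, so
    // going through GCDTy would just recreate this instruction.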
    return UnableToLegalize;
  }

  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
                                         LLT NarrowTy) {
  Register Result = MI.getOperand(0).getReg();
  Register Overflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();

  LLT SrcTy = MRI.getType(LHS);
  if (!SrcTy.isVector())
    return UnableToLegalize;

  LLT ElementType = SrcTy.getElementType();
  LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
  const ElementCount NumResult = SrcTy.getElementCount();
  LLT GCDTy = getGCDType(SrcTy, NarrowTy);

  // Unmerge the operands to smaller parts of GCD type.
  auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
  auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);

  const int NumOps = UnmergeLHS->getNumOperands() - 1;
  const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
  LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
  LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);

  // Perform the operation over unmerged parts.
  SmallVector<Register, 8> ResultParts;
  SmallVector<Register, 8> OverflowParts;
  for (int I = 0; I != NumOps; ++I) {
    Register Operand1 = UnmergeLHS->getOperand(I).getReg();
    Register Operand2 = UnmergeRHS->getOperand(I).getReg();
    auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
                                         {Operand1, Operand2});
    ResultParts.push_back(PartMul->getOperand(0).getReg());
    OverflowParts.push_back(PartMul->getOperand(1).getReg());
  }

  LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
  LLT OverflowLCMTy =
      LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);

  // Recombine the pieces to the original result and overflow registers.
  buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
  buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
  MI.eraseFromParent();
  return Legalized;
}

// Handle fewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
// produces a vector.
//
// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
// undef as necessary.
//
// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
//   -> <2 x s16>
//
// %4:_(s16) = G_IMPLICIT_DEF
// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
// %7:_(<2 x s16>) = G_IMPLICIT_DEF
// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
// %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);

  // Break into a common type
  SmallVector<Register, 16> Parts;
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
    extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());

  // Build the requested new merge, padding with undef.
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
                                  TargetOpcode::G_ANYEXT);

  // Pack into the original result register.
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst =
      getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
                                        /*HandleFConstants*/ false);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (LdStMI.isAtomic())
    return UnableToLegalize;

  bool IsLoad = isa<GLoad>(LdStMI);
  Register ValReg = LdStMI.getReg(0);
  Register AddrReg = LdStMI.getPointerReg();
  LLT ValTy = MRI.getType(ValReg);

  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
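  // e.g. an s96 value narrowed to s32 is covered by three s32 accesses with
  // no leftover; narrowed to s64 it takes one s64 access plus an s32 leftover
  // at byte offset 8.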
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  LdStMI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
                                      LLT NarrowTy) {
  assert(TypeIdx == 0 && "only one type index expected");

  const unsigned Opc = MI.getOpcode();
  const int NumDefOps = MI.getNumExplicitDefs();
  const int NumSrcOps = MI.getNumOperands() - NumDefOps;
  const unsigned Flags = MI.getFlags();
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);

  assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
                                     "result and 1-3 sources or 2 results and "
                                     "1-2 sources");

  SmallVector<Register, 2> DstRegs;
  for (int I = 0; I < NumDefOps; ++I)
    DstRegs.push_back(MI.getOperand(I).getReg());

  // First, check whether we are narrowing (changing the element type) or
  // reducing the number of vector elements.
  const LLT DstTy = MRI.getType(DstRegs[0]);
  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();

  SmallVector<Register, 8> ExtractedRegs[3];
  SmallVector<Register, 8> Parts;

  // Break down all the sources into NarrowTy pieces we can operate on. This may
  // involve creating merges to a wider type, padded with undef.
  for (int I = 0; I != NumSrcOps; ++I) {
    Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
    // For fewerElements, this is a smaller vector with the same element type.
    LLT OpNarrowTy;
    if (IsNarrow) {
      OpNarrowTy = NarrowScalarTy;

      // In case of narrowing, we need to cast vectors to scalars for this to
      // work properly
      // FIXME: Can we do without the bitcast here if we're narrowing?
      if (SrcTy.isVector()) {
        SrcTy = LLT::scalar(SrcTy.getSizeInBits());
        SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
      }
    } else {
      auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
                                          : ElementCount::getFixed(1);
      OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
    }

    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);

    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
                        TargetOpcode::G_ANYEXT);
  }

  SmallVector<Register, 8> ResultRegs[2];

  // Input operands for each sub-instruction.
  SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());

  int NumParts = ExtractedRegs[0].size();
  const unsigned DstSize = DstTy.getSizeInBits();
  const LLT DstScalarTy = LLT::scalar(DstSize);

  // Narrowing needs to use scalar types
  LLT DstLCMTy, NarrowDstTy;
  if (IsNarrow) {
    DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
    NarrowDstTy = NarrowScalarTy;
  } else {
    DstLCMTy = getLCMType(DstTy, NarrowTy);
    NarrowDstTy = NarrowTy;
  }

  // We widened the source registers to satisfy merge/unmerge size
  // constraints. We'll have some extra fully undef parts.
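  // e.g. narrowing an s96 operation to s64: each source is padded out to s192
  // (three s64 pieces), but only ceil(96/64) = 2 of them carry real data.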
  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;

  for (int I = 0; I != NumRealParts; ++I) {
    // Emit this instruction on each of the split pieces.
    for (int J = 0; J != NumSrcOps; ++J)
      InputRegs[J] = ExtractedRegs[J][I];

    MachineInstrBuilder Inst;
    if (NumDefOps == 1)
      Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
    else
      Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
                                   Flags);

    for (int J = 0; J != NumDefOps; ++J)
      ResultRegs[J].push_back(Inst.getReg(J));
  }

  // Fill out the widened result with undef instead of creating instructions
  // with undef inputs.
  int NumUndefParts = NumParts - NumRealParts;
  if (NumUndefParts != 0) {
    Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
    for (int I = 0; I != NumDefOps; ++I)
      ResultRegs[I].append(NumUndefParts, Undef);
  }

  // Extract the possibly padded result. Use a scratch register if we need to do
  // a final bitcast, otherwise use the original result register.
  Register MergeDstReg;
  for (int I = 0; I != NumDefOps; ++I) {
    if (IsNarrow && DstTy.isVector())
      MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
    else
      MergeDstReg = DstRegs[I];

    buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);

    // Recast to vector if we narrowed a vector
    if (IsNarrow && DstTy.isVector())
      MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI,
                                              unsigned TypeIdx, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  int64_t Imm = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);

  for (Register &R : Parts)
    R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);

  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SDIVREM:
  case G_UDIVREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_ABS:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case G_UMULO:
  case G_SMULO:
    return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
  GISEL_VECREDUCE_CASES_NONSEQ
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  if (!isPowerOf2_32(DstTy.getNumElements()))
    return UnableToLegalize;

  // We only support splitting a shuffle into 2, so adjust NarrowTy
  // accordingly. Further legalization attempts will be needed to split it
  // further.
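  // e.g. an <8 x s16> shuffle is always split into two <4 x s16> halves here,
  // regardless of the NarrowTy that was requested.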
  NarrowTy =
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
  unsigned NewElts = NarrowTy.getNumElements();

  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
  SmallVector<int, 16> Ops;
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with UseBuildVector set.
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= array_lengthof(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= array_lengthof(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      SmallVector<Register, 16> SVOps;

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        unsigned Input = (unsigned)Idx / NewElts;

        if (Input >= array_lengthof(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }

    Ops.clear();
  }

  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  unsigned Opc = MI.getOpcode();
  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
         "Sequential reductions not expected");

  if (TypeIdx != 1)
    return UnableToLegalize;

  // The semantics of the normal non-sequential reductions allow us to freely
  // re-associate the operation.
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
    return UnableToLegalize;

  SmallVector<Register> SplitSrcs;
  const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
  }

  unsigned ScalarOpc;
  switch (Opc) {
  case TargetOpcode::G_VECREDUCE_FADD:
    ScalarOpc = TargetOpcode::G_FADD;
    break;
  case TargetOpcode::G_VECREDUCE_FMUL:
    ScalarOpc = TargetOpcode::G_FMUL;
    break;
  case TargetOpcode::G_VECREDUCE_FMAX:
    ScalarOpc = TargetOpcode::G_FMAXNUM;
    break;
  case TargetOpcode::G_VECREDUCE_FMIN:
    ScalarOpc = TargetOpcode::G_FMINNUM;
    break;
  case TargetOpcode::G_VECREDUCE_ADD:
    ScalarOpc = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_VECREDUCE_MUL:
    ScalarOpc = TargetOpcode::G_MUL;
    break;
  case TargetOpcode::G_VECREDUCE_AND:
    ScalarOpc = TargetOpcode::G_AND;
    break;
  case TargetOpcode::G_VECREDUCE_OR:
    ScalarOpc = TargetOpcode::G_OR;
    break;
  case TargetOpcode::G_VECREDUCE_XOR:
    ScalarOpc = TargetOpcode::G_XOR;
    break;
  case TargetOpcode::G_VECREDUCE_SMAX:
    ScalarOpc = TargetOpcode::G_SMAX;
    break;
  case TargetOpcode::G_VECREDUCE_SMIN:
    ScalarOpc = TargetOpcode::G_SMIN;
    break;
  case TargetOpcode::G_VECREDUCE_UMAX:
    ScalarOpc = TargetOpcode::G_UMAX;
    break;
  case TargetOpcode::G_VECREDUCE_UMIN:
    ScalarOpc = TargetOpcode::G_UMIN;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
    return UnableToLegalize;
  }

  // If the types involved are powers of 2, we can generate intermediate vector
  // ops before generating a final reduction operation.
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  SmallVector<Register> SplitSrcs;
  // Split the sources into NarrowTy size pieces.
  extractParts(SrcReg, NarrowTy,
               SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
  // We're going to do a tree reduction using vector operations until we have
  // one NarrowTy size value left.
  while (SplitSrcs.size() > 1) {
    SmallVector<Register> PartialRdxs;
    for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
      Register LHS = SplitSrcs[Idx];
      Register RHS = SplitSrcs[Idx + 1];
      // Create the intermediate vector op.
      Register Res =
          MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
      PartialRdxs.push_back(Res);
    }
    SplitSrcs = std::move(PartialRdxs);
  }
  // Finally generate the requested NarrowTy based reduction.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(SplitSrcs[0]);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}

// TODO: Optimize if constant shift amount.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  if (auto VRegAndVal =
          getConstantVRegValWithLookThrough(Amt, MRI, true, false)) {
    return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
                                       ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
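  // The strategy below mirrors the constant-amount expansion: compute both
  // the "short" (Amt < NewBitSize) and "long" (Amt >= NewBitSize) results and
  // select between them. e.g. for a 64-bit G_SHL split into 32-bit halves:
  //   short: Lo = InL << Amt
  //          Hi = (InH << Amt) | (InL >> (32 - Amt))
  //   long:  Lo = 0
  //          Hi = InL << (Amt - 32)
  // The extra IsZero select guards Amt == 0, where (32 - Amt) would be an
  // out-of-range shift.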
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);         // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});  // Lo from Hi part.
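    // Note: when Amt is zero, AmtLack above equals NewBitSize, an
    // out-of-range half-width shift, so the IsZero select below must forward
    // InL unchanged.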

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
                                           unsigned int TypeIdx, LLT MoreTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);
  unsigned NumElts = DstTy.getNumElements();
  unsigned WidenNumElts = MoreTy.getNumElements();

  // Expect a canonicalized shuffle.
  if (DstTy != Src1Ty || DstTy != Src2Ty)
    return UnableToLegalize;

  moreElementsVectorSrc(MI, MoreTy, 1);
  moreElementsVectorSrc(MI, MoreTy, 2);

  // Adjust mask based on new input vector length.
  SmallVector<int, 16> NewMask;
  for (unsigned I = 0; I != NumElts; ++I) {
    int Idx = Mask[I];
    if (Idx < static_cast<int>(NumElts))
      NewMask.push_back(Idx);
    else
      NewMask.push_back(Idx - NumElts + WidenNumElts);
  }
  for (unsigned I = NumElts; I != WidenNumElts; ++I)
    NewMask.push_back(-1);
  moreElementsVectorDst(MI, MoreTy, 0);
  MIRBuilder.setInstrAndDebugLoc(MI);
  MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                MI.getOperand(1).getReg(),
                                MI.getOperand(2).getReg(), NewMask);
  MI.eraseFromParent();
  return Legalized;
}

// Produce the parts of a wide multiply from NarrowTy-sized parts of the
// sources, using grade-school long multiplication: each destination part sums
// the low halves of the partial products that land in its column, the high
// halves from the previous column, and the carries accumulated so far.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}

// Narrow add/sub style operations (plain, carry-out, and carry-in/out
// variants) by splitting the operands into NarrowTy pieces and chaining the
// carry from one piece to the next.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstType = MRI.getType(DstReg);
  // FIXME: add support for vector types
  if (DstType.isVector())
    return UnableToLegalize;

  unsigned Opcode = MI.getOpcode();
  unsigned OpO, OpE, OpF;
  switch (Opcode) {
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_ADD:
    OpO = TargetOpcode::G_UADDO;
    OpE = TargetOpcode::G_UADDE;
    OpF = TargetOpcode::G_UADDE;
    if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
      OpF = TargetOpcode::G_SADDE;
    break;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SUB:
    OpO = TargetOpcode::G_USUBO;
    OpE = TargetOpcode::G_USUBE;
    OpF = TargetOpcode::G_USUBE;
    if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
      OpF = TargetOpcode::G_SSUBE;
    break;
  default:
    llvm_unreachable("Unexpected add/sub opcode!");
  }

  // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
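  // For example, G_UADDO defines (Result, CarryOut) and consumes two sources,
  // while G_UADDE additionally consumes a CarryIn as its final operand.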
  unsigned NumDefs = MI.getNumExplicitDefs();
  Register Src1 = MI.getOperand(NumDefs).getReg();
  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
  Register CarryDst, CarryIn;
  if (NumDefs == 2)
    CarryDst = MI.getOperand(1).getReg();
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();

  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy, DummyTy;
  SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
  extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);

  int NarrowParts = Src1Regs.size();
  for (int I = 0, E = Src1Left.size(); I != E; ++I) {
    Src1Regs.push_back(Src1Left[I]);
    Src2Regs.push_back(Src2Left[I]);
  }
  DstRegs.reserve(Src1Regs.size());

  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
    Register DstReg =
        MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
    Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    // Forward the final carry-out to the destination register
    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;

    if (!CarryIn) {
      MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i]});
    } else if (i == e - 1) {
      MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    } else {
      MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    }

    DstRegs.push_back(DstReg);
    CarryIn = CarryOut;
  }
  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
              makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
              makeArrayRef(DstRegs).drop_front(NarrowParts));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
  unsigned DstSize = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumDstParts = DstSize / NarrowSize;
  unsigned NumSrcParts = SrcSize / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts;
  SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only high half of registers if this is high mul.
  ArrayRef<Register> DstRegs(
      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
  MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;

  Register Src = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src);

  // If all finite floats fit into the narrowed integer type, we can just swap
  // out the result type. This is practically only useful for conversions from
  // half to at least 16-bits, so just handle the one case.
  if (SrcTy.getScalarType() != LLT::scalar(16) ||
      NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
    return UnableToLegalize;

  Observer.changingInstr(MI);
  narrowScalarDst(MI, NarrowTy, 0,
                  IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
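      // Only part of this narrow piece overlaps the requested range; extract
      // just the overlapping segment.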
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy;
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
               LeftoverRegs);

  for (Register Reg : LeftoverRegs)
    SrcRegs.push_back(Reg);

  uint64_t NarrowSize = NarrowTy.getSizeInBits();
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    unsigned DstStart = I * NarrowSize;

    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    Register SrcReg = SrcRegs[I];
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
      // The leftover reg is smaller than NarrowTy, so we need to extend it.
      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
    }

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
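      // Only part of the inserted value lands in this narrow piece; extract
      // the overlapping segment of OpReg before inserting it.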
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  uint64_t WideSize = DstRegs.size() * NarrowSize;
  Register DstReg = MI.getOperand(0).getReg();
  if (WideSize > RegTy.getSizeInBits()) {
    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
    MIRBuilder.buildMerge(MergeReg, DstRegs);
    MIRBuilder.buildTrunc(DstReg, MergeReg);
  } else
    MIRBuilder.buildMerge(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Narrow a simple two-source, one-result operation by applying it piecewise
// to NarrowTy (and leftover) parts of both operands.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(),
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
                                  MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    auto LoCTLZ = IsUndef ?
        B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
        B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    auto HiCTTZ = IsUndef ?
        B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
        B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

    auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
    auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
    MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
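    // For example, the 2-bit block 0b11 becomes 3 - ((3 >> 1) & 1) = 2, its
    // number of set bits, using a subtract in place of a shift-mask-add.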
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4, add values from adjacent blocks
    // of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

// Check that (every element of) Reg is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
                                        Register Reg, unsigned BW) {
  return matchUnaryPredicate(
      MRI, Reg,
      [=](const Constant *C) {
        // Null constant here means an undef.
        const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
        return !CI || CI->getValue().urem(BW) != 0;
      },
      /*AllowUndefs*/ true);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();

  if (!isPowerOf2_32(BW))
    return UnableToLegalize;

  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  // These operations approximately do the following (while avoiding undefined
  // shifts by BW):
  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  // TODO: Use smarter heuristic that accounts for vector legalization.
  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
    return lowerFunnelShiftAsShifts(MI);

  // This only works for powers of 2, fallback to shifts if it fails.
  LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
  if (Result == UnableToLegalize)
    return lowerFunnelShiftAsShifts(MI);
  return Result;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(Amt);
  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
  MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  LLT AmtTy = MRI.getType(Amt);

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy == LLT::scalar(1)) {
    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
    return lowerU64ToF32BitOps(MI);
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy == S1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater than or equal to 2^31 for float or 2^63 for double (2^Exp for
  // brevity).

  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
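  // That is, E_f16 = E_f64 - 1023 + 15, folded into a single add below.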
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
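  // The f64 sign is bit 63, i.e. bit 31 of the high word UH; shift it down to
  // bit 15, the f16 sign position.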
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit.
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  return UnableToLegalize;
}

// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
  MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}
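
// For example, G_SMIN lowers to a compare plus select (illustrative MIR;
// register names are made up):
//   %cmp:_(s1)  = G_ICMP intpred(slt), %src0:_(s32), %src1:_(s32)
//   %dst:_(s32) = G_SELECT %cmp, %src0, %src1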
LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  LLT CmpType = MRI.getType(Dst).changeElementSize(1);

  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  auto SignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();
  MIRBuilder.buildOr(Dst, And0, And1, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine: in the
    // absence of a dedicated quiet-snan instruction we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
                                  Flags);
  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
  MI.eraseFromParent();
  return Legalized;
}
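
// Worked example of the expansion below (illustrative): round(2.5) gives
// t = 2.0, d = 0.5, o = 1.0, so the result is 3.0; round(-2.5) gives
// t = -2.0, d = 0.5, o = -1.0, so the result is -3.0 (halfway cases round
// away from zero).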
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ? o : 0.0);

  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);

  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);

  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
                                  Flags);
  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);

  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.

  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}
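
// For example, merging two s16 pieces into an s32 expands below to
// (illustrative MIR; register names are made up):
//   %e0:_(s32)  = G_ZEXT %src0:_(s16)
//   %e1:_(s32)  = G_ZEXT %src1:_(s16)
//   %sh:_(s32)  = G_SHL %e1, 16
//   %dst:_(s32) = G_OR %e0, %sh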
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
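
// The inverse direction: unmerging an s32 into two s16 pieces expands below
// to (illustrative MIR; register names are made up):
//   %dst0:_(s16) = G_TRUNC %src:_(s32)
//   %sh:_(s32)   = G_LSHR %src, 16
//   %dst1:_(s16) = G_TRUNC %sh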
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element.
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
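
// G_DYN_STACKALLOC is lowered below to explicit stack-pointer arithmetic,
// roughly (illustrative MIR; $sp stands for the target's stack pointer and
// register names are made up):
//   %sp:_(p0)   = COPY $sp
//   %int:_(s64) = G_PTRTOINT %sp
//   %sub:_(s64) = G_SUB %int, %allocsize
//   %al:_(s64)  = G_AND %sub, -16        ; only if alignment > 1 (16 here)
//   %new:_(p0)  = G_INTTOPTR %al
//   $sp         = COPY %new
//   %dst:_(p0)  = COPY %new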
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
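
// For example, extracting the second byte of an s32 (offset 8) expands below
// to (illustrative MIR; register names are made up):
//   %sh:_(s32) = G_LSHR %src, 8
//   %dst:_(s8) = G_TRUNC %sh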
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
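
// Worked example of the overflow check below (s8, illustrative): for G_SADDO
// of 100 + 100 the sum wraps to -56, so "result < LHS" is true while
// "RHS < 0" is false; the two compares disagree and the G_XOR reports
// overflow.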
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
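
// Worked example of the expansion below (s8, illustrative): G_SSHLSAT of
// 0x40 << 2 wraps the shift result to 0; shifting back right gives 0, which
// no longer equals the LHS, so the result saturates to 0x7f (the LHS was
// non-negative).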
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    // Shift a 64-bit literal so byte indices >= 4 don't overflow a 32-bit int.
    APInt APMask(SizeInBytes * 8, 0xFFull << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}
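
// For example (illustrative), SwapN(4, ...) with the splat mask 0xF0 swaps
// the two nibbles of every byte: 0x12 -> ((0x12 & 0xF0) >> 4) |
// ((0x12 << 4) & 0xF0) = 0x01 | 0x20 = 0x21.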
// { (Src & Mask) >> N } | { (Src << N) & Mask }
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, APInt Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &MF = MIRBuilder.getMF();

  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;

  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
  const MDString *RegStr = cast<MDString>(
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));

  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!PhysReg.isValid())
    return UnableToLegalize;

  if (IsRead)
    MIRBuilder.buildCopy(ValReg, PhysReg);
  else
    MIRBuilder.buildCopy(PhysReg, ValReg);

  MI.eraseFromParent();
  return Legalized;
}
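
// G_SMULH/G_UMULH are lowered below by widening the multiply, e.g. for s32
// (illustrative MIR; register names are made up):
//   %a64:_(s64) = G_ZEXT %a:_(s32)     ; G_SEXT for the signed form
//   %b64:_(s64) = G_ZEXT %b:_(s32)
//   %mul:_(s64) = G_MUL %a64, %b64
//   %hi:_(s64)  = G_LSHR %mul, 32      ; G_ASHR for the signed form
//   %dst:_(s32) = G_TRUNC %hi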
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Register Result = MI.getOperand(0).getReg();
  LLT OrigTy = MRI.getType(Result);
  auto SizeInBits = OrigTy.getScalarSizeInBits();
  LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);

  auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
  auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
  auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;

  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
  auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
  MIRBuilder.buildTrunc(Result, Shifted);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement vector G_SELECT in terms of XOR, AND, OR.
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register Op1Reg = MI.getOperand(2).getReg();
  Register Op2Reg = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);
  LLT Op1Ty = MRI.getType(Op1Reg);
  if (!DstTy.isVector())
    return UnableToLegalize;

  // Vector selects can have a scalar predicate. If so, splat it into a vector
  // and return, so that later legalization attempts can try again.
  if (MaskTy.isScalar()) {
    Register MaskElt = MaskReg;
    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
    // Generate a vector splat idiom to be pattern matched later.
    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(ShufSplat.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
    return UnableToLegalize;

  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  MI.eraseFromParent();
  return Legalized;
}
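
// For example, G_SDIVREM is split below into (illustrative MIR; register
// names are made up):
//   %div:_(s32) = G_SDIV %a, %b
//   %rem:_(s32) = G_SREM %a, %b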
LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
  // Split DIVREM into individual instructions.
  unsigned Opcode = MI.getOpcode();

  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_ASHR %a, scalar_size-1
  // %v2 = G_ADD %a, %v1
  // %res = G_XOR %v2, %v1
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register OpReg = MI.getOperand(1).getReg();
  auto ShiftAmt =
      MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
  auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_CONSTANT 0
  // %v2 = G_SUB %v1, %a
  // %res = G_SMAX %a, %v2
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
  MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerIsNaN(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src);
  if (MI.getFlags() & MachineInstr::NoFPExcept) {
    // Lower to an unordered comparison.
    auto Zero = MIRBuilder.buildFConstant(SrcTy, 0.0);
    MIRBuilder.buildFCmp(CmpInst::Predicate::FCMP_UNO, Dst, Src, Zero);
    MI.eraseFromParent();
    return Legalized;
  }

  // Use integer operations to avoid traps if the argument is SNaN.

  // NaN has all exp bits set and a non-zero significand. Therefore:
  // isnan(V) == exp mask < abs(V)
  // LLT registers carry no float/int distinction, so the source's bits can be
  // masked directly: clearing the sign bit yields the bit pattern of abs(V).
  // (G_FPTOSI would convert the value numerically, which is not what this bit
  // test needs.)
  auto Mask = APInt::getSignedMaxValue(SrcTy.getScalarSizeInBits());
  auto MaskCst = MIRBuilder.buildConstant(SrcTy, Mask);
  auto AbsV = MIRBuilder.buildAnd(SrcTy, Src, MaskCst);
  auto *FloatTy = getFloatTypeForLLT(MI.getMF()->getFunction().getContext(),
                                     SrcTy.getScalarType());
  if (!FloatTy)
    return UnableToLegalize;
  auto ExpMask = APFloat::getInf(FloatTy->getFltSemantics()).bitcastToAPInt();
  auto ExpMaskCst = MIRBuilder.buildConstant(SrcTy, ExpMask);
  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, Dst, ExpMaskCst, AbsV);
  MI.eraseFromParent();
  return Legalized;
}