//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy.
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
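///
/// For example (a sketch of the arithmetic, not a special case in the code):
/// breaking an s70 \p OrigTy into s32 pieces yields {2, 1} with \p LeftoverTy
/// set to s6, since 70 = 2 * 32 + 6.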
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}

static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {

  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) {}

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
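// A minimal sketch of what the unmerge-based split below produces, assuming
// Reg is s64 and Ty is s32 (register names are illustrative only):
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %reg:_(s64)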
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
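// Worked example for the leftover path below (a sketch, with illustrative
// types): reassembling an s80 result from PartRegs = {s32, s32} and
// LeftoverRegs = {s16} first unmerges every piece to the common GCD type s16,
// then merges the five s16 registers back into the s80 destination.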
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy;
  for (Register PartReg : PartRegs)
    GCDTy = extractGCDType(GCDRegs, ResultTy, LeftoverTy, PartReg);

  for (Register PartReg : LeftoverRegs)
    extractGCDType(GCDRegs, ResultTy, LeftoverTy, PartReg);

  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}

/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}

void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
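// A sketch of the sizes involved below, assuming DstTy = s64, NarrowTy = s48
// and GCDTy = s16: LCMTy is s192, so NumParts = 4 merges of NumSubParts = 3
// GCD-sized pieces each are built, and source slots past the original s16
// pieces are filled with padding chosen by PadStrategy.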
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can
  // reuse the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}

void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}
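// Maps a generic opcode plus a scalar bit width to its runtime-library call.
// For example, getRTLibDesc(G_FDIV, 32) yields RTLIB::DIV_F32, which resolves
// to "__divsf3" on typical targets (the exact symbol is target-dependent).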
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}

/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
                                    MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return.
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args);
}
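// Lowers G_BZERO/G_MEMCPY/G_MEMMOVE/G_MEMSET to their libc equivalents. As a
// sketch: a G_MEMCPY with (ptr, ptr, s64) operands becomes a void call to
// memcpy with two i8* arguments and an i64 length.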
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MIRBuilder.getTII(), MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}
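// Emits a conversion as a single-argument libcall. For instance, a G_FPEXT
// from float to double is expected to become a call to "__extendsfdf2" on
// typical targets using compiler-rt naming.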
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    // a lot of regressions in the emitted code (superfluous COPYs, artifact
    // combines not being hit). This seems to be a problem related to the
    // artifact combiner.
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
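  // Splitting a constant materializes each NarrowTy-sized slice separately.
  // As a sketch with illustrative values: narrowing an s70 G_CONSTANT to s32
  // builds two s32 constants from bits [31:0] and [63:32], plus an s6
  // leftover constant from bits [69:64], then recombines them via insertParts.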
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy,
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_FREEZE:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
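  // A load whose memory size is smaller than the register width is narrowed
  // to a NarrowTy load plus G_ANYEXT; e.g. (a sketch) an s64 G_LOAD backed by
  // a 4-byte memory operand becomes an s32 load any-extended to s64.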
  case TargetOpcode::G_LOAD: {
    auto &MMO = **MI.memoperands_begin();
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    Register SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
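  // Split a wide compare into (Lo, Hi) halves. A sketch of the expansion for
  // a two-part split:
  //   eq/ne: icmp ((LHSL ^ RHSL) | (LHSH ^ RHSH)), 0
  //   ordered: the high parts decide, unless they are equal, in which case
  //   the unsigned comparison of the low parts decides.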
  case TargetOpcode::G_ICMP: {
    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    if (NarrowSize * 2 != SrcSize)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));

    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));

    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());

    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
    } else {
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
    }
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
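    //
    // For example (a sketch with NarrowTy = s32): narrowing
    //   %d:_(s64) = G_SEXT_INREG %s:_(s64), 40
    // keeps the low s32 part as-is, rewrites the part holding bit 39 as
    // G_SEXT_INREG ..., 8, and derives any higher parts by ashr'ing that
    // partially extended part.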
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
    return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

Register LegalizerHelper::coerceToScalar(Register Val) {
  LLT Ty = MRI.getType(Val);
  if (Ty.isScalar())
    return Val;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
  if (Ty.isPointer()) {
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
  }

  Register NewVal = Val;

  assert(Ty.isVector());
  LLT EltTy = Ty.getElementType();
  if (EltTy.isPointer())
    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
}

void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}

void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
}

void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);

  LLT OldTy = MRI.getType(MO.getReg());
  unsigned OldElts = OldTy.getNumElements();
  unsigned NewElts = MoreTy.getNumElements();

  unsigned NumParts = NewElts / OldElts;

  // Use concat_vectors if the result is a multiple of the number of elements.
  if (NumParts * OldElts == NewElts) {
    SmallVector<Register, 8> Parts;
    Parts.push_back(MO.getReg());

    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
    for (unsigned I = 1; I != NumParts; ++I)
      Parts.push_back(ImpDef);

    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
    MO.setReg(Concat.getReg(0));
    return;
  }

  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
  MO.setReg(MoreReg);
}

void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Op = MI.getOperand(OpIdx);
  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
}

void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildBitcast(MO, CastDst);
  MO.setReg(CastDst);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
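    //
    // As a sketch, widening %d:_(s24) = G_MERGE_VALUES %a:_(s8), %b:_(s8),
    // %c:_(s8) to s32 zero-extends each source to s32, shifts each into
    // position with G_SHL, ORs them together, and truncates back to s24.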
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide
  // the original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}

Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
  LLT OrigTy = MRI.getType(OrigReg);
  LLT LCMTy = getLCMType(WideTy, OrigTy);

  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();

  Register UnmergeSrc = WideReg;

  // Create a merge to the LCM type, padding with undef
  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
  // =>
  // %1:_(<4 x s32>) = G_FOO
  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
  if (NumMergeParts > 1) {
    Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
    SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
    MergeParts[0] = WideReg;
    UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
  }

  // Unmerge to the original register and pad with dead defs.
  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
  UnmergeResults[0] = OrigReg;
  for (int I = 1; I != NumUnmergeParts; ++I)
    UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);

  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
  return WideReg;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead
  // defs to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible.
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
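    // A sketch of the emitted sequence (illustrative register names):
    //   %ext:_(WideTy)  = G_ANYEXT %src        ; only if WideTy is wider
    //   %shr:_(ShiftTy) = G_LSHR %ext, Offset
    //   %dst:_(DstTy)   = G_TRUNC %shr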
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize; // TODO

  unsigned Opcode;
  unsigned ExtOpcode;
  Optional<Register> CarryIn = None;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
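  // For example (sketch, illustrative register names): G_SADDO on s8 widened
  // to s32 sign-extends both operands, adds in s32, and reports overflow iff
  // the wide sum does not survive a round trip through s8:
  //   %a32:_(s32) = G_SEXT %a:_(s8)
  //   %b32:_(s32) = G_SEXT %b:_(s8)
  //   %sum:_(s32) = G_ADD %a32, %b32
  //   %t:_(s8)    = G_TRUNC %sum
  //   %r:_(s32)   = G_SEXT %t
  //   %ov:_(s1)   = G_ICMP intpred(ne), %sum, %r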
  Register NewOp;
  if (CarryIn) {
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
  Register DstReg = MI.getOperand(0).getReg();

  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  Register Result = MI.getOperand(0).getReg();
  Register OriginalOverflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(LHS);
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();

  // To determine if the result overflowed in the larger type, we extend the
  // input to the larger type, do the multiply (checking if it overflows),
  // then also check the high bits of the result to see if overflow happened
  // there.
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});

  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                    {LeftOperand, RightOperand});
  auto Mul = Mulo->getOperand(0);
  MIRBuilder.buildTrunc(Result, Mul);

  MachineInstrBuilder ExtResult;
  // Overflow occurred if it occurred in the larger type, or if the high part
  // of the result does not zero/sign-extend the low part. Check this second
  // possibility first.
  if (IsSigned) {
    // For signed, overflow occurred when the high part does not sign-extend
    // the low part.
    ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
  } else {
    // Unsigned overflow occurred when the high part does not zero-extend the
    // low part.
    ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
  }

  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
  // so we don't need to check the overflow result of larger type Mulo.
  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
    auto Overflow =
        MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
    // Finally check if the multiplication in the larger type itself
    // overflowed.
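    // For example (sketch): for s16 G_SMULO widened to s24, 0x4000 * 0x4000
    // = 2^28 overflows the s24 multiply itself, so the wide multiply's own
    // overflow flag must be OR'd with the high-bits check.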
    MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
  } else {
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZ.
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (difference between WideTy and CurTy).
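      // For example (sketch): CTLZ on s16 widened to s32 computes
      // ctlz(zext(x) : s32) and subtracts 16, since zero-extension adds 16
      // extra leading zeros.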
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ABS:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high
    // bits don't affect the result) and then truncate the result back to the
    // original type.
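    // For example (sketch): an s8 G_ADD widened to s32 becomes
    //   %a32:_(s32) = G_ANYEXT %a:_(s8)
    //   %b32:_(s32) = G_ANYEXT %b:_(s8)
    //   %s32:_(s32) = G_ADD %a32, %b32
    //   %d:_(s8)    = G_TRUNC %s32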
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIVREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR
                           ? TargetOpcode::G_SEXT
                           : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UDIVREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to
      // the original type.
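      // (Only the two value operands and the result are widened here; the
      // boolean condition in operand 1 keeps its type.)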
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!Ty.isScalar())
      return UnableToLegalize;

    Observer.changingInstr(MI);

    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? TargetOpcode::G_ZEXT
                                                     : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      unsigned ExtOpcode =
          CmpInst::isSigned(
              static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()))
              ? TargetOpcode::G_SEXT
              : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(
          MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
          TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}

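/// Unmerge \p Src into pieces of type \p Ty, appending the resulting
/// registers to \p Pieces.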
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to
      // match the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        //   %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        //   %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        //   %4:_(<2 x s8>) = G_BITCAST %2
        //   %5:_(<2 x s8>) = G_BITCAST %3
        //   %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //   %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        //   %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        //   %4:_(s16) = G_BITCAST %2
        //   %5:_(s16) = G_BITCAST %3
        //   %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Figure out the bit offset into a register when coercing a vector index for
/// the wide element type. This is only for the case when promoting a vector
/// to one with larger elements.
///
///   %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
///   %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}

/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If
/// this is casting to a vector with a smaller element size, perform multiple
/// element extracts and merge the results. If this is coercing to a vector
/// with larger elements, index the bitcasted vector and extract the target
/// element with bit operations. This is intended to force the indexing in the
/// native register size for architectures that can dynamically index the
/// register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt =
          MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure
    // out the bit offset we need to shift to get the target element. A
    // general expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    //   %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    // =>
    //
    //   %cast = G_BITCAST %vec
    //   %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    //   %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    //   %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    //   %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    //   %elt_bits = G_LSHR %wide_elt, %offset_bits
    //   %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
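    // For example (sketch): <4 x s16> bitcast to <2 x s32> with %idx = 3
    // gives %scaled_idx = 3 >> 1 = 1, %offset_idx = 3 & 1 = 1, and
    // %offset_bits = 1 << 4 = 16, so the s16 element is the high half of the
    // second s32 element.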
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
/// TargetReg, while preserving other bits in \p TargetReg.
///
/// (InsertReg << Offset) | (TargetReg & ~(~(-1 << InsertReg.size()) << Offset))
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}

/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file
/// and want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure
    // out the bit offset we need to shift to get the target element. A
    // general expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
          CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(MachineInstr &MI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineMemOperand &MMO = **MI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();
  MachineFunction &MF = MIRBuilder.getMF();
  if (MemTy.isVector())
    return UnableToLegalize;

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (MemSizeInBits != MemStoreSizeInBits) {
    // Promote to a byte-sized load if not loading an integral number of
    // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);

    Register LoadReg = DstReg;
    LLT LoadTy = DstTy;

    // If this wasn't already an extending load, we need to widen the result
    // register to avoid creating a load with a narrower result than the
    // source.
    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
      LoadTy = WideMemTy;
      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
    }

    if (MI.getOpcode() == TargetOpcode::G_SEXTLOAD) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
    } else if (MI.getOpcode() == TargetOpcode::G_ZEXTLOAD ||
               WideMemTy == DstTy) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      // The extra bits are guaranteed to be zero, since we stored them that
      // way. A zext load from Wide thus automatically gives zext from MemVT.
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
    } else {
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
    }

    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);

    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.getSizeInBits() != MMO.getSizeInBits())
    return UnableToLegalize;

  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
    // This load needs splitting into power of 2 sized loads.
    if (DstTy.isVector())
      return UnableToLegalize;
    if (isPowerOf2_32(DstTy.getSizeInBits()))
      return UnableToLegalize; // Don't know what we're being asked to do.

    // Our strategy here is to generate anyextending loads for the smaller
    // types up to next power-2 result type, and then combine the two larger
    // result values together, before truncating back down to the non-pow-2
    // type.
    // E.g. v1 = i24 load =>
    //   v2 = i32 zextload (2 byte)
    //   v3 = i32 load (1 byte)
    //   v4 = i32 shl v3, 16
    //   v5 = i32 or v4, v2
    //   v1 = i24 trunc v5
    // By doing this we generate the correct truncate which should get
    // combined away as an artifact with a matching extend.
    uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
    uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

    MachineFunction &MF = MIRBuilder.getMF();
    MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
    MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
        &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

    LLT PtrTy = MRI.getType(PtrReg);
    unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
    LLT AnyExtTy = LLT::scalar(AnyExtSize);
    auto LargeLoad = MIRBuilder.buildLoadInstr(
        TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);

    auto OffsetCst = MIRBuilder.buildConstant(
        LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
    Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
    auto SmallPtr =
        MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
    auto SmallLoad = MIRBuilder.buildLoad(AnyExtTy, SmallPtr,
                                          *SmallMMO);

    auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
    auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    MIRBuilder.buildTrunc(DstReg, {Or});
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStore(MachineInstr &MI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // As with G_LOAD, this generates an extend that can be artifact-combined
  // away instead of leaving behind extracts.
  Register SrcReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand &MMO = **MI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();

  if (SrcTy.isVector())
    return UnableToLegalize;

  unsigned StoreWidth = MemTy.getSizeInBits();
  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (StoreWidth != StoreSizeInBits) {
    // Promote to a byte-sized store with upper bits zero if not
    // storing an integral number of bytes. For example, promote
    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
    LLT WideTy = LLT::scalar(StoreSizeInBits);

    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      // Avoid creating a store with a narrower source than result.
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
      SrcTy = WideTy;
    }

    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);

    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
    MI.eraseFromParent();
    return Legalized;
  }

  if (isPowerOf2_32(MemTy.getSizeInBits()))
    return UnableToLegalize; // Don't know what we're being asked to do.

  // Extend to the next pow-2.
  const LLT ExtendTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
      LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr =
      MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}

// Legalize an instruction by changing the opcode in place.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  Observer.changingInstr(MI);
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  Observer.changedInstr(MI);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  using namespace TargetOpcode;

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    auto Quot =
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
    MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
    return lowerSMULH_UMULH(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    LLT Ty = MRI.getType(Res);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.RemoveOperand(1);
    Observer.changedInstr(MI);

    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // Move insert point forward so we can use the Res register if needed.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    auto SignMask =
        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
    Register SubByReg = MI.getOperand(1).getReg();
    MIRBuilder.buildXor(Res, SubByReg, SignMask);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFNeg(Neg, RHS);
    MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // Since round even is the assumed rounding mode for unconstrained FP
    // operations, rint and roundeven are the same operation.
    changeOpcode(MI, TargetOpcode::G_FRINT);
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    return lowerLoad(MI);
  case TargetOpcode::G_STORE:
    return lowerStore(MI);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI);
  case G_UADDO: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();
    LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
    auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
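  // For example (sketch, s8 G_UADDE): LHS = 0xF0, RHS = 0x20, CarryIn = 1
  // gives Res = 0x11, and CarryOut = (0x11 u< 0xF0) = 1.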
MIRBuilder.buildSub(Ty, LHS, RHS); 3188 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn); 3189 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); 3190 3191 auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS); 3192 auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS); 3193 MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); 3194 3195 MI.eraseFromParent(); 3196 return Legalized; 3197 } 3198 case G_UITOFP: 3199 return lowerUITOFP(MI); 3200 case G_SITOFP: 3201 return lowerSITOFP(MI); 3202 case G_FPTOUI: 3203 return lowerFPTOUI(MI); 3204 case G_FPTOSI: 3205 return lowerFPTOSI(MI); 3206 case G_FPTRUNC: 3207 return lowerFPTRUNC(MI); 3208 case G_FPOWI: 3209 return lowerFPOWI(MI); 3210 case G_SMIN: 3211 case G_SMAX: 3212 case G_UMIN: 3213 case G_UMAX: 3214 return lowerMinMax(MI); 3215 case G_FCOPYSIGN: 3216 return lowerFCopySign(MI); 3217 case G_FMINNUM: 3218 case G_FMAXNUM: 3219 return lowerFMinNumMaxNum(MI); 3220 case G_MERGE_VALUES: 3221 return lowerMergeValues(MI); 3222 case G_UNMERGE_VALUES: 3223 return lowerUnmergeValues(MI); 3224 case TargetOpcode::G_SEXT_INREG: { 3225 assert(MI.getOperand(2).isImm() && "Expected immediate"); 3226 int64_t SizeInBits = MI.getOperand(2).getImm(); 3227 3228 Register DstReg = MI.getOperand(0).getReg(); 3229 Register SrcReg = MI.getOperand(1).getReg(); 3230 LLT DstTy = MRI.getType(DstReg); 3231 Register TmpRes = MRI.createGenericVirtualRegister(DstTy); 3232 3233 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits); 3234 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0)); 3235 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0)); 3236 MI.eraseFromParent(); 3237 return Legalized; 3238 } 3239 case G_EXTRACT_VECTOR_ELT: 3240 case G_INSERT_VECTOR_ELT: 3241 return lowerExtractInsertVectorElt(MI); 3242 case G_SHUFFLE_VECTOR: 3243 return lowerShuffleVector(MI); 3244 case G_DYN_STACKALLOC: 3245 return lowerDynStackAlloc(MI); 3246 case G_EXTRACT: 3247 return lowerExtract(MI); 3248 case G_INSERT: 3249 return lowerInsert(MI); 3250 case G_BSWAP: 3251 return lowerBswap(MI); 3252 case G_BITREVERSE: 3253 return lowerBitreverse(MI); 3254 case G_READ_REGISTER: 3255 case G_WRITE_REGISTER: 3256 return lowerReadWriteRegister(MI); 3257 case G_UADDSAT: 3258 case G_USUBSAT: { 3259 // Try to make a reasonable guess about which lowering strategy to use. The 3260 // target can override this with custom lowering and calling the 3261 // implementation functions. 3262 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 3263 if (LI.isLegalOrCustom({G_UMIN, Ty})) 3264 return lowerAddSubSatToMinMax(MI); 3265 return lowerAddSubSatToAddoSubo(MI); 3266 } 3267 case G_SADDSAT: 3268 case G_SSUBSAT: { 3269 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 3270 3271 // FIXME: It would probably make more sense to see if G_SADDO is preferred, 3272 // since it's a shorter expansion. However, we would need to figure out the 3273 // preferred boolean type for the carry out for the query. 
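    //
    // Rough sketch of that G_SADDO-based fallback (see
    // lowerAddSubSatToAddoSubo); the register names are illustrative only:
    //   %sum, %ov = G_SADDO %a, %b
    //   %clamp = (%sum >>s (BW - 1)) ^ SIGNED_MIN  ; INT_MAX or INT_MIN
    //   %res = G_SELECT %ov, %clamp, %sum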
    if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SSHLSAT:
  case G_USHLSAT:
    return lowerShlSat(MI);
  case G_ABS:
    return lowerAbsToAddXor(MI);
  case G_SELECT:
    return lowerSelect(MI);
  case G_SDIVREM:
  case G_UDIVREM:
    return lowerDIVREM(MI);
  case G_FSHL:
  case G_FSHR:
    return lowerFunnelShift(MI);
  case G_ROTL:
  case G_ROTR:
    return lowerRotate(MI);
  }
}

Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
                                                  Align MinAlign) const {
  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
  // datalayout for the preferred alignment. Also there should be a target hook
  // for this to allow targets to reduce the alignment and ignore the
  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless
  // of the type.
  return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
}

MachineInstrBuilder
LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
                                      MachinePointerInfo &PtrInfo) {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);

  unsigned AddrSpace = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));

  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
}

static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
                                        LLT VecTy) {
  int64_t IdxVal;
  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
    return IdxReg;

  LLT IdxTy = B.getMRI()->getType(IdxReg);
  unsigned NElts = VecTy.getNumElements();
  if (isPowerOf2_32(NElts)) {
    APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  }

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
      .getReg(0);
}

Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
                                                  Register Index) {
  LLT EltTy = VecTy.getElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
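  // e.g. for a <4 x s32> vector, element 2 ends up at byte offset 2 * 4 = 8
  // from VecPtr (assuming the ABI size matches the bit size).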
  assert(EltSize * 8 == EltTy.getSizeInBits() &&
         "Converting bits to bytes lost precision");

  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);

  LLT IdxTy = MRI.getType(Index);
  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
                                 MIRBuilder.buildConstant(IdxTy, EltSize));

  LLT PtrTy = MRI.getType(VecPtr);
  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();

  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));

  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
  MI.eraseFromParent();
  return Legalized;
}

// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different
// element type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases,
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//              s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
                                            : ElementCount::getFixed(1);
    LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and set up
      // the result.
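      // e.g. for <3 x s64> = G_SHL <3 x s64>, <3 x s32> split by <2 x s64>,
      // this creates one <2 x s64> G_SHL part plus one s64 leftover G_SHL.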
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    NarrowTy1 =
        LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
  } else {
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst =
        MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});

    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts)
                                   : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
                                                  SrcTy.getScalarSizeInBits())
                                    : SrcTy.getElementType();
  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts)
                                   : NarrowTy.getNumElements();
    NarrowTy0 =
        LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred =
      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp =
          MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type.
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 =
            LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
                        CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // TODO: Breaking down an even vector condition isn't implemented yet
      // either.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                           .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy sized pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
    return UnableToLegalize;
  }

  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
                                         LLT NarrowTy) {
  Register Result = MI.getOperand(0).getReg();
  Register Overflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();

  LLT SrcTy = MRI.getType(LHS);
  if (!SrcTy.isVector())
    return UnableToLegalize;

  LLT ElementType = SrcTy.getElementType();
  LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
  const ElementCount NumResult = SrcTy.getElementCount();
  LLT GCDTy = getGCDType(SrcTy, NarrowTy);

  // Unmerge the operands to smaller parts of GCD type.
  auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
  auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);

  const int NumOps = UnmergeLHS->getNumOperands() - 1;
  const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
  LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
  LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);

  // Perform the operation over unmerged parts.
  SmallVector<Register, 8> ResultParts;
  SmallVector<Register, 8> OverflowParts;
  for (int I = 0; I != NumOps; ++I) {
    Register Operand1 = UnmergeLHS->getOperand(I).getReg();
    Register Operand2 = UnmergeRHS->getOperand(I).getReg();
    auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
                                         {Operand1, Operand2});
    ResultParts.push_back(PartMul->getOperand(0).getReg());
    OverflowParts.push_back(PartMul->getOperand(1).getReg());
  }

  LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
  LLT OverflowLCMTy =
      LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);

  // Recombine the pieces to the original result and overflow registers.
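  // e.g. a <4 x s32> G_UMULO split with a <2 x s32> NarrowTy produces two
  // <2 x s32> G_UMULO parts; their result and overflow pieces are
  // concatenated back into the original registers below.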
  buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
  buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
  MI.eraseFromParent();
  return Legalized;
}

// Handle FewerElementsVector of a G_BUILD_VECTOR or G_CONCAT_VECTORS that
// produces a vector.
//
// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
// undef as necessary.
//
//  %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
//    -> <2 x s16>
//
//  %4:_(s16) = G_IMPLICIT_DEF
//  %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
//  %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
//  %7:_(<2 x s16>) = G_IMPLICIT_DEF
//  %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
//  %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);

  // Break into a common type.
  SmallVector<Register, 16> Parts;
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
    extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());

  // Build the requested new merge, padding with undef.
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
                                  TargetOpcode::G_ANYEXT);

  // Pack into the original result register.
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst =
      getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
                                        /*HandleFConstants*/ false);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
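    // e.g. extracting element 5 of <8 x s16> with a <4 x s16> NarrowVecTy:
    // the source splits into two <4 x s16> pieces, PartIdx becomes 5 / 4 = 1,
    // and the extract uses adjusted index 5 - 4 = 1 within that piece.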
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type,
  // so we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->isAtomic())
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
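  // e.g. narrowing an s96 load with an s64 NarrowTy: one s64 load at byte
  // offset 0 plus an s32 leftover load at byte offset 8, remerged into the
  // original s96 value.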
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(MMO, ByteOffset, PartTy);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
                                      LLT NarrowTy) {
  assert(TypeIdx == 0 && "only one type index expected");

  const unsigned Opc = MI.getOpcode();
  const int NumDefOps = MI.getNumExplicitDefs();
  const int NumSrcOps = MI.getNumOperands() - NumDefOps;
  const unsigned Flags = MI.getFlags();
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);

  assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
                                     "result and 1-3 sources or 2 results and "
                                     "1-2 sources");

  SmallVector<Register, 2> DstRegs;
  for (int I = 0; I < NumDefOps; ++I)
    DstRegs.push_back(MI.getOperand(I).getReg());

  // First of all check whether we are narrowing (changing the element type)
  // or reducing the vector elements.
  const LLT DstTy = MRI.getType(DstRegs[0]);
  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();

  SmallVector<Register, 8> ExtractedRegs[3];
  SmallVector<Register, 8> Parts;

  // Break down all the sources into NarrowTy pieces we can operate on. This
  // may involve creating merges to a wider type, padded with undef.
  for (int I = 0; I != NumSrcOps; ++I) {
    Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
    // For fewerElements, this is a smaller vector with the same element type.
    LLT OpNarrowTy;
    if (IsNarrow) {
      OpNarrowTy = NarrowScalarTy;

      // In case of narrowing, we need to cast vectors to scalars for this to
      // work properly.
      // FIXME: Can we do without the bitcast here if we're narrowing?
      if (SrcTy.isVector()) {
        SrcTy = LLT::scalar(SrcTy.getSizeInBits());
        SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
      }
    } else {
      auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
                                          : ElementCount::getFixed(1);
      OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
    }

    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);

    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
                        TargetOpcode::G_ANYEXT);
  }

  SmallVector<Register, 8> ResultRegs[2];

  // Input operands for each sub-instruction.
  SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());

  int NumParts = ExtractedRegs[0].size();
  const unsigned DstSize = DstTy.getSizeInBits();
  const LLT DstScalarTy = LLT::scalar(DstSize);

  // Narrowing needs to use scalar types.
  LLT DstLCMTy, NarrowDstTy;
  if (IsNarrow) {
    DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
    NarrowDstTy = NarrowScalarTy;
  } else {
    DstLCMTy = getLCMType(DstTy, NarrowTy);
    NarrowDstTy = NarrowTy;
  }

  // We widened the source registers to satisfy merge/unmerge size
  // constraints. We'll have some extra fully undef parts.
  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;

  for (int I = 0; I != NumRealParts; ++I) {
    // Emit this instruction on each of the split pieces.
    for (int J = 0; J != NumSrcOps; ++J)
      InputRegs[J] = ExtractedRegs[J][I];

    MachineInstrBuilder Inst;
    if (NumDefOps == 1)
      Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
    else
      Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
                                   Flags);

    for (int J = 0; J != NumDefOps; ++J)
      ResultRegs[J].push_back(Inst.getReg(J));
  }

  // Fill out the widened result with undef instead of creating instructions
  // with undef inputs.
  int NumUndefParts = NumParts - NumRealParts;
  if (NumUndefParts != 0) {
    Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
    for (int I = 0; I != NumDefOps; ++I)
      ResultRegs[I].append(NumUndefParts, Undef);
  }

  // Extract the possibly padded result. Use a scratch register if we need to
  // do a final bitcast, otherwise use the original result register.
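  // e.g. narrowing a <2 x s64> G_AND with an s32 NarrowTy: the sources are
  // bitcast to s128, the operation is emitted on four s32 pieces, and the
  // remerged s128 scratch result is bitcast back to <2 x s64>.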
  Register MergeDstReg;
  for (int I = 0; I != NumDefOps; ++I) {
    if (IsNarrow && DstTy.isVector())
      MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
    else
      MergeDstReg = DstRegs[I];

    buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);

    // Recast to vector if we narrowed a vector.
    if (IsNarrow && DstTy.isVector())
      MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI,
                                              unsigned TypeIdx,
                                              LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  int64_t Imm = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);

  for (Register &R : Parts)
    R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);

  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SDIVREM:
  case G_UDIVREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_ABS:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case G_UMULO:
  case G_SMULO:
    return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
  GISEL_VECREDUCE_CASES_NONSEQ
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  if (!isPowerOf2_32(DstTy.getNumElements()))
    return UnableToLegalize;

  // We only support splitting a shuffle into 2, so adjust NarrowTy
  // accordingly. Further legalization attempts will be needed to split
  // further.
  NarrowTy =
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
  unsigned NewElts = NarrowTy.getNumElements();

  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a
  // G_BUILD_VECTOR.
  SmallVector<int, 16> Ops;
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with UseBuildVector set.
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
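    // e.g. when splitting an <8 x s32> shuffle in two, a Lo half whose mask
    // only references Inputs[0] and Inputs[3] becomes a single <4 x s32>
    // G_SHUFFLE_VECTOR of those two inputs with a remapped mask.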
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= array_lengthof(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= array_lengthof(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      SmallVector<Register, 16> SVOps;

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        unsigned Input = (unsigned)Idx / NewElts;

        if (Input >= array_lengthof(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }

    Ops.clear();
  }

  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  unsigned Opc = MI.getOpcode();
  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
         "Sequential reductions not expected");

  if (TypeIdx != 1)
    return UnableToLegalize;

  // The semantics of the normal non-sequential reductions allow us to freely
  // re-associate the operation.
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
    return UnableToLegalize;

  SmallVector<Register> SplitSrcs;
  const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
  }

  unsigned ScalarOpc;
  switch (Opc) {
  case TargetOpcode::G_VECREDUCE_FADD:
    ScalarOpc = TargetOpcode::G_FADD;
    break;
  case TargetOpcode::G_VECREDUCE_FMUL:
    ScalarOpc = TargetOpcode::G_FMUL;
    break;
  case TargetOpcode::G_VECREDUCE_FMAX:
    ScalarOpc = TargetOpcode::G_FMAXNUM;
    break;
  case TargetOpcode::G_VECREDUCE_FMIN:
    ScalarOpc = TargetOpcode::G_FMINNUM;
    break;
  case TargetOpcode::G_VECREDUCE_ADD:
    ScalarOpc = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_VECREDUCE_MUL:
    ScalarOpc = TargetOpcode::G_MUL;
    break;
  case TargetOpcode::G_VECREDUCE_AND:
    ScalarOpc = TargetOpcode::G_AND;
    break;
  case TargetOpcode::G_VECREDUCE_OR:
    ScalarOpc = TargetOpcode::G_OR;
    break;
  case TargetOpcode::G_VECREDUCE_XOR:
    ScalarOpc = TargetOpcode::G_XOR;
    break;
  case TargetOpcode::G_VECREDUCE_SMAX:
    ScalarOpc = TargetOpcode::G_SMAX;
    break;
  case TargetOpcode::G_VECREDUCE_SMIN:
    ScalarOpc = TargetOpcode::G_SMIN;
    break;
  case TargetOpcode::G_VECREDUCE_UMAX:
    ScalarOpc = TargetOpcode::G_UMAX;
    break;
  case TargetOpcode::G_VECREDUCE_UMIN:
    ScalarOpc = TargetOpcode::G_UMIN;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
    return UnableToLegalize;
  }

  // If the types involved are powers of 2, we can generate intermediate vector
  // ops, before generating a final reduction operation.
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  SmallVector<Register> SplitSrcs;
  // Split the sources into NarrowTy size pieces.
  extractParts(SrcReg, NarrowTy,
               SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
  // We're going to do a tree reduction using vector operations until we have
  // one NarrowTy size value left.
  while (SplitSrcs.size() > 1) {
    SmallVector<Register> PartialRdxs;
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
      Register LHS = SplitSrcs[Idx];
      Register RHS = SplitSrcs[Idx + 1];
      // Create the intermediate vector op.
      Register Res =
          MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
      PartialRdxs.push_back(Res);
    }
    SplitSrcs = std::move(PartialRdxs);
  }
  // Finally generate the requested NarrowTy based reduction.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(SplitSrcs[0]);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy,
                                             const LLT AmtTy) {
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}

// TODO: Optimize if constant shift amount.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
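  // e.g. for an s64 shift narrowed to s32 halves: when Amt < 32 the high part
  // of a left shift combines as (InH << Amt) | (InL >> (32 - Amt)); when
  // Amt >= 32 the low input alone feeds the result. Selects on IsShort and
  // IsZero below pick the right variant at run time.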
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.
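
    // Select between the short and long expansions; the extra IsZero select
    // pins Amt == 0 to the unshifted low input, since AmtLack would be a
    // full-width (poison) shift amount in that case.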
  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);         // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});  // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
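
// E.g., with two parts per operand (Src1 = {a0, a1}, Src2 = {b0, b1}, low
// part first), the loops below compute
//   Dst0 = mul(a0, b0)
//   Dst1 = mul(a1, b0) + mul(a0, b1) + umulh(a0, b0)
// accumulating the carries of each addition into the next destination part
// when one exists.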
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since the value for the next index is not calculated, neither is
      // CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstType = MRI.getType(DstReg);
  // FIXME: add support for vector types
  if (DstType.isVector())
    return UnableToLegalize;

  unsigned Opcode = MI.getOpcode();
  unsigned OpO, OpE, OpF;
  switch (Opcode) {
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_ADD:
    OpO = TargetOpcode::G_UADDO;
    OpE = TargetOpcode::G_UADDE;
    OpF = TargetOpcode::G_UADDE;
    if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
      OpF = TargetOpcode::G_SADDE;
    break;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SUB:
    OpO = TargetOpcode::G_USUBO;
    OpE = TargetOpcode::G_USUBE;
    OpF = TargetOpcode::G_USUBE;
    if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
      OpF = TargetOpcode::G_SSUBE;
    break;
  default:
    llvm_unreachable("Unexpected add/sub opcode!");
  }
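
  // E.g., narrowing an s128 G_ADD to s64 parts emits G_UADDO for the low
  // part and chains the carry into a G_UADDE for the high part. For the
  // signed-overflow variants only the final (most significant) part switches
  // to G_SADDE/G_SSUBE, so the overflow flag is computed on the top part.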

  // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
  unsigned NumDefs = MI.getNumExplicitDefs();
  Register Src1 = MI.getOperand(NumDefs).getReg();
  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
  Register CarryDst, CarryIn;
  if (NumDefs == 2)
    CarryDst = MI.getOperand(1).getReg();
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();

  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy, DummyTy;
  SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
  extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);

  int NarrowParts = Src1Regs.size();
  for (int I = 0, E = Src1Left.size(); I != E; ++I) {
    Src1Regs.push_back(Src1Left[I]);
    Src2Regs.push_back(Src2Left[I]);
  }
  DstRegs.reserve(Src1Regs.size());

  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
    Register DstReg =
        MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
    Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    // Forward the final carry-out to the destination register
    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;

    if (!CarryIn) {
      MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i]});
    } else if (i == e - 1) {
      MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    } else {
      MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    }

    DstRegs.push_back(DstReg);
    CarryIn = CarryOut;
  }
  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
              makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
              makeArrayRef(DstRegs).drop_front(NarrowParts));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
  unsigned DstSize = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumDstParts = DstSize / NarrowSize;
  unsigned NumSrcParts = SrcSize / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts;
  SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only high half of registers if this is high mul.
  ArrayRef<Register> DstRegs(
      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
  MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;

  Register Src = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src);

  // If all finite floats fit into the narrowed integer type, we can just swap
  // out the result type. This is practically only useful for conversions from
  // half to at least 16-bits, so just handle the one case.
  if (SrcTy.getScalarType() != LLT::scalar(16) ||
      NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
    return UnableToLegalize;

  Observer.changingInstr(MI);
  narrowScalarDst(MI, NarrowTy, 0,
                  IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
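
  // E.g., extracting an s16 at offset 24 from an s64 narrowed to s32 parts
  // takes bits [24,32) of part 0 (ExtractOffset 24, SegSize 8) and bits
  // [0,8) of part 1 (ExtractOffset 0, SegSize 8), then merges the two s8
  // segments into the result.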
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy;
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
               LeftoverRegs);

  for (Register Reg : LeftoverRegs)
    SrcRegs.push_back(Reg);

  uint64_t NarrowSize = NarrowTy.getSizeInBits();
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    unsigned DstStart = I * NarrowSize;

    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    Register SrcReg = SrcRegs[I];
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
      // The leftover reg is smaller than NarrowTy, so we need to extend it.
      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
    }

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  uint64_t WideSize = DstRegs.size() * NarrowSize;
  Register DstReg = MI.getOperand(0).getReg();
  if (WideSize > RegTy.getSizeInBits()) {
    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
    MIRBuilder.buildMerge(MergeReg, DstRegs);
    MIRBuilder.buildTrunc(DstReg, MergeReg);
  } else
    MIRBuilder.buildMerge(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(), {LeftoverTy},
        {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
                                  MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
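    // E.g., for an s64 source split into s32 halves, Src = 0xFFFF has
    // Hi == 0, so the result is 32 + ctlz32(0xFFFF) = 32 + 16 = 48.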
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    auto LoCTLZ = IsUndef ?
      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
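    // E.g., for an s64 source split into s32 halves,
    // Src = 0x0001000000000000 has Lo == 0, so the result is
    // cttz32(0x00010000) + 32 = 16 + 32 = 48.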
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    auto HiCTTZ = IsUndef ?
      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

    auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
    auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
    MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
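    //
    // E.g., for an 8-bit x = 0b00010000 the or-chain smears the leading one
    // into 0b00011111; popcount gives 5, and 8 - 5 = 3 leading zeros.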
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
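    //
    // E.g., for an 8-bit x = 0b10101000, ~x & (x - 1) = 0b00000111 keeps
    // exactly the bits below the lowest set bit, so popcount gives the 3
    // trailing zeros.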
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
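    // E.g., a 2-bit block val = 0b11 becomes 0b11 - 0b01 = 0b10 = 2, its
    // population count; the same holds for 0b00, 0b01, and 0b10.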
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

// Check that (every element of) Reg is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
                                        Register Reg, unsigned BW) {
  return matchUnaryPredicate(
      MRI, Reg,
      [=](const Constant *C) {
        // Null constant here means an undef.
        const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
        return !CI || CI->getValue().urem(BW) != 0;
      },
      /*AllowUndefs*/ true);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();

  if (!isPowerOf2_32(BW))
    return UnableToLegalize;

  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
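    //
    // E.g., for i8 operands with Z = 3, fshl produces (X << 3) | (Y >> 5):
    // the high 8 bits of the 16-bit concatenation X:Y shifted left by 3.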
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  // These operations approximately do the following (while avoiding undefined
  // shifts by BW):
  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  // TODO: Use smarter heuristic that accounts for vector legalization.
  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
    return lowerFunnelShiftAsShifts(MI);

  // This only works for powers of 2, fallback to shifts if it fails.
  LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
  if (Result == UnableToLegalize)
    return lowerFunnelShiftAsShifts(MI);
  return Result;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(Amt);
  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
  MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  LLT AmtTy = MRI.getType(Amt);

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
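    //
    // E.g., (rotl i8 x, 3) lowers to (x << 3) | (x >> 5), since -3 & 7 == 5.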
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }
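  //
  // E.g., u = 1: lz = 63, e = 127, the shifted-out mantissa bits are all
  // zero, so v = 127 << 23 = 0x3f800000 = 1.0f and no rounding increment is
  // needed.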

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy == LLT::scalar(1)) {
    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
    return lowerU64ToF32BitOps(MI);
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy == S1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
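  //
  // E.g., for f32 -> u32 the threshold is 2^31: 3e9f converts as
  // fptosi(3e9f - 2^31) = 852516352, and xor-ing with 0x80000000 restores
  // 3000000000 (both values here happen to be exactly representable in f32).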

  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
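  //
  // E.g., converting 2.0f: the biased exponent field is 128, so
  // Exponent = 128 - 127 = 1; the implicit-bit mantissa R = 0x800000 is
  // shifted right by 23 - 1 = 22 to give 2, and the sign fixup is a no-op.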

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);
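
  // E == 1039 corresponds to an f64 exponent field of 2047 (Inf/NaN), since
  // 2047 - 1023 + 15 == 1039; in that case forward I (Inf or quieted NaN).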
  auto CmpEGt1039 = MIRBuilder.buildICmp(
      CmpInst::ICMP_EQ, S1, E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  return UnableToLegalize;
}

// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
  MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  LLT CmpType = MRI.getType(Dst).changeElementSize(1);

  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();
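
  // E.g., for f32 operands of equal size this computes
  // (Src0 & 0x7fffffff) | (Src1 & 0x80000000), i.e. Src0's magnitude with
  // Src1's sign bit.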
  auto SignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();
  MIRBuilder.buildOr(Dst, And0, And1, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
                                  Flags);
  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ? o : 0.0);
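  //
  // E.g., round(-2.5): t = -2.0, d = 0.5, o = -1.0, so the result is
  // t + o = -3.0 (ties round away from zero).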
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ? o : 0.0);

  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);

  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);

  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
                                  Flags);
  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);

  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.

  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}
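
// Lower G_MERGE_VALUES by zero-extending each part to the wide result type,
// shifting it into position, and ORing the pieces together, e.g. merging two
// s32 values into an s64:
//   %dst = zext(%src0) | (zext(%src1) << 32)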
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    Register NextResult = I + 1 == NumOps && WideTy == DstTy ?
      DstReg : MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
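
// Lower G_UNMERGE_VALUES to shifts and truncates of the scalarized source,
// e.g. unmerging an s64 into two s32 halves:
//   %dst0 = trunc(%src)
//   %dst1 = trunc(%src >> 32)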
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
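
// Lower G_SHUFFLE_VECTOR by extracting each source element selected by the
// mask and rebuilding the result with G_BUILD_VECTOR; negative (undef) mask
// entries become G_IMPLICIT_DEF elements.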
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
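
// Lower G_INSERT by masking off the inserted bit range in the (integer) source
// and ORing in the zero-extended, shifted insert value, e.g. inserting an s16
// into an s64 at offset 16:
//   %dst = (%src & ~(0xffff << 16)) | (zext(%ins) << 16)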
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}
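
// Worked example for the signed saturating-add path below, on s8
// (illustrative): sadd.sat(100, 100) computes hi = 127 - smax(100, 0) = 27
// and lo = -128 - smin(100, 0) = -128, clamps the RHS to
// smin(smax(-128, 100), 27) = 27, and produces 100 + 27 = 127 (saturated).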
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
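
// Lower G_SSHLSAT/G_USHLSAT by shifting, shifting back, and selecting the
// saturated value if the round trip lost bits. E.g. for ushl.sat on s8
// (illustrative): 200 << 1 truncates to 144, and 144 >> 1 = 72 != 200, so
// the result saturates to 255.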
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}
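
// Lower G_BSWAP to shifts, masks, and ORs, e.g. an s32 value 0xAABBCCDD
// becomes 0xDDCCBBAA: the outermost bytes are swapped with a single shift
// pair, then each inner byte pair is masked and shifted into place.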
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}

// { (Src & Mask) >> N } | { (Src << N) & Mask }
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, APInt Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &MF = MIRBuilder.getMF();

  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;

  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
  const MDString *RegStr = cast<MDString>(
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));

  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!PhysReg.isValid())
    return UnableToLegalize;

  if (IsRead)
    MIRBuilder.buildCopy(ValReg, PhysReg);
  else
    MIRBuilder.buildCopy(PhysReg, ValReg);

  MI.eraseFromParent();
  return Legalized;
}
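
// Lower G_SMULH/G_UMULH by extending both operands to twice the width,
// multiplying, and keeping the high half, e.g. for s32 operands:
//   %dst = trunc((sext(%a) * sext(%b)) >> 32)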
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Register Result = MI.getOperand(0).getReg();
  LLT OrigTy = MRI.getType(Result);
  auto SizeInBits = OrigTy.getScalarSizeInBits();
  LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);

  auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
  auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
  auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;

  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
  auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
  MIRBuilder.buildTrunc(Result, Shifted);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement vector G_SELECT in terms of XOR, AND, OR.
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register Op1Reg = MI.getOperand(2).getReg();
  Register Op2Reg = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);
  LLT Op1Ty = MRI.getType(Op1Reg);
  if (!DstTy.isVector())
    return UnableToLegalize;

  // Vector selects can have a scalar predicate. If so, splat it into a vector
  // and finish, so that later legalization attempts can try again.
  if (MaskTy.isScalar()) {
    Register MaskElt = MaskReg;
    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
    // Generate a vector splat idiom to be pattern matched later.
    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(ShufSplat.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
    return UnableToLegalize;

  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  MI.eraseFromParent();
  return Legalized;
}
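
// Lower G_SDIVREM/G_UDIVREM into a separate divide and a separate remainder
// on the same operands:
//   %div, %rem = G_SDIVREM %a, %b  =>  %div = G_SDIV %a, %b
//                                      %rem = G_SREM %a, %b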
LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
  // Split DIVREM into individual instructions.
  unsigned Opcode = MI.getOpcode();

  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_ASHR %a, scalar_size-1
  // %v2 = G_ADD %a, %v1
  // %res = G_XOR %v2, %v1
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register OpReg = MI.getOperand(1).getReg();
  auto ShiftAmt =
      MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
  auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_CONSTANT 0
  // %v2 = G_SUB %v1, %a
  // %res = G_SMAX %a, %v2
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
  MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
  MI.eraseFromParent();
  return Legalized;
}