//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy.
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}

static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {

  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) {
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) {
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
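  // Illustrative sketch (types hypothetical, not from the original source):
  // splitting RegTy = s70 with MainTy = s16 yields four s16 G_EXTRACTs at bit
  // offsets 0, 16, 32 and 48, plus a single LeftoverTy = s6 G_EXTRACT at
  // offset 64.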
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}

/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[I] = MI.getOperand(I).getReg();
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                    LLT DstTy, LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);

  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }

  return GCDTy;
}

LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
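  // The padding value mirrors the requested extension kind: zero bits for
  // G_ZEXT, undef for G_ANYEXT, and for G_SEXT a broadcast of the top
  // source's sign bit, produced by the G_ASHR built below.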
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can
  // reuse the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}

void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.
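  // Illustrative sketch for the scalar path (register names hypothetical),
  // with DstTy = s48, LCMTy = s96 and three s32 pieces:
  //   %merge:_(s96) = G_MERGE_VALUES %p0:_(s32), %p1:_(s32), %p2:_(s32)
  //   %dst:_(s48) = G_TRUNC %merge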

  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    MIRBuilder.buildExtract(DstReg, Remerge, 0);
    return;
  }

  llvm_unreachable("unhandled case");
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}

/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
                                    MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
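  // For example, if the caller's return value is marked zeroext or signext,
  // the libcall's raw return bits may not match what the caller promised, so
  // the checks below reject the tail call.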
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return.
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
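    // Pointer operands are passed as i8* in their address space; any other
    // scalar is passed as a plain integer of the same bit width.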
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  MIRBuilder.setInstrAndDebugLoc(MI);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
                    isLibCallInTailPosition(MIRBuilder.getTII(), MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
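      // Note that successive iterations of this loop also sweep away any
      // debug instructions sitting between the call and the final return.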
      Next->eraseFromParent();
    } while (MI.getNextNode());
  }

  return LegalizerHelper::Legalized;
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize =
        MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    // a lot of regressions in the emitted code (superfluous COPYs, artifact
    // combines not being hit). This seems to be a problem related to the
    // artifact combiner.
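    // Illustrative sketch (register names hypothetical) for an s70
    // G_IMPLICIT_DEF with NarrowTy = s64:
    //   %undef:_(s64) = G_IMPLICIT_DEF
    //   %dst:_(s70) = G_ANYEXT %undef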
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy,
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_FREEZE:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);

  case TargetOpcode::G_ADD: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    // Expand in terms of carry-setting/consuming G_UADDE instructions.
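    // Illustrative sketch (register names hypothetical) for an s64 G_ADD
    // split into two s32 parts:
    //   %lo:_(s32), %c1:_(s1) = G_UADDO %lhs0, %rhs0
    //   %hi:_(s32), %c2:_(s1) = G_UADDE %lhs1, %rhs1, %c1
    //   %dst:_(s64) = G_MERGE_VALUES %lo, %hi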
    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    Register CarryIn;
    for (int i = 0; i < NumParts; ++i) {
      Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      if (i == 0)
        MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
      else {
        MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                              Src2Regs[i], CarryIn);
      }

      DstRegs.push_back(DstReg);
      CarryIn = CarryOut;
    }
    Register DstReg = MI.getOperand(0).getReg();
    if (MRI.getType(DstReg).isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SUB: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();

    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
                          {Src1Regs[0], Src2Regs[0]});
    DstRegs.push_back(DstReg);
    Register BorrowIn = BorrowOut;
    for (int i = 1; i < NumParts; ++i) {
      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));

      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
                            {Src1Regs[i], Src2Regs[i], BorrowIn});

      DstRegs.push_back(DstReg);
      BorrowIn = BorrowOut;
    }
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &MMO = **MI.memoperands_begin();
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = **MI.memoperands_begin();
    if (MMO.getSizeInBits() == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else {
      MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg,
                                MMO);
    }

    if (ZExt)
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    const auto &MMO = **MI.memoperands_begin();

    Register SrcReg = MI.getOperand(0).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      auto &MMO = **MI.memoperands_begin();
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(MI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
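    // Build one narrow G_PHI per part; each one takes the matching piece of
    // every predecessor's value.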
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    if (NarrowSize * 2 != SrcSize)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));

    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));

    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());

    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
    } else {
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
    }
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart.
    // Components below the extension point are unmodified. The component
    // containing the extension point becomes a narrower SEXT_INREG. Components
    // above it are ashr'd from the component containing the extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

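    // Illustrative sketch (register names hypothetical): an s64 G_SEXT_INREG
    // of 16 bits with NarrowTy = s32 produces:
    //   %lo:_(s32) = G_SEXT_INREG %src0, 16
    //   %hi:_(s32) = G_ASHR %lo, %c31
    // with %lo and %hi gathered by the merge below.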
    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOSI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

Register LegalizerHelper::coerceToScalar(Register Val) {
  LLT Ty = MRI.getType(Val);
  if (Ty.isScalar())
    return Val;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
  if (Ty.isPointer()) {
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
  }

  Register NewVal = Val;

  assert(Ty.isVector());
  LLT EltTy = Ty.getElementType();
  if (EltTy.isPointer())
    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
}

void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}

void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
}

void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);

  LLT OldTy = MRI.getType(MO.getReg());
  unsigned OldElts = OldTy.getNumElements();
  unsigned NewElts = MoreTy.getNumElements();

  unsigned NumParts = NewElts / OldElts;

  // Use concat_vectors if the result is a multiple of the number of elements.
  if (NumParts * OldElts == NewElts) {
    SmallVector<Register, 8> Parts;
    Parts.push_back(MO.getReg());

    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
    for (unsigned I = 1; I != NumParts; ++I)
      Parts.push_back(ImpDef);

    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
    MO.setReg(Concat.getReg(0));
    return;
  }

  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
  MO.setReg(MoreReg);
}

void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Op = MI.getOperand(OpIdx);
  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
}

void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildBitcast(MO, CastDst);
  MO.setReg(CastDst);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ?
          DstReg : MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide
  // the original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}

Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
  LLT OrigTy = MRI.getType(OrigReg);
  LLT LCMTy = getLCMType(WideTy, OrigTy);

  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();

  Register UnmergeSrc = WideReg;

  // Create a merge to the LCM type, padding with undef
  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
  // =>
  // %1:_(<4 x s32>) = G_FOO
  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
  if (NumMergeParts > 1) {
    Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
    SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
    MergeParts[0] = WideReg;
    UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
  }

  // Unmerge to the original register and pad with dead defs.
  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
  UnmergeResults[0] = OrigReg;
  for (int I = 1; I != NumUnmergeParts; ++I)
    UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);

  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
  return WideReg;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }
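    // Illustrative sketch (register names hypothetical): unmerging
    // %src:_(s64) into four s16 results via the shift+truncate loop below:
    //   %dst0:_(s16) = G_TRUNC %src
    //   %shr1:_(s64) = G_LSHR %src, %c16
    //   %dst1:_(s16) = G_TRUNC %shr1
    //   ... and likewise for %dst2 and %dst3 at shifts 32 and 48.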
    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges to the original results. Since we may have
  // widened the source, we will need to pad the results with dead defs to
  // cover the source register.
  // e.g. widen s16 to s32:
  // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
  //
  // =>
  //  %4:_(s64) = G_ANYEXT %0:_(s48)
  //  %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
  //  %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def

  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J) {
      int Idx = I * PartsPerUnmerge + J;
      if (Idx < NumDst)
        MIB.addDef(MI.getOperand(Idx).getReg());
      else {
        // Create dead def for excess components.
        MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
      }
    }

    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
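    // Illustrative sketch (register names hypothetical): an s16 G_EXTRACT at
    // offset 32 from %src:_(s64) becomes:
    //   %shr:_(s64) = G_LSHR %src, %c32
    //   %dst:_(s16) = G_TRUNC %shr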
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;

  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
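  // For illustration, widening s8 G_UADDSAT to s32 under this scheme might
  // look like the following sketch (register numbers arbitrary; shift
  // amounts shown inline rather than as separate G_CONSTANT defs):
  //   %r:_(s8) = G_UADDSAT %a:_(s8), %b:_(s8)
  // =>
  //   %la:_(s32) = G_ANYEXT %a
  //   %lb:_(s32) = G_ANYEXT %b
  //   %sa:_(s32) = G_SHL %la, 24
  //   %sb:_(s32) = G_SHL %lb, 24
  //   %ws:_(s32) = G_UADDSAT %sa, %sb
  //   %sr:_(s32) = G_LSHR %ws, 24
  //   %r:_(s8) = G_TRUNC %sr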
  Register DstReg = MI.getOperand(0).getReg();

  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
    auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask =
        APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildAnd(
        WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
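  // For illustration, widening the G_UADDO above from s8 results to s32 might
  // look like the following sketch (register numbers arbitrary):
  //   %r:_(s8), %o:_(s1) = G_UADDO %a:_(s8), %b:_(s8)
  // =>
  //   %za:_(s32) = G_ZEXT %a
  //   %zb:_(s32) = G_ZEXT %b
  //   %s:_(s32) = G_ADD %za, %zb
  //   %k:_(s32) = G_CONSTANT i32 255
  //   %m:_(s32) = G_AND %s, %k
  //   %o:_(s1) = G_ICMP intpred(ne), %s, %m
  //   %r:_(s8) = G_TRUNC %s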
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZ.
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp minus the difference in width between
      // WideTy and CurTy, since the zero-extended high bits are all counted
      // as extra leading zeros.
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
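  // For illustration, the G_BSWAP widening above from s16 to s32 looks like
  // the following sketch; the byte-swapped s16 value lands in the high half
  // of the s32, so it is shifted back down before truncating:
  //   %r:_(s16) = G_BSWAP %x:_(s16)
  // =>
  //   %e:_(s32) = G_ANYEXT %x
  //   %b:_(s32) = G_BSWAP %e
  //   %s:_(s32) = G_LSHR %b, 16
  //   %r:_(s16) = G_TRUNC %s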
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high
    // bits don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR
                           ? TargetOpcode::G_SEXT
                           : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? TargetOpcode::G_ZEXT
                                                     : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}

static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to
      // match the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Figure out the bit offset into a register when coercing a vector index for
/// the wide element type. This is only for the case when promoting a vector to
/// one with larger elements.
///
/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}

/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple
/// element extracts and merge the results. If this is coercing to a vector
/// with larger elements, index the bitcasted vector and extract the target
/// element with bit operations. This is intended to force the indexing in the
/// native register size for architectures that can dynamically index the
/// register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1))))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt =
          MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
/// preserving the other bits in \p TargetReg:
///
/// (InsertReg << OffsetBits) |
///   (TargetReg & ~(((1 << InsertReg.size()) - 1) << OffsetBits))
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}

/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
          CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(MachineInstr &MI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  auto &MMO = **MI.memoperands_begin();

  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
    if (MI.getOpcode() == TargetOpcode::G_LOAD) {
      // This load needs splitting into power of 2 sized loads.
      if (DstTy.isVector())
        return UnableToLegalize;
      if (isPowerOf2_32(DstTy.getSizeInBits()))
        return UnableToLegalize; // Don't know what we're being asked to do.

      // Our strategy here is to generate anyextending loads for the smaller
      // types up to next power-2 result type, and then combine the two larger
      // result values together, before truncating back down to the non-pow-2
      // type.
      // E.g. v1 = i24 load =>
      // v2 = i32 zextload (2 byte)
      // v3 = i32 load (1 byte)
      // v4 = i32 shl v3, 16
      // v5 = i32 or v4, v2
      // v1 = i24 trunc v5
      // By doing this we generate the correct truncate which should get
      // combined away as an artifact with a matching extend.
      uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
      uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

      MachineFunction &MF = MIRBuilder.getMF();
      MachineMemOperand *LargeMMO =
          MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
      MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
          &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

      LLT PtrTy = MRI.getType(PtrReg);
      unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
      LLT AnyExtTy = LLT::scalar(AnyExtSize);
      Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      auto LargeLoad = MIRBuilder.buildLoadInstr(
          TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);

      auto OffsetCst = MIRBuilder.buildConstant(
          LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
      Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
      auto SmallPtr =
          MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
      auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                            *SmallMMO);

      auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
      auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
      auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
      MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
      MI.eraseFromParent();
      return Legalized;
    }

    MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isScalar()) {
    Register TmpReg =
        MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
    MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_LOAD:
      MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
      break;
    case TargetOpcode::G_SEXTLOAD:
      MIRBuilder.buildSExt(DstReg, TmpReg);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      MIRBuilder.buildZExt(DstReg, TmpReg);
      break;
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStore(MachineInstr &MI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, we generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
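  // For illustration, the full expansion for an i24 store might look like the
  // following sketch (byte offsets; register numbers arbitrary):
  //   G_STORE %v:_(s24), %p:_(p0)
  // =>
  //   %e:_(s32) = G_ANYEXT %v
  //   %hi:_(s32) = G_LSHR %e, 16
  //   %p2:_(p0) = G_PTR_ADD %p, 2
  //   G_STORE %e, %p    ; 2-byte truncating store
  //   G_STORE %hi, %p2  ; 1-byte truncating store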
  Register SrcReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineMemOperand &MMO = **MI.memoperands_begin();
  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
    return UnableToLegalize;
  if (SrcTy.isVector())
    return UnableToLegalize;
  if (isPowerOf2_32(SrcTy.getSizeInBits()))
    return UnableToLegalize; // Don't know what we're being asked to do.

  // Extend to the next pow-2.
  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
      LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr =
      MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));

  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}

// Legalize an instruction by changing the opcode in place.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  Observer.changingInstr(MI);
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  Observer.changedInstr(MI);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  using namespace TargetOpcode;

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
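    // Lower the remainder as x - (x / y) * y, e.g. for G_SREM on s32
    // (a sketch; register numbers arbitrary):
    //   %q:_(s32) = G_SDIV %x, %y
    //   %p:_(s32) = G_MUL %q, %y
    //   %r:_(s32) = G_SUB %x, %p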
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    auto Quot =
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
    MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    LLT Ty = MRI.getType(Res);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.RemoveOperand(1);
    Observer.changedInstr(MI);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    return Legalized;
  }
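  // For illustration, the signed overflow check above for s32 is sketched
  // below: the high half must equal the sign-extension of the low half,
  // otherwise the product overflowed (register numbers arbitrary):
  //   %lo:_(s32) = G_MUL %a, %b
  //   %hi:_(s32) = G_SMULH %a, %b
  //   %sgn:_(s32) = G_ASHR %lo, 31
  //   %ovf:_(s1) = G_ICMP intpred(ne), %hi, %sgn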
  case TargetOpcode::G_FNEG: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
    if (!ZeroTy)
      return UnableToLegalize;
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    Register SubByReg = MI.getOperand(1).getReg();
    Register ZeroReg = Zero.getReg(0);
    MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFNeg(Neg, RHS);
    MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // Since round even is the assumed rounding mode for unconstrained FP
    // operations, rint and roundeven are the same operation.
    changeOpcode(MI, TargetOpcode::G_FRINT);
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    return lowerLoad(MI);
  case TargetOpcode::G_STORE:
    return lowerStore(MI);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI);
  case G_UADDO: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();
    LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
    auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
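  // For illustration, the G_USUBO lowering above computes the borrow with a
  // single unsigned compare (a sketch; register numbers arbitrary):
  //   %r:_(s32) = G_SUB %x, %y
  //   %b:_(s1) = G_ICMP intpred(ult), %x, %y
  // The G_USUBE case below additionally folds the incoming borrow into the
  // subtraction, and selects the outgoing borrow on whether %x == %y.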
  case G_USUBE: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(BorrowOut);
    const LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
    auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);

    auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
    auto LHS_ULT_RHS =
        MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UITOFP:
    return lowerUITOFP(MI);
  case G_SITOFP:
    return lowerSITOFP(MI);
  case G_FPTOUI:
    return lowerFPTOUI(MI);
  case G_FPTOSI:
    return lowerFPTOSI(MI);
  case G_FPTRUNC:
    return lowerFPTRUNC(MI);
  case G_FPOWI:
    return lowerFPOWI(MI);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_MERGE_VALUES:
    return lowerMergeValues(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    auto MIBSz = MIRBuilder.buildConstant(
        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
    MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
    MI.eraseFromParent();
    return Legalized;
  }
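  // For illustration, the G_SEXT_INREG lowering above for s32 with 8
  // significant bits is a shift up followed by an arithmetic shift back down
  // (a sketch; register numbers arbitrary):
  //   %r:_(s32) = G_SEXT_INREG %x, 8
  // =>
  //   %k:_(s32) = G_CONSTANT i32 24
  //   %t:_(s32) = G_SHL %x, %k
  //   %r:_(s32) = G_ASHR %t, %k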
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return lowerExtractInsertVectorElt(MI);
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  case G_EXTRACT:
    return lowerExtract(MI);
  case G_INSERT:
    return lowerInsert(MI);
  case G_BSWAP:
    return lowerBswap(MI);
  case G_BITREVERSE:
    return lowerBitreverse(MI);
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    return lowerReadWriteRegister(MI);
  case G_UADDSAT:
  case G_USUBSAT: {
    // Try to make a reasonable guess about which lowering strategy to use. The
    // target can override this with custom lowering and calling the
    // implementation functions.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (LI.isLegalOrCustom({G_UMIN, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SADDSAT:
  case G_SSUBSAT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());

    // FIXME: It would probably make more sense to see if G_SADDO is preferred,
    // since it's a shorter expansion. However, we would need to figure out the
    // preferred boolean type for the carry out for the query.
    if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SSHLSAT:
  case G_USHLSAT:
    return lowerShlSat(MI);
  }
}

Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
                                                  Align MinAlign) const {
  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
  // datalayout for the preferred alignment. Also there should be a target hook
  // for this to allow targets to reduce the alignment and ignore the
  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
  // the type.
  return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
}

MachineInstrBuilder
LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
                                      MachinePointerInfo &PtrInfo) {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);

  unsigned AddrSpace = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));

  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
}

static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
                                        LLT VecTy) {
  int64_t IdxVal;
  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
    return IdxReg;

  LLT IdxTy = B.getMRI()->getType(IdxReg);
  unsigned NElts = VecTy.getNumElements();
  if (isPowerOf2_32(NElts)) {
    APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  }

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
      .getReg(0);
}
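
// For illustration, clamping a dynamic index into a 4-element vector uses a
// mask, while a 3-element vector needs a umin (a sketch; s32 index type
// assumed, register numbers arbitrary):
//   %m:_(s32) = G_CONSTANT i32 3
//   %safe:_(s32) = G_AND %idx, %m       ; NElts == 4 (power of two)
//
//   %c:_(s32) = G_CONSTANT i32 2
//   %safe:_(s32) = G_UMIN %idx, %c      ; NElts == 3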

Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
                                                  Register Index) {
  LLT EltTy = VecTy.getElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltTy.getSizeInBits() &&
         "Converting bits to bytes lost precision");

  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);

  LLT IdxTy = MRI.getType(Index);
  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
                                 MIRBuilder.buildConstant(IdxTy, EltSize));

  LLT PtrTy = MRI.getType(VecPtr);
  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}

LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();

  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));

  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
  MI.eraseFromParent();
  return Legalized;
}

// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different
// element type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases,
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and set up
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
3250 for (auto &MIB : NewInsts) 3251 MIRBuilder.insertInstr(MIB); 3252 3253 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); 3254 3255 MI.eraseFromParent(); 3256 return Legalized; 3257 } 3258 3259 LegalizerHelper::LegalizeResult 3260 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, 3261 LLT NarrowTy) { 3262 if (TypeIdx != 0) 3263 return UnableToLegalize; 3264 3265 Register DstReg = MI.getOperand(0).getReg(); 3266 Register SrcReg = MI.getOperand(1).getReg(); 3267 LLT DstTy = MRI.getType(DstReg); 3268 LLT SrcTy = MRI.getType(SrcReg); 3269 3270 LLT NarrowTy0 = NarrowTy; 3271 LLT NarrowTy1; 3272 unsigned NumParts; 3273 3274 if (NarrowTy.isVector()) { 3275 // Uneven breakdown not handled. 3276 NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); 3277 if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) 3278 return UnableToLegalize; 3279 3280 NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); 3281 } else { 3282 NumParts = DstTy.getNumElements(); 3283 NarrowTy1 = SrcTy.getElementType(); 3284 } 3285 3286 SmallVector<Register, 4> SrcRegs, DstRegs; 3287 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); 3288 3289 for (unsigned I = 0; I < NumParts; ++I) { 3290 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); 3291 MachineInstr *NewInst = 3292 MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]}); 3293 3294 NewInst->setFlags(MI.getFlags()); 3295 DstRegs.push_back(DstReg); 3296 } 3297 3298 if (NarrowTy.isVector()) 3299 MIRBuilder.buildConcatVectors(DstReg, DstRegs); 3300 else 3301 MIRBuilder.buildBuildVector(DstReg, DstRegs); 3302 3303 MI.eraseFromParent(); 3304 return Legalized; 3305 } 3306 3307 LegalizerHelper::LegalizeResult 3308 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, 3309 LLT NarrowTy) { 3310 Register DstReg = MI.getOperand(0).getReg(); 3311 Register Src0Reg = MI.getOperand(2).getReg(); 3312 LLT DstTy = MRI.getType(DstReg); 3313 LLT SrcTy = MRI.getType(Src0Reg); 3314 3315 unsigned NumParts; 3316 LLT NarrowTy0, NarrowTy1; 3317 3318 if (TypeIdx == 0) { 3319 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; 3320 unsigned OldElts = DstTy.getNumElements(); 3321 3322 NarrowTy0 = NarrowTy; 3323 NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); 3324 NarrowTy1 = NarrowTy.isVector() ? 3325 LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) : 3326 SrcTy.getElementType(); 3327 3328 } else { 3329 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; 3330 unsigned OldElts = SrcTy.getNumElements(); 3331 3332 NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : 3333 NarrowTy.getNumElements(); 3334 NarrowTy0 = LLT::vector(NarrowTy.getNumElements(), 3335 DstTy.getScalarSizeInBits()); 3336 NarrowTy1 = NarrowTy; 3337 } 3338 3339 // FIXME: Don't know how to handle the situation where the small vectors 3340 // aren't all the same size yet. 
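  // For example (illustrative types only): breaking a <3 x s32> compare with
  // NarrowTy1 = <2 x s32> would need a <2 x s32> piece plus an s32 leftover;
  // that mixed-size breakdown is what the check below rejects.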
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred =
      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp =
          MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type.
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle breaking the condition vector into NarrowTy pieces,
      // including the uneven breakdown case.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we
  // can determine a type breakdown for the result type, we can for all of
  // the source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                           .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTy sized pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
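    // One possible shape for that TODO (not implemented here): unmerge SrcReg
    // into GCDTy pieces, pad the tail with G_IMPLICIT_DEF pieces up to a
    // multiple of NarrowTy, merge those into NarrowTy registers, and unmerge
    // each of them onto the original destination registers.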
3534 return UnableToLegalize; 3535 } 3536 3537 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); 3538 const int NumUnmerge = Unmerge->getNumOperands() - 1; 3539 const int PartsPerUnmerge = NumDst / NumUnmerge; 3540 3541 for (int I = 0; I != NumUnmerge; ++I) { 3542 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); 3543 3544 for (int J = 0; J != PartsPerUnmerge; ++J) 3545 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg()); 3546 MIB.addUse(Unmerge.getReg(I)); 3547 } 3548 3549 MI.eraseFromParent(); 3550 return Legalized; 3551 } 3552 3553 LegalizerHelper::LegalizeResult 3554 LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, 3555 unsigned TypeIdx, 3556 LLT NarrowTy) { 3557 assert(TypeIdx == 0 && "not a vector type index"); 3558 Register DstReg = MI.getOperand(0).getReg(); 3559 LLT DstTy = MRI.getType(DstReg); 3560 LLT SrcTy = DstTy.getElementType(); 3561 3562 int DstNumElts = DstTy.getNumElements(); 3563 int NarrowNumElts = NarrowTy.getNumElements(); 3564 int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; 3565 LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy); 3566 3567 SmallVector<Register, 8> ConcatOps; 3568 SmallVector<Register, 8> SubBuildVector; 3569 3570 Register UndefReg; 3571 if (WidenedDstTy != DstTy) 3572 UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); 3573 3574 // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as 3575 // necessary. 3576 // 3577 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 3578 // -> <2 x s16> 3579 // 3580 // %4:_(s16) = G_IMPLICIT_DEF 3581 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 3582 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 3583 // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 3584 // %3:_(<3 x s16>) = G_EXTRACT %7, 0 3585 for (int I = 0; I != NumConcat; ++I) { 3586 for (int J = 0; J != NarrowNumElts; ++J) { 3587 int SrcIdx = NarrowNumElts * I + J; 3588 3589 if (SrcIdx < DstNumElts) { 3590 Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); 3591 SubBuildVector.push_back(SrcReg); 3592 } else 3593 SubBuildVector.push_back(UndefReg); 3594 } 3595 3596 auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); 3597 ConcatOps.push_back(BuildVec.getReg(0)); 3598 SubBuildVector.clear(); 3599 } 3600 3601 if (DstTy == WidenedDstTy) 3602 MIRBuilder.buildConcatVectors(DstReg, ConcatOps); 3603 else { 3604 auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); 3605 MIRBuilder.buildExtract(DstReg, Concat, 0); 3606 } 3607 3608 MI.eraseFromParent(); 3609 return Legalized; 3610 } 3611 3612 LegalizerHelper::LegalizeResult 3613 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, 3614 unsigned TypeIdx, 3615 LLT NarrowVecTy) { 3616 Register DstReg = MI.getOperand(0).getReg(); 3617 Register SrcVec = MI.getOperand(1).getReg(); 3618 Register InsertVal; 3619 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; 3620 3621 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index"); 3622 if (IsInsert) 3623 InsertVal = MI.getOperand(2).getReg(); 3624 3625 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg(); 3626 3627 // TODO: Handle total scalarization case. 3628 if (!NarrowVecTy.isVector()) 3629 return UnableToLegalize; 3630 3631 LLT VecTy = MRI.getType(SrcVec); 3632 3633 // If the index is a constant, we can really break this down as you would 3634 // expect, and index into the target size pieces. 
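  // e.g. (illustrative types only) extracting element 5 of an <8 x s16>
  // vector split into <4 x s16> pieces becomes an extract of element
  // 5 - 4 = 1 from the second piece: PartIdx = 5 / 4 = 1 and
  // NewIdx = 5 - 4 * 1 = 1 in the arithmetic below.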
3635 int64_t IdxVal; 3636 if (mi_match(Idx, MRI, m_ICst(IdxVal))) { 3637 // Avoid out of bounds indexing the pieces. 3638 if (IdxVal >= VecTy.getNumElements()) { 3639 MIRBuilder.buildUndef(DstReg); 3640 MI.eraseFromParent(); 3641 return Legalized; 3642 } 3643 3644 SmallVector<Register, 8> VecParts; 3645 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); 3646 3647 // Build a sequence of NarrowTy pieces in VecParts for this operand. 3648 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, 3649 TargetOpcode::G_ANYEXT); 3650 3651 unsigned NewNumElts = NarrowVecTy.getNumElements(); 3652 3653 LLT IdxTy = MRI.getType(Idx); 3654 int64_t PartIdx = IdxVal / NewNumElts; 3655 auto NewIdx = 3656 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); 3657 3658 if (IsInsert) { 3659 LLT PartTy = MRI.getType(VecParts[PartIdx]); 3660 3661 // Use the adjusted index to insert into one of the subvectors. 3662 auto InsertPart = MIRBuilder.buildInsertVectorElement( 3663 PartTy, VecParts[PartIdx], InsertVal, NewIdx); 3664 VecParts[PartIdx] = InsertPart.getReg(0); 3665 3666 // Recombine the inserted subvector with the others to reform the result 3667 // vector. 3668 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); 3669 } else { 3670 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); 3671 } 3672 3673 MI.eraseFromParent(); 3674 return Legalized; 3675 } 3676 3677 // With a variable index, we can't perform the operation in a smaller type, so 3678 // we're forced to expand this. 3679 // 3680 // TODO: We could emit a chain of compare/select to figure out which piece to 3681 // index. 3682 return lowerExtractInsertVectorElt(MI); 3683 } 3684 3685 LegalizerHelper::LegalizeResult 3686 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, 3687 LLT NarrowTy) { 3688 // FIXME: Don't know how to handle secondary types yet. 3689 if (TypeIdx != 0) 3690 return UnableToLegalize; 3691 3692 MachineMemOperand *MMO = *MI.memoperands_begin(); 3693 3694 // This implementation doesn't work for atomics. Give up instead of doing 3695 // something invalid. 3696 if (MMO->getOrdering() != AtomicOrdering::NotAtomic || 3697 MMO->getFailureOrdering() != AtomicOrdering::NotAtomic) 3698 return UnableToLegalize; 3699 3700 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; 3701 Register ValReg = MI.getOperand(0).getReg(); 3702 Register AddrReg = MI.getOperand(1).getReg(); 3703 LLT ValTy = MRI.getType(ValReg); 3704 3705 // FIXME: Do we need a distinct NarrowMemory legalize action? 3706 if (ValTy.getSizeInBits() != 8 * MMO->getSize()) { 3707 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n"); 3708 return UnableToLegalize; 3709 } 3710 3711 int NumParts = -1; 3712 int NumLeftover = -1; 3713 LLT LeftoverTy; 3714 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs; 3715 if (IsLoad) { 3716 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy); 3717 } else { 3718 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs, 3719 NarrowLeftoverRegs)) { 3720 NumParts = NarrowRegs.size(); 3721 NumLeftover = NarrowLeftoverRegs.size(); 3722 } 3723 } 3724 3725 if (NumParts == -1) 3726 return UnableToLegalize; 3727 3728 LLT PtrTy = MRI.getType(AddrReg); 3729 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); 3730 3731 unsigned TotalSize = ValTy.getSizeInBits(); 3732 3733 // Split the load/store into PartTy sized pieces starting at Offset. If this 3734 // is a load, return the new registers in ValRegs. 
For a store, each element
  // of ValRegs should already have type PartTy. Returns the next offset that
  // needs to be handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
                                      LLT NarrowTy) {
  assert(TypeIdx == 0 && "only one type index expected");

  const unsigned Opc = MI.getOpcode();
  const int NumOps = MI.getNumOperands() - 1;
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);

  assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");

  // First of all check whether we are narrowing (changing the element type)
  // or reducing the number of vector elements.
  const LLT DstTy = MRI.getType(DstReg);
  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();

  SmallVector<Register, 8> ExtractedRegs[3];
  SmallVector<Register, 8> Parts;

  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  // Break down all the sources into NarrowTy pieces we can operate on. This
  // may involve creating merges to a wider type, padded with undef.
  for (int I = 0; I != NumOps; ++I) {
    Register SrcReg = MI.getOperand(I + 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
    // For fewerElements, this is a smaller vector with the same element type.
    LLT OpNarrowTy;
    if (IsNarrow) {
      OpNarrowTy = NarrowScalarTy;

      // In case of narrowing, we need to cast vectors to scalars for this to
      // work properly.
      // FIXME: Can we do without the bitcast here if we're narrowing?
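      // e.g. (illustrative types only) a <2 x s32> source being narrowed to
      // s16 pieces is first bitcast to s64; the s64 is then what gets split
      // into s16 pieces.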
3818 if (SrcTy.isVector()) { 3819 SrcTy = LLT::scalar(SrcTy.getSizeInBits()); 3820 SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0); 3821 } 3822 } else { 3823 OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType()); 3824 } 3825 3826 LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); 3827 3828 // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. 3829 buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I], 3830 TargetOpcode::G_ANYEXT); 3831 } 3832 3833 SmallVector<Register, 8> ResultRegs; 3834 3835 // Input operands for each sub-instruction. 3836 SmallVector<SrcOp, 4> InputRegs(NumOps, Register()); 3837 3838 int NumParts = ExtractedRegs[0].size(); 3839 const unsigned DstSize = DstTy.getSizeInBits(); 3840 const LLT DstScalarTy = LLT::scalar(DstSize); 3841 3842 // Narrowing needs to use scalar types 3843 LLT DstLCMTy, NarrowDstTy; 3844 if (IsNarrow) { 3845 DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy); 3846 NarrowDstTy = NarrowScalarTy; 3847 } else { 3848 DstLCMTy = getLCMType(DstTy, NarrowTy); 3849 NarrowDstTy = NarrowTy; 3850 } 3851 3852 // We widened the source registers to satisfy merge/unmerge size 3853 // constraints. We'll have some extra fully undef parts. 3854 const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; 3855 3856 for (int I = 0; I != NumRealParts; ++I) { 3857 // Emit this instruction on each of the split pieces. 3858 for (int J = 0; J != NumOps; ++J) 3859 InputRegs[J] = ExtractedRegs[J][I]; 3860 3861 auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); 3862 ResultRegs.push_back(Inst.getReg(0)); 3863 } 3864 3865 // Fill out the widened result with undef instead of creating instructions 3866 // with undef inputs. 3867 int NumUndefParts = NumParts - NumRealParts; 3868 if (NumUndefParts != 0) 3869 ResultRegs.append(NumUndefParts, 3870 MIRBuilder.buildUndef(NarrowDstTy).getReg(0)); 3871 3872 // Extract the possibly padded result. Use a scratch register if we need to do 3873 // a final bitcast, otherwise use the original result register. 
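  // e.g. (illustrative walk-through): for DstTy = <3 x s16> narrowed with
  // NarrowTy = s32, two real s32 pieces cover the 48 result bits, and one
  // undef s32 piece pads the merge up to the 96-bit LCM type before the
  // result is extracted and bitcast back to <3 x s16>.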
3874 Register MergeDstReg; 3875 if (IsNarrow && DstTy.isVector()) 3876 MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); 3877 else 3878 MergeDstReg = DstReg; 3879 3880 buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs); 3881 3882 // Recast to vector if we narrowed a vector 3883 if (IsNarrow && DstTy.isVector()) 3884 MIRBuilder.buildBitcast(DstReg, MergeDstReg); 3885 3886 MI.eraseFromParent(); 3887 return Legalized; 3888 } 3889 3890 LegalizerHelper::LegalizeResult 3891 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, 3892 LLT NarrowTy) { 3893 Register DstReg = MI.getOperand(0).getReg(); 3894 Register SrcReg = MI.getOperand(1).getReg(); 3895 int64_t Imm = MI.getOperand(2).getImm(); 3896 3897 LLT DstTy = MRI.getType(DstReg); 3898 3899 SmallVector<Register, 8> Parts; 3900 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg); 3901 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts); 3902 3903 for (Register &R : Parts) 3904 R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0); 3905 3906 buildWidenedRemergeToDst(DstReg, LCMTy, Parts); 3907 3908 MI.eraseFromParent(); 3909 return Legalized; 3910 } 3911 3912 LegalizerHelper::LegalizeResult 3913 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, 3914 LLT NarrowTy) { 3915 using namespace TargetOpcode; 3916 3917 switch (MI.getOpcode()) { 3918 case G_IMPLICIT_DEF: 3919 return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); 3920 case G_TRUNC: 3921 case G_AND: 3922 case G_OR: 3923 case G_XOR: 3924 case G_ADD: 3925 case G_SUB: 3926 case G_MUL: 3927 case G_PTR_ADD: 3928 case G_SMULH: 3929 case G_UMULH: 3930 case G_FADD: 3931 case G_FMUL: 3932 case G_FSUB: 3933 case G_FNEG: 3934 case G_FABS: 3935 case G_FCANONICALIZE: 3936 case G_FDIV: 3937 case G_FREM: 3938 case G_FMA: 3939 case G_FMAD: 3940 case G_FPOW: 3941 case G_FEXP: 3942 case G_FEXP2: 3943 case G_FLOG: 3944 case G_FLOG2: 3945 case G_FLOG10: 3946 case G_FNEARBYINT: 3947 case G_FCEIL: 3948 case G_FFLOOR: 3949 case G_FRINT: 3950 case G_INTRINSIC_ROUND: 3951 case G_INTRINSIC_ROUNDEVEN: 3952 case G_INTRINSIC_TRUNC: 3953 case G_FCOS: 3954 case G_FSIN: 3955 case G_FSQRT: 3956 case G_BSWAP: 3957 case G_BITREVERSE: 3958 case G_SDIV: 3959 case G_UDIV: 3960 case G_SREM: 3961 case G_UREM: 3962 case G_SMIN: 3963 case G_SMAX: 3964 case G_UMIN: 3965 case G_UMAX: 3966 case G_FMINNUM: 3967 case G_FMAXNUM: 3968 case G_FMINNUM_IEEE: 3969 case G_FMAXNUM_IEEE: 3970 case G_FMINIMUM: 3971 case G_FMAXIMUM: 3972 case G_FSHL: 3973 case G_FSHR: 3974 case G_FREEZE: 3975 case G_SADDSAT: 3976 case G_SSUBSAT: 3977 case G_UADDSAT: 3978 case G_USUBSAT: 3979 return reduceOperationWidth(MI, TypeIdx, NarrowTy); 3980 case G_SHL: 3981 case G_LSHR: 3982 case G_ASHR: 3983 case G_SSHLSAT: 3984 case G_USHLSAT: 3985 case G_CTLZ: 3986 case G_CTLZ_ZERO_UNDEF: 3987 case G_CTTZ: 3988 case G_CTTZ_ZERO_UNDEF: 3989 case G_CTPOP: 3990 case G_FCOPYSIGN: 3991 return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); 3992 case G_ZEXT: 3993 case G_SEXT: 3994 case G_ANYEXT: 3995 case G_FPEXT: 3996 case G_FPTRUNC: 3997 case G_SITOFP: 3998 case G_UITOFP: 3999 case G_FPTOSI: 4000 case G_FPTOUI: 4001 case G_INTTOPTR: 4002 case G_PTRTOINT: 4003 case G_ADDRSPACE_CAST: 4004 return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); 4005 case G_ICMP: 4006 case G_FCMP: 4007 return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); 4008 case G_SELECT: 4009 return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); 4010 case G_PHI: 4011 return 
fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); 4012 case G_UNMERGE_VALUES: 4013 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); 4014 case G_BUILD_VECTOR: 4015 return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); 4016 case G_EXTRACT_VECTOR_ELT: 4017 case G_INSERT_VECTOR_ELT: 4018 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); 4019 case G_LOAD: 4020 case G_STORE: 4021 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); 4022 case G_SEXT_INREG: 4023 return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy); 4024 default: 4025 return UnableToLegalize; 4026 } 4027 } 4028 4029 LegalizerHelper::LegalizeResult 4030 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, 4031 const LLT HalfTy, const LLT AmtTy) { 4032 4033 Register InL = MRI.createGenericVirtualRegister(HalfTy); 4034 Register InH = MRI.createGenericVirtualRegister(HalfTy); 4035 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1)); 4036 4037 if (Amt.isNullValue()) { 4038 MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH}); 4039 MI.eraseFromParent(); 4040 return Legalized; 4041 } 4042 4043 LLT NVT = HalfTy; 4044 unsigned NVTBits = HalfTy.getSizeInBits(); 4045 unsigned VTBits = 2 * NVTBits; 4046 4047 SrcOp Lo(Register(0)), Hi(Register(0)); 4048 if (MI.getOpcode() == TargetOpcode::G_SHL) { 4049 if (Amt.ugt(VTBits)) { 4050 Lo = Hi = MIRBuilder.buildConstant(NVT, 0); 4051 } else if (Amt.ugt(NVTBits)) { 4052 Lo = MIRBuilder.buildConstant(NVT, 0); 4053 Hi = MIRBuilder.buildShl(NVT, InL, 4054 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 4055 } else if (Amt == NVTBits) { 4056 Lo = MIRBuilder.buildConstant(NVT, 0); 4057 Hi = InL; 4058 } else { 4059 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt)); 4060 auto OrLHS = 4061 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt)); 4062 auto OrRHS = MIRBuilder.buildLShr( 4063 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 4064 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 4065 } 4066 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) { 4067 if (Amt.ugt(VTBits)) { 4068 Lo = Hi = MIRBuilder.buildConstant(NVT, 0); 4069 } else if (Amt.ugt(NVTBits)) { 4070 Lo = MIRBuilder.buildLShr(NVT, InH, 4071 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 4072 Hi = MIRBuilder.buildConstant(NVT, 0); 4073 } else if (Amt == NVTBits) { 4074 Lo = InH; 4075 Hi = MIRBuilder.buildConstant(NVT, 0); 4076 } else { 4077 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); 4078 4079 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); 4080 auto OrRHS = MIRBuilder.buildShl( 4081 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); 4082 4083 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 4084 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst); 4085 } 4086 } else { 4087 if (Amt.ugt(VTBits)) { 4088 Hi = Lo = MIRBuilder.buildAShr( 4089 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 4090 } else if (Amt.ugt(NVTBits)) { 4091 Lo = MIRBuilder.buildAShr(NVT, InH, 4092 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); 4093 Hi = MIRBuilder.buildAShr(NVT, InH, 4094 MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 4095 } else if (Amt == NVTBits) { 4096 Lo = InH; 4097 Hi = MIRBuilder.buildAShr(NVT, InH, 4098 MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); 4099 } else { 4100 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); 4101 4102 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); 4103 auto OrRHS = MIRBuilder.buildShl( 4104 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + 
NVTBits)); 4105 4106 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); 4107 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst); 4108 } 4109 } 4110 4111 MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi}); 4112 MI.eraseFromParent(); 4113 4114 return Legalized; 4115 } 4116 4117 // TODO: Optimize if constant shift amount. 4118 LegalizerHelper::LegalizeResult 4119 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, 4120 LLT RequestedTy) { 4121 if (TypeIdx == 1) { 4122 Observer.changingInstr(MI); 4123 narrowScalarSrc(MI, RequestedTy, 2); 4124 Observer.changedInstr(MI); 4125 return Legalized; 4126 } 4127 4128 Register DstReg = MI.getOperand(0).getReg(); 4129 LLT DstTy = MRI.getType(DstReg); 4130 if (DstTy.isVector()) 4131 return UnableToLegalize; 4132 4133 Register Amt = MI.getOperand(2).getReg(); 4134 LLT ShiftAmtTy = MRI.getType(Amt); 4135 const unsigned DstEltSize = DstTy.getScalarSizeInBits(); 4136 if (DstEltSize % 2 != 0) 4137 return UnableToLegalize; 4138 4139 // Ignore the input type. We can only go to exactly half the size of the 4140 // input. If that isn't small enough, the resulting pieces will be further 4141 // legalized. 4142 const unsigned NewBitSize = DstEltSize / 2; 4143 const LLT HalfTy = LLT::scalar(NewBitSize); 4144 const LLT CondTy = LLT::scalar(1); 4145 4146 if (const MachineInstr *KShiftAmt = 4147 getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { 4148 return narrowScalarShiftByConstant( 4149 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); 4150 } 4151 4152 // TODO: Expand with known bits. 4153 4154 // Handle the fully general expansion by an unknown amount. 4155 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize); 4156 4157 Register InL = MRI.createGenericVirtualRegister(HalfTy); 4158 Register InH = MRI.createGenericVirtualRegister(HalfTy); 4159 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1)); 4160 4161 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits); 4162 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt); 4163 4164 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0); 4165 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits); 4166 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero); 4167 4168 Register ResultRegs[2]; 4169 switch (MI.getOpcode()) { 4170 case TargetOpcode::G_SHL: { 4171 // Short: ShAmt < NewBitSize 4172 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt); 4173 4174 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); 4175 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt); 4176 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); 4177 4178 // Long: ShAmt >= NewBitSize 4179 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. 4180 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part. 
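    // Note the extra IsZero select on the Hi result below: with Amt == 0,
    // HiS would compute InL >> (NewBitSize - 0), a shift by the full half
    // width, so InH must be forwarded directly in that case.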
4181 4182 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL); 4183 auto Hi = MIRBuilder.buildSelect( 4184 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL)); 4185 4186 ResultRegs[0] = Lo.getReg(0); 4187 ResultRegs[1] = Hi.getReg(0); 4188 break; 4189 } 4190 case TargetOpcode::G_LSHR: 4191 case TargetOpcode::G_ASHR: { 4192 // Short: ShAmt < NewBitSize 4193 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt}); 4194 4195 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt); 4196 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack); 4197 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); 4198 4199 // Long: ShAmt >= NewBitSize 4200 MachineInstrBuilder HiL; 4201 if (MI.getOpcode() == TargetOpcode::G_LSHR) { 4202 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. 4203 } else { 4204 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1); 4205 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part. 4206 } 4207 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, 4208 {InH, AmtExcess}); // Lo from Hi part. 4209 4210 auto Lo = MIRBuilder.buildSelect( 4211 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); 4212 4213 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); 4214 4215 ResultRegs[0] = Lo.getReg(0); 4216 ResultRegs[1] = Hi.getReg(0); 4217 break; 4218 } 4219 default: 4220 llvm_unreachable("not a shift"); 4221 } 4222 4223 MIRBuilder.buildMerge(DstReg, ResultRegs); 4224 MI.eraseFromParent(); 4225 return Legalized; 4226 } 4227 4228 LegalizerHelper::LegalizeResult 4229 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, 4230 LLT MoreTy) { 4231 assert(TypeIdx == 0 && "Expecting only Idx 0"); 4232 4233 Observer.changingInstr(MI); 4234 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 4235 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 4236 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 4237 moreElementsVectorSrc(MI, MoreTy, I); 4238 } 4239 4240 MachineBasicBlock &MBB = *MI.getParent(); 4241 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); 4242 moreElementsVectorDst(MI, MoreTy, 0); 4243 Observer.changedInstr(MI); 4244 return Legalized; 4245 } 4246 4247 LegalizerHelper::LegalizeResult 4248 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, 4249 LLT MoreTy) { 4250 unsigned Opc = MI.getOpcode(); 4251 switch (Opc) { 4252 case TargetOpcode::G_IMPLICIT_DEF: 4253 case TargetOpcode::G_LOAD: { 4254 if (TypeIdx != 0) 4255 return UnableToLegalize; 4256 Observer.changingInstr(MI); 4257 moreElementsVectorDst(MI, MoreTy, 0); 4258 Observer.changedInstr(MI); 4259 return Legalized; 4260 } 4261 case TargetOpcode::G_STORE: 4262 if (TypeIdx != 0) 4263 return UnableToLegalize; 4264 Observer.changingInstr(MI); 4265 moreElementsVectorSrc(MI, MoreTy, 0); 4266 Observer.changedInstr(MI); 4267 return Legalized; 4268 case TargetOpcode::G_AND: 4269 case TargetOpcode::G_OR: 4270 case TargetOpcode::G_XOR: 4271 case TargetOpcode::G_SMIN: 4272 case TargetOpcode::G_SMAX: 4273 case TargetOpcode::G_UMIN: 4274 case TargetOpcode::G_UMAX: 4275 case TargetOpcode::G_FMINNUM: 4276 case TargetOpcode::G_FMAXNUM: 4277 case TargetOpcode::G_FMINNUM_IEEE: 4278 case TargetOpcode::G_FMAXNUM_IEEE: 4279 case TargetOpcode::G_FMINIMUM: 4280 case TargetOpcode::G_FMAXIMUM: { 4281 Observer.changingInstr(MI); 4282 moreElementsVectorSrc(MI, MoreTy, 1); 4283 moreElementsVectorSrc(MI, MoreTy, 2); 4284 moreElementsVectorDst(MI, MoreTy, 0); 4285 
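    // Padding the extra lanes with undef is fine for these operations; the
    // padded lanes are never observed, since only the original elements are
    // extracted back out of the widened result.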
Observer.changedInstr(MI); 4286 return Legalized; 4287 } 4288 case TargetOpcode::G_EXTRACT: 4289 if (TypeIdx != 1) 4290 return UnableToLegalize; 4291 Observer.changingInstr(MI); 4292 moreElementsVectorSrc(MI, MoreTy, 1); 4293 Observer.changedInstr(MI); 4294 return Legalized; 4295 case TargetOpcode::G_INSERT: 4296 case TargetOpcode::G_FREEZE: 4297 if (TypeIdx != 0) 4298 return UnableToLegalize; 4299 Observer.changingInstr(MI); 4300 moreElementsVectorSrc(MI, MoreTy, 1); 4301 moreElementsVectorDst(MI, MoreTy, 0); 4302 Observer.changedInstr(MI); 4303 return Legalized; 4304 case TargetOpcode::G_SELECT: 4305 if (TypeIdx != 0) 4306 return UnableToLegalize; 4307 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) 4308 return UnableToLegalize; 4309 4310 Observer.changingInstr(MI); 4311 moreElementsVectorSrc(MI, MoreTy, 2); 4312 moreElementsVectorSrc(MI, MoreTy, 3); 4313 moreElementsVectorDst(MI, MoreTy, 0); 4314 Observer.changedInstr(MI); 4315 return Legalized; 4316 case TargetOpcode::G_UNMERGE_VALUES: { 4317 if (TypeIdx != 1) 4318 return UnableToLegalize; 4319 4320 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 4321 int NumDst = MI.getNumOperands() - 1; 4322 moreElementsVectorSrc(MI, MoreTy, NumDst); 4323 4324 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); 4325 for (int I = 0; I != NumDst; ++I) 4326 MIB.addDef(MI.getOperand(I).getReg()); 4327 4328 int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); 4329 for (int I = NumDst; I != NewNumDst; ++I) 4330 MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); 4331 4332 MIB.addUse(MI.getOperand(NumDst).getReg()); 4333 MI.eraseFromParent(); 4334 return Legalized; 4335 } 4336 case TargetOpcode::G_PHI: 4337 return moreElementsVectorPhi(MI, TypeIdx, MoreTy); 4338 default: 4339 return UnableToLegalize; 4340 } 4341 } 4342 4343 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, 4344 ArrayRef<Register> Src1Regs, 4345 ArrayRef<Register> Src2Regs, 4346 LLT NarrowTy) { 4347 MachineIRBuilder &B = MIRBuilder; 4348 unsigned SrcParts = Src1Regs.size(); 4349 unsigned DstParts = DstRegs.size(); 4350 4351 unsigned DstIdx = 0; // Low bits of the result. 4352 Register FactorSum = 4353 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0); 4354 DstRegs[DstIdx] = FactorSum; 4355 4356 unsigned CarrySumPrevDstIdx; 4357 SmallVector<Register, 4> Factors; 4358 4359 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) { 4360 // Collect low parts of muls for DstIdx. 4361 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1; 4362 i <= std::min(DstIdx, SrcParts - 1); ++i) { 4363 MachineInstrBuilder Mul = 4364 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]); 4365 Factors.push_back(Mul.getReg(0)); 4366 } 4367 // Collect high parts of muls from previous DstIdx. 4368 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts; 4369 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) { 4370 MachineInstrBuilder Umulh = 4371 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]); 4372 Factors.push_back(Umulh.getReg(0)); 4373 } 4374 // Add CarrySum from additions calculated for previous DstIdx. 4375 if (DstIdx != 1) { 4376 Factors.push_back(CarrySumPrevDstIdx); 4377 } 4378 4379 Register CarrySum; 4380 // Add all factors and accumulate all carries into CarrySum. 
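    // e.g. with SrcParts == DstParts == 2 (illustrative part counts),
    // DstIdx == 1 sums mul(Src1[1], Src2[0]), mul(Src1[0], Src2[1]) and
    // umulh(Src1[0], Src2[0]); as the final index, its carries are discarded.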
4381 if (DstIdx != DstParts - 1) { 4382 MachineInstrBuilder Uaddo = 4383 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]); 4384 FactorSum = Uaddo.getReg(0); 4385 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0); 4386 for (unsigned i = 2; i < Factors.size(); ++i) { 4387 MachineInstrBuilder Uaddo = 4388 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]); 4389 FactorSum = Uaddo.getReg(0); 4390 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)); 4391 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0); 4392 } 4393 } else { 4394 // Since value for the next index is not calculated, neither is CarrySum. 4395 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0); 4396 for (unsigned i = 2; i < Factors.size(); ++i) 4397 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0); 4398 } 4399 4400 CarrySumPrevDstIdx = CarrySum; 4401 DstRegs[DstIdx] = FactorSum; 4402 Factors.clear(); 4403 } 4404 } 4405 4406 LegalizerHelper::LegalizeResult 4407 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { 4408 Register DstReg = MI.getOperand(0).getReg(); 4409 Register Src1 = MI.getOperand(1).getReg(); 4410 Register Src2 = MI.getOperand(2).getReg(); 4411 4412 LLT Ty = MRI.getType(DstReg); 4413 if (Ty.isVector()) 4414 return UnableToLegalize; 4415 4416 unsigned SrcSize = MRI.getType(Src1).getSizeInBits(); 4417 unsigned DstSize = Ty.getSizeInBits(); 4418 unsigned NarrowSize = NarrowTy.getSizeInBits(); 4419 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0) 4420 return UnableToLegalize; 4421 4422 unsigned NumDstParts = DstSize / NarrowSize; 4423 unsigned NumSrcParts = SrcSize / NarrowSize; 4424 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH; 4425 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1); 4426 4427 SmallVector<Register, 2> Src1Parts, Src2Parts; 4428 SmallVector<Register, 2> DstTmpRegs(DstTmpParts); 4429 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts); 4430 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts); 4431 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy); 4432 4433 // Take only high half of registers if this is high mul. 4434 ArrayRef<Register> DstRegs( 4435 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts); 4436 MIRBuilder.buildMerge(DstReg, DstRegs); 4437 MI.eraseFromParent(); 4438 return Legalized; 4439 } 4440 4441 LegalizerHelper::LegalizeResult 4442 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, 4443 LLT NarrowTy) { 4444 if (TypeIdx != 1) 4445 return UnableToLegalize; 4446 4447 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 4448 4449 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 4450 // FIXME: add support for when SizeOp1 isn't an exact multiple of 4451 // NarrowSize. 4452 if (SizeOp1 % NarrowSize != 0) 4453 return UnableToLegalize; 4454 int NumParts = SizeOp1 / NarrowSize; 4455 4456 SmallVector<Register, 2> SrcRegs, DstRegs; 4457 SmallVector<uint64_t, 2> Indexes; 4458 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 4459 4460 Register OpReg = MI.getOperand(0).getReg(); 4461 uint64_t OpStart = MI.getOperand(2).getImm(); 4462 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 4463 for (int i = 0; i < NumParts; ++i) { 4464 unsigned SrcStart = i * NarrowSize; 4465 4466 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { 4467 // No part of the extract uses this subregister, ignore it. 
4468 continue; 4469 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 4470 // The entire subregister is extracted, forward the value. 4471 DstRegs.push_back(SrcRegs[i]); 4472 continue; 4473 } 4474 4475 // OpSegStart is where this destination segment would start in OpReg if it 4476 // extended infinitely in both directions. 4477 int64_t ExtractOffset; 4478 uint64_t SegSize; 4479 if (OpStart < SrcStart) { 4480 ExtractOffset = 0; 4481 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); 4482 } else { 4483 ExtractOffset = OpStart - SrcStart; 4484 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); 4485 } 4486 4487 Register SegReg = SrcRegs[i]; 4488 if (ExtractOffset != 0 || SegSize != NarrowSize) { 4489 // A genuine extract is needed. 4490 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 4491 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); 4492 } 4493 4494 DstRegs.push_back(SegReg); 4495 } 4496 4497 Register DstReg = MI.getOperand(0).getReg(); 4498 if (MRI.getType(DstReg).isVector()) 4499 MIRBuilder.buildBuildVector(DstReg, DstRegs); 4500 else if (DstRegs.size() > 1) 4501 MIRBuilder.buildMerge(DstReg, DstRegs); 4502 else 4503 MIRBuilder.buildCopy(DstReg, DstRegs[0]); 4504 MI.eraseFromParent(); 4505 return Legalized; 4506 } 4507 4508 LegalizerHelper::LegalizeResult 4509 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, 4510 LLT NarrowTy) { 4511 // FIXME: Don't know how to handle secondary types yet. 4512 if (TypeIdx != 0) 4513 return UnableToLegalize; 4514 4515 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); 4516 uint64_t NarrowSize = NarrowTy.getSizeInBits(); 4517 4518 // FIXME: add support for when SizeOp0 isn't an exact multiple of 4519 // NarrowSize. 4520 if (SizeOp0 % NarrowSize != 0) 4521 return UnableToLegalize; 4522 4523 int NumParts = SizeOp0 / NarrowSize; 4524 4525 SmallVector<Register, 2> SrcRegs, DstRegs; 4526 SmallVector<uint64_t, 2> Indexes; 4527 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); 4528 4529 Register OpReg = MI.getOperand(2).getReg(); 4530 uint64_t OpStart = MI.getOperand(3).getImm(); 4531 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); 4532 for (int i = 0; i < NumParts; ++i) { 4533 unsigned DstStart = i * NarrowSize; 4534 4535 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { 4536 // No part of the insert affects this subregister, forward the original. 4537 DstRegs.push_back(SrcRegs[i]); 4538 continue; 4539 } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { 4540 // The entire subregister is defined by this insert, forward the new 4541 // value. 4542 DstRegs.push_back(OpReg); 4543 continue; 4544 } 4545 4546 // OpSegStart is where this destination segment would start in OpReg if it 4547 // extended infinitely in both directions. 4548 int64_t ExtractOffset, InsertOffset; 4549 uint64_t SegSize; 4550 if (OpStart < DstStart) { 4551 InsertOffset = 0; 4552 ExtractOffset = DstStart - OpStart; 4553 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); 4554 } else { 4555 InsertOffset = OpStart - DstStart; 4556 ExtractOffset = 0; 4557 SegSize = 4558 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); 4559 } 4560 4561 Register SegReg = OpReg; 4562 if (ExtractOffset != 0 || SegSize != OpSize) { 4563 // A genuine extract is needed. 
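      // e.g. (illustrative sizes) inserting an s16 at bit offset 24 into an
      // s64 split into s32 parts: the part covering bits [32, 64) takes
      // SegSize = 8 bits of OpReg, extracted from ExtractOffset = 8 and
      // inserted at InsertOffset = 0.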
4564 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); 4565 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); 4566 } 4567 4568 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); 4569 MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); 4570 DstRegs.push_back(DstReg); 4571 } 4572 4573 assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); 4574 Register DstReg = MI.getOperand(0).getReg(); 4575 if(MRI.getType(DstReg).isVector()) 4576 MIRBuilder.buildBuildVector(DstReg, DstRegs); 4577 else 4578 MIRBuilder.buildMerge(DstReg, DstRegs); 4579 MI.eraseFromParent(); 4580 return Legalized; 4581 } 4582 4583 LegalizerHelper::LegalizeResult 4584 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, 4585 LLT NarrowTy) { 4586 Register DstReg = MI.getOperand(0).getReg(); 4587 LLT DstTy = MRI.getType(DstReg); 4588 4589 assert(MI.getNumOperands() == 3 && TypeIdx == 0); 4590 4591 SmallVector<Register, 4> DstRegs, DstLeftoverRegs; 4592 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs; 4593 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; 4594 LLT LeftoverTy; 4595 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy, 4596 Src0Regs, Src0LeftoverRegs)) 4597 return UnableToLegalize; 4598 4599 LLT Unused; 4600 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused, 4601 Src1Regs, Src1LeftoverRegs)) 4602 llvm_unreachable("inconsistent extractParts result"); 4603 4604 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { 4605 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, 4606 {Src0Regs[I], Src1Regs[I]}); 4607 DstRegs.push_back(Inst.getReg(0)); 4608 } 4609 4610 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { 4611 auto Inst = MIRBuilder.buildInstr( 4612 MI.getOpcode(), 4613 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]}); 4614 DstLeftoverRegs.push_back(Inst.getReg(0)); 4615 } 4616 4617 insertParts(DstReg, DstTy, NarrowTy, DstRegs, 4618 LeftoverTy, DstLeftoverRegs); 4619 4620 MI.eraseFromParent(); 4621 return Legalized; 4622 } 4623 4624 LegalizerHelper::LegalizeResult 4625 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, 4626 LLT NarrowTy) { 4627 if (TypeIdx != 0) 4628 return UnableToLegalize; 4629 4630 Register DstReg = MI.getOperand(0).getReg(); 4631 Register SrcReg = MI.getOperand(1).getReg(); 4632 4633 LLT DstTy = MRI.getType(DstReg); 4634 if (DstTy.isVector()) 4635 return UnableToLegalize; 4636 4637 SmallVector<Register, 8> Parts; 4638 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg); 4639 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode()); 4640 buildWidenedRemergeToDst(DstReg, LCMTy, Parts); 4641 4642 MI.eraseFromParent(); 4643 return Legalized; 4644 } 4645 4646 LegalizerHelper::LegalizeResult 4647 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, 4648 LLT NarrowTy) { 4649 if (TypeIdx != 0) 4650 return UnableToLegalize; 4651 4652 Register CondReg = MI.getOperand(1).getReg(); 4653 LLT CondTy = MRI.getType(CondReg); 4654 if (CondTy.isVector()) // TODO: Handle vselect 4655 return UnableToLegalize; 4656 4657 Register DstReg = MI.getOperand(0).getReg(); 4658 LLT DstTy = MRI.getType(DstReg); 4659 4660 SmallVector<Register, 4> DstRegs, DstLeftoverRegs; 4661 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; 4662 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs; 4663 LLT LeftoverTy; 4664 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy, 4665 Src1Regs, 
Src1LeftoverRegs)) 4666 return UnableToLegalize; 4667 4668 LLT Unused; 4669 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused, 4670 Src2Regs, Src2LeftoverRegs)) 4671 llvm_unreachable("inconsistent extractParts result"); 4672 4673 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { 4674 auto Select = MIRBuilder.buildSelect(NarrowTy, 4675 CondReg, Src1Regs[I], Src2Regs[I]); 4676 DstRegs.push_back(Select.getReg(0)); 4677 } 4678 4679 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { 4680 auto Select = MIRBuilder.buildSelect( 4681 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); 4682 DstLeftoverRegs.push_back(Select.getReg(0)); 4683 } 4684 4685 insertParts(DstReg, DstTy, NarrowTy, DstRegs, 4686 LeftoverTy, DstLeftoverRegs); 4687 4688 MI.eraseFromParent(); 4689 return Legalized; 4690 } 4691 4692 LegalizerHelper::LegalizeResult 4693 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, 4694 LLT NarrowTy) { 4695 if (TypeIdx != 1) 4696 return UnableToLegalize; 4697 4698 Register DstReg = MI.getOperand(0).getReg(); 4699 Register SrcReg = MI.getOperand(1).getReg(); 4700 LLT DstTy = MRI.getType(DstReg); 4701 LLT SrcTy = MRI.getType(SrcReg); 4702 unsigned NarrowSize = NarrowTy.getSizeInBits(); 4703 4704 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { 4705 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF; 4706 4707 MachineIRBuilder &B = MIRBuilder; 4708 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); 4709 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi) 4710 auto C_0 = B.buildConstant(NarrowTy, 0); 4711 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 4712 UnmergeSrc.getReg(1), C_0); 4713 auto LoCTLZ = IsUndef ? 4714 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) : 4715 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0)); 4716 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize); 4717 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize); 4718 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)); 4719 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ); 4720 4721 MI.eraseFromParent(); 4722 return Legalized; 4723 } 4724 4725 return UnableToLegalize; 4726 } 4727 4728 LegalizerHelper::LegalizeResult 4729 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, 4730 LLT NarrowTy) { 4731 if (TypeIdx != 1) 4732 return UnableToLegalize; 4733 4734 Register DstReg = MI.getOperand(0).getReg(); 4735 Register SrcReg = MI.getOperand(1).getReg(); 4736 LLT DstTy = MRI.getType(DstReg); 4737 LLT SrcTy = MRI.getType(SrcReg); 4738 unsigned NarrowSize = NarrowTy.getSizeInBits(); 4739 4740 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { 4741 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF; 4742 4743 MachineIRBuilder &B = MIRBuilder; 4744 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); 4745 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo) 4746 auto C_0 = B.buildConstant(NarrowTy, 0); 4747 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), 4748 UnmergeSrc.getReg(0), C_0); 4749 auto HiCTTZ = IsUndef ? 
4750 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) : 4751 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1)); 4752 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize); 4753 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize); 4754 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)); 4755 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ); 4756 4757 MI.eraseFromParent(); 4758 return Legalized; 4759 } 4760 4761 return UnableToLegalize; 4762 } 4763 4764 LegalizerHelper::LegalizeResult 4765 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, 4766 LLT NarrowTy) { 4767 if (TypeIdx != 1) 4768 return UnableToLegalize; 4769 4770 Register DstReg = MI.getOperand(0).getReg(); 4771 LLT DstTy = MRI.getType(DstReg); 4772 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 4773 unsigned NarrowSize = NarrowTy.getSizeInBits(); 4774 4775 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { 4776 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1)); 4777 4778 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0)); 4779 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1)); 4780 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP); 4781 4782 MI.eraseFromParent(); 4783 return Legalized; 4784 } 4785 4786 return UnableToLegalize; 4787 } 4788 4789 LegalizerHelper::LegalizeResult 4790 LegalizerHelper::lowerBitCount(MachineInstr &MI) { 4791 unsigned Opc = MI.getOpcode(); 4792 const auto &TII = MIRBuilder.getTII(); 4793 auto isSupported = [this](const LegalityQuery &Q) { 4794 auto QAction = LI.getAction(Q).Action; 4795 return QAction == Legal || QAction == Libcall || QAction == Custom; 4796 }; 4797 switch (Opc) { 4798 default: 4799 return UnableToLegalize; 4800 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 4801 // This trivially expands to CTLZ. 4802 Observer.changingInstr(MI); 4803 MI.setDesc(TII.get(TargetOpcode::G_CTLZ)); 4804 Observer.changedInstr(MI); 4805 return Legalized; 4806 } 4807 case TargetOpcode::G_CTLZ: { 4808 Register DstReg = MI.getOperand(0).getReg(); 4809 Register SrcReg = MI.getOperand(1).getReg(); 4810 LLT DstTy = MRI.getType(DstReg); 4811 LLT SrcTy = MRI.getType(SrcReg); 4812 unsigned Len = SrcTy.getSizeInBits(); 4813 4814 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) { 4815 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 4816 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg); 4817 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0); 4818 auto ICmp = MIRBuilder.buildICmp( 4819 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc); 4820 auto LenConst = MIRBuilder.buildConstant(DstTy, Len); 4821 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU); 4822 MI.eraseFromParent(); 4823 return Legalized; 4824 } 4825 // for now, we do this: 4826 // NewLen = NextPowerOf2(Len); 4827 // x = x | (x >> 1); 4828 // x = x | (x >> 2); 4829 // ... 
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // ... continuing up to a shift of NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // For now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return Len - ctlz(~x & (x - 1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. The default approach would be
    //   B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use the following formula instead:
    //   B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives the same result in blocks of 2 with one instruction less.
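    // e.g. for the 2-bit block 0b11: 0b11 - ((0b11 >> 1) & 0b01) = 0b10, i.e.
    // two set bits; for 0b10: 0b10 - 0b01 = 0b01, one set bit.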
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }
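  //
  // The last two lines implement round-to-nearest-even: t holds the 40
  // mantissa bits shifted out of the 23-bit f32 mantissa, so v is incremented
  // when the remainder is above the halfway point, and on an exact tie
  // (t == 0x8000000000) only if v is odd.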

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy == LLT::scalar(1)) {
    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
    return lowerU64ToF32BitOps(MI);
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy == S1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
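    //
    // Example (illustrative): for l = -5, s = -1 and (l + s) ^ s =
    // (-6) ^ -1 = 5, so the magnitude is converted with the unsigned
    // expansion and the result negated. For negative l, (l + s) ^ s equals
    // ~(l - 1) = -l; for non-negative l it is the identity.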
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives the same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater than or equal to 2^31 for float or 2^63 for double. For brevity
  // 2^Exp.

  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting the highest bit in the result
  // to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
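  // Illustrative sketch of the expansion below for Src = 2.5f (0x40200000):
  // the exponent bits are 0x80, so Exponent = 128 - 127 = 1; R is the
  // mantissa with the implicit bit set, 0x00A00000; since Exponent (1) is not
  // greater than ExponentLoBit (23), the right-shift path applies and
  // R >> (23 - 1) = 2; Sign is 0, so the final result is 2 (truncation toward
  // zero).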
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
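  // Example (illustrative): for 1.0 the biased f64 exponent is 1023, so E
  // becomes 1023 - 1023 + 15 = 15, the biased f16 exponent of 1.0. For inputs
  // below the f16 normal range E reaches zero or goes negative, which the
  // denormal path below (E < 1) handles.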
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
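
  // E == 1039 corresponds to an all-ones f64 exponent (2047 - 1023 + 15),
  // i.e. the source was Inf or NaN, so we pick I, which was built above as
  // the f16 Inf/NaN pattern (0x7c00, plus a quiet bit when the mantissa was
  // non-zero).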

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit.
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  return UnableToLegalize;
}

// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
  MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  LLT CmpType = MRI.getType(Dst).changeElementSize(1);

  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  auto SignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
  MachineInstr *Or;

  if (Src0Ty == Src1Ty) {
    auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  if (unsigned Flags = MI.getFlags())
    Or->setFlags(Flags);

  MI.eraseFromParent();
  return Legalized;
}
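
// Worked example for lowerFCopySign above (illustrative): for mismatched
// operand sizes the sign bit is moved between positions, e.g. an s64 result
// taking its sign from an s32 value zero-extends the s32 bits and shifts left
// by 32 so that bit 31 of the sign source lands in the s64 sign position
// (bit 63); the smaller-to-larger case shifts right and truncates instead.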

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction, as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
                                  Flags);
  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ? o : 0.0);
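  //
  // Example (illustrative): round(2.5) gives t = 2.0, d = 0.5, o = 1.0, so
  // the result is 3.0; round(-2.5) gives t = -2.0, d = 0.5, o = -1.0, so the
  // result is -3.0. Halfway cases round away from zero, matching the round
  // intrinsic rather than roundeven.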

  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);

  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);

  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
                                  Flags);
  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);

  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.

  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
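
// Worked example for lowerMergeValues above (illustrative): merging two s16
// parts 0x1234 and 0xABCD into an s32 value zero-extends each part and ORs
// the second in at offset 16, producing 0xABCD1234 (part 0 occupies the low
// bits).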

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
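  // As sketched in the doc comment above, the index is clamped to the vector
  // length before it is scaled into a byte offset, so an out-of-range index
  // still reads or writes inside the stack temporary rather than stray stack
  // memory.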
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element.
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
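  // Example (illustrative): on a downward-growing stack with Alignment = 16,
  // the new SP is (SP - AllocSize) & ~15; ANDing with the negated alignment
  // mask rounds the address down, which keeps it inside the just-allocated
  // region while making it 16-byte aligned.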
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
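
// Worked example for lowerInsert above (illustrative): G_INSERT of an s16
// into an s32 at offset 16 builds the wrapped mask 0x0000FFFF (every bit
// outside [16, 32)), keeps the low half of Src, and ORs in the zero-extended
// insert value shifted left by 16.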

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}
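
// Worked example for lowerAddSubSatToMinMax above (illustrative): s8
// sadd.sat(100, 50) computes hi = 127 - 100 = 27 and lo = -128 - 0 = -128,
// clamps b to min(max(-128, 50), 27) = 27, and adds 100 + 27 = 127, the
// saturated result.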

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
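
// Worked example for lowerAddSubSatToAddoSubo above (illustrative): u8
// uadd.sat(200, 100) computes uaddo -> tmp = 44 with ov = 1, so the select
// yields the clamp value 0xff. In the signed case the clamp is derived from
// the sign of the wrapped result: a positive overflow wraps negative, so
// (tmp >>s 7) + 0x80 gives 0x7f.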

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}
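
// Worked example for lowerShlSat above (illustrative): u8 ushl.sat(0x40, 2)
// shifts to 0x00 (wrapped), and shifting back gives 0x00 != 0x40, so overflow
// is detected and the result saturates to 0xff; when no bits are lost the
// plain shift result is kept.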

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}

// { (Src & Mask) >> N } | { (Src << N) & Mask }
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, APInt Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &MF = MIRBuilder.getMF();

  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;

  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
  const MDString *RegStr = cast<MDString>(
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));

  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!PhysReg.isValid())
    return UnableToLegalize;

  if (IsRead)
    MIRBuilder.buildCopy(ValReg, PhysReg);
  else
    MIRBuilder.buildCopy(PhysReg, ValReg);

  MI.eraseFromParent();
  return Legalized;
}