1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file implements the LegalizerHelper class to legalize 10 /// individual instructions and the LegalizeMachineIR wrapper pass for the 11 /// primary legalization. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 16 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" 18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/CodeGen/TargetFrameLowering.h" 21 #include "llvm/CodeGen/TargetInstrInfo.h" 22 #include "llvm/CodeGen/TargetLowering.h" 23 #include "llvm/CodeGen/TargetSubtargetInfo.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #define DEBUG_TYPE "legalizer" 29 30 using namespace llvm; 31 using namespace LegalizeActions; 32 33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces. 34 /// 35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, 36 /// with any leftover piece as type \p LeftoverTy 37 /// 38 /// Returns -1 in the first element of the pair if the breakdown is not 39 /// satisfiable. 
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    // The leftover piece must be built from whole elements of the original
    // vector's element type.
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}

/// Map a scalar LLT of a supported floating-point width (16/32/64/128 bits)
/// to the corresponding IR type, or return nullptr if \p Ty is not a scalar
/// of one of those widths.
static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {

  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  // Intrinsics are handled by a target hook rather than the action table.
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(MI, MIRBuilder, Observer) ? Legalized
                                                          : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

/// Split \p Reg into \p NumParts new virtual registers of type \p Ty via a
/// single G_UNMERGE_VALUES; the new registers are appended to \p VRegs.
/// Assumes \p Ty evenly divides the type of \p Reg.
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

/// Variant of extractParts that tolerates \p MainTy not evenly dividing
/// \p RegTy: the remainder is extracted as \p LeftoverTy pieces into
/// \p LeftoverRegs. Returns false if no valid leftover type exists.
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

/// Inverse of the leftover-aware extractParts: reassemble \p DstReg of type
/// \p ResultTy from \p PartRegs (each of type \p PartTy) plus optional
/// \p LeftoverRegs of type \p LeftoverTy.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  unsigned PartSize = PartTy.getSizeInBits();
  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();

  // Start from undef and insert each piece at its bit offset.
  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
  MIRBuilder.buildUndef(CurResultReg);

  unsigned Offset = 0;
  for (Register PartReg : PartRegs) {
    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    CurResultReg = NewResultReg;
    Offset += PartSize;
  }

  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    // Use the original output register for the final insert to avoid a copy.
    Register NewResultReg = (I + 1 == E) ?
      DstReg : MRI.createGenericVirtualRegister(ResultTy);

    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    CurResultReg = NewResultReg;
    Offset += LeftoverPartSize;
  }
}

/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  // All operands but the last (the unmerge source) are defs.
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[I] = MI.getOperand(I).getReg();
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);

  LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy));
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to do
    // anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }

  return GCDTy;
}

LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    // Pad value depends on the requested extension semantics: zero for zext,
    // undef for anyext, and a broadcast of the top source's sign bit for sext.
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}

void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    MIRBuilder.buildExtract(DstReg, Remerge, 0);
    return;
  }

  llvm_unreachable("unhandled case");
}

/// Map a generic opcode plus a scalar bit width (32/64/128) to the matching
/// runtime-library call enumerator.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  }
  llvm_unreachable("Unknown libcall function");
}

/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI) {
  const Function &F = MI.getParent()->getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return.
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  MachineInstr *Next = MI.getNextNode();
  if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  // NOTE(review): getLibcallName may return null if the target has no
  // implementation for this libcall; presumably callers only request
  // supported libcalls here -- confirm, or guard before CreateES.
  const char *Name = TLI.getLibcallName(Libcall);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // Operand 0 is the def; all remaining operands become call arguments.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType});
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
                       Args);
}

LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  MIRBuilder.setInstr(MI);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  // mem* libcalls return void from the caller's perspective here.
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  // The trailing immediate operand flags whether a tail call was requested;
  // still verify the call is actually in tail position.
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
                    isLibCallInTailPosition(MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call to get past
    // isLibCallInTailPosition.
    assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
           "Expected instr following MI to be a return?");

    // We lowered a tail call, so the call is now the return from the block.
    // Delete the old return.
    MI.getNextNode()->eraseFromParent();
  }

  return LegalizerHelper::Legalized;
}

/// Map a conversion opcode plus source/destination IR types to the matching
/// runtime-library call enumerator.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
                       {{MI.getOperand(1).getReg(), FromType}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // The libcall fully replaces the original instruction.
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  MIRBuilder.setInstr(MI);

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
735 if (SizeOp0 % NarrowSize != 0) 736 return UnableToLegalize; 737 int NumParts = SizeOp0 / NarrowSize; 738 739 SmallVector<Register, 2> DstRegs; 740 for (int i = 0; i < NumParts; ++i) 741 DstRegs.push_back( 742 MIRBuilder.buildUndef(NarrowTy).getReg(0)); 743 744 Register DstReg = MI.getOperand(0).getReg(); 745 if(MRI.getType(DstReg).isVector()) 746 MIRBuilder.buildBuildVector(DstReg, DstRegs); 747 else 748 MIRBuilder.buildMerge(DstReg, DstRegs); 749 MI.eraseFromParent(); 750 return Legalized; 751 } 752 case TargetOpcode::G_CONSTANT: { 753 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 754 const APInt &Val = MI.getOperand(1).getCImm()->getValue(); 755 unsigned TotalSize = Ty.getSizeInBits(); 756 unsigned NarrowSize = NarrowTy.getSizeInBits(); 757 int NumParts = TotalSize / NarrowSize; 758 759 SmallVector<Register, 4> PartRegs; 760 for (int I = 0; I != NumParts; ++I) { 761 unsigned Offset = I * NarrowSize; 762 auto K = MIRBuilder.buildConstant(NarrowTy, 763 Val.lshr(Offset).trunc(NarrowSize)); 764 PartRegs.push_back(K.getReg(0)); 765 } 766 767 LLT LeftoverTy; 768 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize; 769 SmallVector<Register, 1> LeftoverRegs; 770 if (LeftoverBits != 0) { 771 LeftoverTy = LLT::scalar(LeftoverBits); 772 auto K = MIRBuilder.buildConstant( 773 LeftoverTy, 774 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits)); 775 LeftoverRegs.push_back(K.getReg(0)); 776 } 777 778 insertParts(MI.getOperand(0).getReg(), 779 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs); 780 781 MI.eraseFromParent(); 782 return Legalized; 783 } 784 case TargetOpcode::G_SEXT: 785 case TargetOpcode::G_ZEXT: 786 case TargetOpcode::G_ANYEXT: 787 return narrowScalarExt(MI, TypeIdx, NarrowTy); 788 case TargetOpcode::G_TRUNC: { 789 if (TypeIdx != 1) 790 return UnableToLegalize; 791 792 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 793 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) { 794 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << 
NarrowTy << "\n"); 795 return UnableToLegalize; 796 } 797 798 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1)); 799 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0)); 800 MI.eraseFromParent(); 801 return Legalized; 802 } 803 804 case TargetOpcode::G_ADD: { 805 // FIXME: add support for when SizeOp0 isn't an exact multiple of 806 // NarrowSize. 807 if (SizeOp0 % NarrowSize != 0) 808 return UnableToLegalize; 809 // Expand in terms of carry-setting/consuming G_ADDE instructions. 810 int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 811 812 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; 813 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 814 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 815 816 Register CarryIn; 817 for (int i = 0; i < NumParts; ++i) { 818 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); 819 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 820 821 if (i == 0) 822 MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]); 823 else { 824 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], 825 Src2Regs[i], CarryIn); 826 } 827 828 DstRegs.push_back(DstReg); 829 CarryIn = CarryOut; 830 } 831 Register DstReg = MI.getOperand(0).getReg(); 832 if(MRI.getType(DstReg).isVector()) 833 MIRBuilder.buildBuildVector(DstReg, DstRegs); 834 else 835 MIRBuilder.buildMerge(DstReg, DstRegs); 836 MI.eraseFromParent(); 837 return Legalized; 838 } 839 case TargetOpcode::G_SUB: { 840 // FIXME: add support for when SizeOp0 isn't an exact multiple of 841 // NarrowSize. 
842 if (SizeOp0 % NarrowSize != 0) 843 return UnableToLegalize; 844 845 int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); 846 847 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; 848 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); 849 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); 850 851 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); 852 Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 853 MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut}, 854 {Src1Regs[0], Src2Regs[0]}); 855 DstRegs.push_back(DstReg); 856 Register BorrowIn = BorrowOut; 857 for (int i = 1; i < NumParts; ++i) { 858 DstReg = MRI.createGenericVirtualRegister(NarrowTy); 859 BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); 860 861 MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut}, 862 {Src1Regs[i], Src2Regs[i], BorrowIn}); 863 864 DstRegs.push_back(DstReg); 865 BorrowIn = BorrowOut; 866 } 867 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); 868 MI.eraseFromParent(); 869 return Legalized; 870 } 871 case TargetOpcode::G_MUL: 872 case TargetOpcode::G_UMULH: 873 return narrowScalarMul(MI, NarrowTy); 874 case TargetOpcode::G_EXTRACT: 875 return narrowScalarExtract(MI, TypeIdx, NarrowTy); 876 case TargetOpcode::G_INSERT: 877 return narrowScalarInsert(MI, TypeIdx, NarrowTy); 878 case TargetOpcode::G_LOAD: { 879 const auto &MMO = **MI.memoperands_begin(); 880 Register DstReg = MI.getOperand(0).getReg(); 881 LLT DstTy = MRI.getType(DstReg); 882 if (DstTy.isVector()) 883 return UnableToLegalize; 884 885 if (8 * MMO.getSize() != DstTy.getSizeInBits()) { 886 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 887 auto &MMO = **MI.memoperands_begin(); 888 MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO); 889 MIRBuilder.buildAnyExt(DstReg, TmpReg); 890 MI.eraseFromParent(); 891 return Legalized; 892 } 893 894 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); 895 } 896 
case TargetOpcode::G_ZEXTLOAD: 897 case TargetOpcode::G_SEXTLOAD: { 898 bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD; 899 Register DstReg = MI.getOperand(0).getReg(); 900 Register PtrReg = MI.getOperand(1).getReg(); 901 902 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 903 auto &MMO = **MI.memoperands_begin(); 904 if (MMO.getSizeInBits() == NarrowSize) { 905 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); 906 } else { 907 MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO); 908 } 909 910 if (ZExt) 911 MIRBuilder.buildZExt(DstReg, TmpReg); 912 else 913 MIRBuilder.buildSExt(DstReg, TmpReg); 914 915 MI.eraseFromParent(); 916 return Legalized; 917 } 918 case TargetOpcode::G_STORE: { 919 const auto &MMO = **MI.memoperands_begin(); 920 921 Register SrcReg = MI.getOperand(0).getReg(); 922 LLT SrcTy = MRI.getType(SrcReg); 923 if (SrcTy.isVector()) 924 return UnableToLegalize; 925 926 int NumParts = SizeOp0 / NarrowSize; 927 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits(); 928 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize; 929 if (SrcTy.isVector() && LeftoverBits != 0) 930 return UnableToLegalize; 931 932 if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { 933 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); 934 auto &MMO = **MI.memoperands_begin(); 935 MIRBuilder.buildTrunc(TmpReg, SrcReg); 936 MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO); 937 MI.eraseFromParent(); 938 return Legalized; 939 } 940 941 return reduceLoadStoreWidth(MI, 0, NarrowTy); 942 } 943 case TargetOpcode::G_SELECT: 944 return narrowScalarSelect(MI, TypeIdx, NarrowTy); 945 case TargetOpcode::G_AND: 946 case TargetOpcode::G_OR: 947 case TargetOpcode::G_XOR: { 948 // Legalize bitwise operation: 949 // A = BinOp<Ty> B, C 950 // into: 951 // B1, ..., BN = G_UNMERGE_VALUES B 952 // C1, ..., CN = G_UNMERGE_VALUES C 953 // A1 = BinOp<Ty/N> B1, C2 954 // ... 
955 // AN = BinOp<Ty/N> BN, CN 956 // A = G_MERGE_VALUES A1, ..., AN 957 return narrowScalarBasic(MI, TypeIdx, NarrowTy); 958 } 959 case TargetOpcode::G_SHL: 960 case TargetOpcode::G_LSHR: 961 case TargetOpcode::G_ASHR: 962 return narrowScalarShift(MI, TypeIdx, NarrowTy); 963 case TargetOpcode::G_CTLZ: 964 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 965 case TargetOpcode::G_CTTZ: 966 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 967 case TargetOpcode::G_CTPOP: 968 if (TypeIdx == 1) 969 switch (MI.getOpcode()) { 970 case TargetOpcode::G_CTLZ: 971 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 972 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy); 973 case TargetOpcode::G_CTTZ: 974 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 975 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy); 976 case TargetOpcode::G_CTPOP: 977 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy); 978 default: 979 return UnableToLegalize; 980 } 981 982 Observer.changingInstr(MI); 983 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 984 Observer.changedInstr(MI); 985 return Legalized; 986 case TargetOpcode::G_INTTOPTR: 987 if (TypeIdx != 1) 988 return UnableToLegalize; 989 990 Observer.changingInstr(MI); 991 narrowScalarSrc(MI, NarrowTy, 1); 992 Observer.changedInstr(MI); 993 return Legalized; 994 case TargetOpcode::G_PTRTOINT: 995 if (TypeIdx != 0) 996 return UnableToLegalize; 997 998 Observer.changingInstr(MI); 999 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); 1000 Observer.changedInstr(MI); 1001 return Legalized; 1002 case TargetOpcode::G_PHI: { 1003 unsigned NumParts = SizeOp0 / NarrowSize; 1004 SmallVector<Register, 2> DstRegs(NumParts); 1005 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2); 1006 Observer.changingInstr(MI); 1007 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { 1008 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB(); 1009 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 1010 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts, 1011 SrcRegs[i / 
2]); 1012 } 1013 MachineBasicBlock &MBB = *MI.getParent(); 1014 MIRBuilder.setInsertPt(MBB, MI); 1015 for (unsigned i = 0; i < NumParts; ++i) { 1016 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy); 1017 MachineInstrBuilder MIB = 1018 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]); 1019 for (unsigned j = 1; j < MI.getNumOperands(); j += 2) 1020 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); 1021 } 1022 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI()); 1023 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs); 1024 Observer.changedInstr(MI); 1025 MI.eraseFromParent(); 1026 return Legalized; 1027 } 1028 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 1029 case TargetOpcode::G_INSERT_VECTOR_ELT: { 1030 if (TypeIdx != 2) 1031 return UnableToLegalize; 1032 1033 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3; 1034 Observer.changingInstr(MI); 1035 narrowScalarSrc(MI, NarrowTy, OpIdx); 1036 Observer.changedInstr(MI); 1037 return Legalized; 1038 } 1039 case TargetOpcode::G_ICMP: { 1040 uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); 1041 if (NarrowSize * 2 != SrcSize) 1042 return UnableToLegalize; 1043 1044 Observer.changingInstr(MI); 1045 Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); 1046 Register LHSH = MRI.createGenericVirtualRegister(NarrowTy); 1047 MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2)); 1048 1049 Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); 1050 Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); 1051 MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3)); 1052 1053 CmpInst::Predicate Pred = 1054 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); 1055 LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); 1056 1057 if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { 1058 MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); 1059 MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); 1060 
MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); 1061 MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); 1062 MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero); 1063 } else { 1064 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); 1065 MachineInstrBuilder CmpHEQ = 1066 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); 1067 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( 1068 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); 1069 MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH); 1070 } 1071 Observer.changedInstr(MI); 1072 MI.eraseFromParent(); 1073 return Legalized; 1074 } 1075 case TargetOpcode::G_SEXT_INREG: { 1076 if (TypeIdx != 0) 1077 return UnableToLegalize; 1078 1079 int64_t SizeInBits = MI.getOperand(2).getImm(); 1080 1081 // So long as the new type has more bits than the bits we're extending we 1082 // don't need to break it apart. 1083 if (NarrowTy.getScalarSizeInBits() >= SizeInBits) { 1084 Observer.changingInstr(MI); 1085 // We don't lose any non-extension bits by truncating the src and 1086 // sign-extending the dst. 1087 MachineOperand &MO1 = MI.getOperand(1); 1088 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1); 1089 MO1.setReg(TruncMIB.getReg(0)); 1090 1091 MachineOperand &MO2 = MI.getOperand(0); 1092 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy); 1093 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 1094 MIRBuilder.buildSExt(MO2, DstExt); 1095 MO2.setReg(DstExt); 1096 Observer.changedInstr(MI); 1097 return Legalized; 1098 } 1099 1100 // Break it apart. Components below the extension point are unmodified. The 1101 // component containing the extension point becomes a narrower SEXT_INREG. 1102 // Components above it are ashr'd from the component containing the 1103 // extension point. 
1104 if (SizeOp0 % NarrowSize != 0) 1105 return UnableToLegalize; 1106 int NumParts = SizeOp0 / NarrowSize; 1107 1108 // List the registers where the destination will be scattered. 1109 SmallVector<Register, 2> DstRegs; 1110 // List the registers where the source will be split. 1111 SmallVector<Register, 2> SrcRegs; 1112 1113 // Create all the temporary registers. 1114 for (int i = 0; i < NumParts; ++i) { 1115 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy); 1116 1117 SrcRegs.push_back(SrcReg); 1118 } 1119 1120 // Explode the big arguments into smaller chunks. 1121 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1)); 1122 1123 Register AshrCstReg = 1124 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) 1125 .getReg(0); 1126 Register FullExtensionReg = 0; 1127 Register PartialExtensionReg = 0; 1128 1129 // Do the operation on each small part. 1130 for (int i = 0; i < NumParts; ++i) { 1131 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) 1132 DstRegs.push_back(SrcRegs[i]); 1133 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { 1134 assert(PartialExtensionReg && 1135 "Expected to visit partial extension before full"); 1136 if (FullExtensionReg) { 1137 DstRegs.push_back(FullExtensionReg); 1138 continue; 1139 } 1140 DstRegs.push_back( 1141 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg) 1142 .getReg(0)); 1143 FullExtensionReg = DstRegs.back(); 1144 } else { 1145 DstRegs.push_back( 1146 MIRBuilder 1147 .buildInstr( 1148 TargetOpcode::G_SEXT_INREG, {NarrowTy}, 1149 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()}) 1150 .getReg(0)); 1151 PartialExtensionReg = DstRegs.back(); 1152 } 1153 } 1154 1155 // Gather the destination registers into the final destination. 
    // Gather the per-part results back into the full-width destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    // Only handled when the narrow type evenly divides the original width.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // Apply the operation to each narrow piece, consuming the source pieces in
    // reverse order: reversing the part order plus the per-part
    // bswap/bitreverse reverses the whole value.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

/// Widen source operand \p OpIdx of \p MI to \p WideTy by inserting an
/// extension instruction of opcode \p ExtOpcode before \p MI and rewriting
/// the operand to read the extended value.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

/// Narrow source operand \p OpIdx of \p MI to \p NarrowTy by inserting a
/// G_TRUNC before \p MI and rewriting the operand to read the truncated
/// value.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

/// Widen destination operand \p OpIdx of \p MI to \p WideTy: \p MI is
/// rewritten to define a wide temporary, and a conversion of opcode
/// \p TruncOpcode inserted after \p MI produces the original destination.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  // Insert the conversion after MI so it consumes MI's rewritten def.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}

/// Narrow destination operand \p OpIdx of \p MI to \p NarrowTy: \p MI is
/// rewritten to define a narrow temporary, and an extension of opcode
/// \p ExtOpcode inserted after \p MI produces the original destination.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  // Insert the extension after MI so it consumes MI's rewritten def.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}

/// Widen vector destination operand \p OpIdx of \p MI to \p WideTy: \p MI is
/// rewritten to define a wider vector, and a G_EXTRACT at offset 0 inserted
/// after \p MI recovers the original narrower destination.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildExtract(MO, DstExt, 0);
  MO.setReg(DstExt);
}

/// Widen vector source operand \p OpIdx of \p MI to \p MoreTy, padding the
/// extra elements with undef.
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);

  LLT OldTy = MRI.getType(MO.getReg());
  unsigned OldElts = OldTy.getNumElements();
  unsigned NewElts = MoreTy.getNumElements();

  unsigned NumParts = NewElts / OldElts;

  // Use concat_vectors if the result is a multiple of the number of elements.
  if (NumParts * OldElts == NewElts) {
    // Concatenate the original vector with enough undef copies of itself to
    // reach the requested element count.
    SmallVector<Register, 8> Parts;
    Parts.push_back(MO.getReg());

    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
    for (unsigned I = 1; I != NumParts; ++I)
      Parts.push_back(ImpDef);

    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
    MO.setReg(Concat.getReg(0));
    return;
  }

  // Otherwise insert the original vector into an undef value of the wider
  // type at offset 0.
  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
  MO.setReg(MoreReg);
}

/// Rewrite source operand \p OpIdx of \p MI to a G_BITCAST of itself to
/// \p CastTy.
void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Op = MI.getOperand(OpIdx);
  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
}

/// Rewrite destination operand \p OpIdx of \p MI to define a \p CastTy
/// temporary, with a G_BITCAST inserted after \p MI producing the original
/// destination.
void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  // Insert the bitcast after MI so it consumes MI's rewritten def.
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildBitcast(MO, CastDst);
  MO.setReg(CastDst);
}

/// Legalize G_MERGE_VALUES by widening the source type (TypeIdx 1) to
/// \p WideTy. Only scalar destinations are handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy pieces needed to cover the destination, rounding up.
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type: zext each source part and
    // shift/or it into place.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // Write the last OR directly into DstReg when no final conversion is
      // needed.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  // Build merges of each piece: walk the flat list of GCD-sized parts in
  // groups of PartsPerGCD, merging each group to one WideTy value.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Legalize G_UNMERGE_VALUES by widening the destination type (TypeIdx 0) to
/// \p WideTy. Only scalar destinations with a non-vector source are handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    // Pointer sources must first be converted to an integer of the same
    // width; only possible for integral address spaces.
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type with shift + truncate.
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges to the original results. Since we may have
  // widened the source, we will need to pad the results with dead defs to
  // cover the source register.
  // e.g. widen s16 to s32:
  // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
  //
  // =>
  //  %4:_(s64) = G_ANYEXT %0:_(s48)
  //  %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
  //  %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def

  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J) {
      int Idx = I * PartsPerUnmerge + J;
      if (Idx < NumDst)
        MIB.addDef(MI.getOperand(Idx).getReg());
      else {
        // Create dead def for excess components.
        MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
      }
    }

    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Legalize G_EXTRACT by widening either the destination (TypeIdx 0) or, for
/// element-aligned vector extracts, the source (TypeIdx 1) to \p WideTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
1514 const DataLayout &DL = MIRBuilder.getDataLayout(); 1515 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) 1516 return UnableToLegalize; 1517 1518 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); 1519 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); 1520 SrcTy = SrcAsIntTy; 1521 } 1522 1523 if (DstTy.isPointer()) 1524 return UnableToLegalize; 1525 1526 if (Offset == 0) { 1527 // Avoid a shift in the degenerate case. 1528 MIRBuilder.buildTrunc(DstReg, 1529 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src)); 1530 MI.eraseFromParent(); 1531 return Legalized; 1532 } 1533 1534 // Do a shift in the source type. 1535 LLT ShiftTy = SrcTy; 1536 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { 1537 Src = MIRBuilder.buildAnyExt(WideTy, Src); 1538 ShiftTy = WideTy; 1539 } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) 1540 return UnableToLegalize; 1541 1542 auto LShr = MIRBuilder.buildLShr( 1543 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); 1544 MIRBuilder.buildTrunc(DstReg, LShr); 1545 MI.eraseFromParent(); 1546 return Legalized; 1547 } 1548 1549 if (SrcTy.isScalar()) { 1550 Observer.changingInstr(MI); 1551 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1552 Observer.changedInstr(MI); 1553 return Legalized; 1554 } 1555 1556 if (!SrcTy.isVector()) 1557 return UnableToLegalize; 1558 1559 if (DstTy != SrcTy.getElementType()) 1560 return UnableToLegalize; 1561 1562 if (Offset % SrcTy.getScalarSizeInBits() != 0) 1563 return UnableToLegalize; 1564 1565 Observer.changingInstr(MI); 1566 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1567 1568 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * 1569 Offset); 1570 widenScalarDst(MI, WideTy.getScalarType(), 0); 1571 Observer.changedInstr(MI); 1572 return Legalized; 1573 } 1574 1575 LegalizerHelper::LegalizeResult 1576 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, 1577 LLT WideTy) { 1578 if (TypeIdx != 0) 1579 return UnableToLegalize; 
1580 Observer.changingInstr(MI); 1581 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1582 widenScalarDst(MI, WideTy); 1583 Observer.changedInstr(MI); 1584 return Legalized; 1585 } 1586 1587 LegalizerHelper::LegalizeResult 1588 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { 1589 MIRBuilder.setInstr(MI); 1590 1591 switch (MI.getOpcode()) { 1592 default: 1593 return UnableToLegalize; 1594 case TargetOpcode::G_EXTRACT: 1595 return widenScalarExtract(MI, TypeIdx, WideTy); 1596 case TargetOpcode::G_INSERT: 1597 return widenScalarInsert(MI, TypeIdx, WideTy); 1598 case TargetOpcode::G_MERGE_VALUES: 1599 return widenScalarMergeValues(MI, TypeIdx, WideTy); 1600 case TargetOpcode::G_UNMERGE_VALUES: 1601 return widenScalarUnmergeValues(MI, TypeIdx, WideTy); 1602 case TargetOpcode::G_UADDO: 1603 case TargetOpcode::G_USUBO: { 1604 if (TypeIdx == 1) 1605 return UnableToLegalize; // TODO 1606 auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2)); 1607 auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3)); 1608 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO 1609 ? TargetOpcode::G_ADD 1610 : TargetOpcode::G_SUB; 1611 // Do the arithmetic in the larger type. 1612 auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext}); 1613 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg()); 1614 APInt Mask = 1615 APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits()); 1616 auto AndOp = MIRBuilder.buildAnd( 1617 WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask)); 1618 // There is no overflow if the AndOp is the same as NewOp. 1619 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp); 1620 // Now trunc the NewOp to the original result. 
1621 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp); 1622 MI.eraseFromParent(); 1623 return Legalized; 1624 } 1625 case TargetOpcode::G_CTTZ: 1626 case TargetOpcode::G_CTTZ_ZERO_UNDEF: 1627 case TargetOpcode::G_CTLZ: 1628 case TargetOpcode::G_CTLZ_ZERO_UNDEF: 1629 case TargetOpcode::G_CTPOP: { 1630 if (TypeIdx == 0) { 1631 Observer.changingInstr(MI); 1632 widenScalarDst(MI, WideTy, 0); 1633 Observer.changedInstr(MI); 1634 return Legalized; 1635 } 1636 1637 Register SrcReg = MI.getOperand(1).getReg(); 1638 1639 // First ZEXT the input. 1640 auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); 1641 LLT CurTy = MRI.getType(SrcReg); 1642 if (MI.getOpcode() == TargetOpcode::G_CTTZ) { 1643 // The count is the same in the larger type except if the original 1644 // value was zero. This can be handled by setting the bit just off 1645 // the top of the original type. 1646 auto TopBit = 1647 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); 1648 MIBSrc = MIRBuilder.buildOr( 1649 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit)); 1650 } 1651 1652 // Perform the operation at the larger size. 1653 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); 1654 // This is already the correct result for CTPOP and CTTZs 1655 if (MI.getOpcode() == TargetOpcode::G_CTLZ || 1656 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { 1657 // The correct result is NewOp - (Difference in widety and current ty). 
1658 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); 1659 MIBNewOp = MIRBuilder.buildSub( 1660 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)); 1661 } 1662 1663 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp); 1664 MI.eraseFromParent(); 1665 return Legalized; 1666 } 1667 case TargetOpcode::G_BSWAP: { 1668 Observer.changingInstr(MI); 1669 Register DstReg = MI.getOperand(0).getReg(); 1670 1671 Register ShrReg = MRI.createGenericVirtualRegister(WideTy); 1672 Register DstExt = MRI.createGenericVirtualRegister(WideTy); 1673 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy); 1674 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1675 1676 MI.getOperand(0).setReg(DstExt); 1677 1678 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 1679 1680 LLT Ty = MRI.getType(DstReg); 1681 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); 1682 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits); 1683 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg); 1684 1685 MIRBuilder.buildTrunc(DstReg, ShrReg); 1686 Observer.changedInstr(MI); 1687 return Legalized; 1688 } 1689 case TargetOpcode::G_BITREVERSE: { 1690 Observer.changingInstr(MI); 1691 1692 Register DstReg = MI.getOperand(0).getReg(); 1693 LLT Ty = MRI.getType(DstReg); 1694 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); 1695 1696 Register DstExt = MRI.createGenericVirtualRegister(WideTy); 1697 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1698 MI.getOperand(0).setReg(DstExt); 1699 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); 1700 1701 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits); 1702 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt); 1703 MIRBuilder.buildTrunc(DstReg, Shift); 1704 Observer.changedInstr(MI); 1705 return Legalized; 1706 } 1707 case TargetOpcode::G_ADD: 1708 case TargetOpcode::G_AND: 1709 case TargetOpcode::G_MUL: 1710 case 
TargetOpcode::G_OR: 1711 case TargetOpcode::G_XOR: 1712 case TargetOpcode::G_SUB: 1713 // Perform operation at larger width (any extension is fines here, high bits 1714 // don't affect the result) and then truncate the result back to the 1715 // original type. 1716 Observer.changingInstr(MI); 1717 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1718 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 1719 widenScalarDst(MI, WideTy); 1720 Observer.changedInstr(MI); 1721 return Legalized; 1722 1723 case TargetOpcode::G_SHL: 1724 Observer.changingInstr(MI); 1725 1726 if (TypeIdx == 0) { 1727 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); 1728 widenScalarDst(MI, WideTy); 1729 } else { 1730 assert(TypeIdx == 1); 1731 // The "number of bits to shift" operand must preserve its value as an 1732 // unsigned integer: 1733 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1734 } 1735 1736 Observer.changedInstr(MI); 1737 return Legalized; 1738 1739 case TargetOpcode::G_SDIV: 1740 case TargetOpcode::G_SREM: 1741 case TargetOpcode::G_SMIN: 1742 case TargetOpcode::G_SMAX: 1743 Observer.changingInstr(MI); 1744 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 1745 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1746 widenScalarDst(MI, WideTy); 1747 Observer.changedInstr(MI); 1748 return Legalized; 1749 1750 case TargetOpcode::G_ASHR: 1751 case TargetOpcode::G_LSHR: 1752 Observer.changingInstr(MI); 1753 1754 if (TypeIdx == 0) { 1755 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ? 
1756 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; 1757 1758 widenScalarSrc(MI, WideTy, 1, CvtOp); 1759 widenScalarDst(MI, WideTy); 1760 } else { 1761 assert(TypeIdx == 1); 1762 // The "number of bits to shift" operand must preserve its value as an 1763 // unsigned integer: 1764 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1765 } 1766 1767 Observer.changedInstr(MI); 1768 return Legalized; 1769 case TargetOpcode::G_UDIV: 1770 case TargetOpcode::G_UREM: 1771 case TargetOpcode::G_UMIN: 1772 case TargetOpcode::G_UMAX: 1773 Observer.changingInstr(MI); 1774 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1775 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); 1776 widenScalarDst(MI, WideTy); 1777 Observer.changedInstr(MI); 1778 return Legalized; 1779 1780 case TargetOpcode::G_SELECT: 1781 Observer.changingInstr(MI); 1782 if (TypeIdx == 0) { 1783 // Perform operation at larger width (any extension is fine here, high 1784 // bits don't affect the result) and then truncate the result back to the 1785 // original type. 1786 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 1787 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); 1788 widenScalarDst(MI, WideTy); 1789 } else { 1790 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector(); 1791 // Explicit extension is required here since high bits affect the result. 
1792 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false)); 1793 } 1794 Observer.changedInstr(MI); 1795 return Legalized; 1796 1797 case TargetOpcode::G_FPTOSI: 1798 case TargetOpcode::G_FPTOUI: 1799 Observer.changingInstr(MI); 1800 1801 if (TypeIdx == 0) 1802 widenScalarDst(MI, WideTy); 1803 else 1804 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); 1805 1806 Observer.changedInstr(MI); 1807 return Legalized; 1808 case TargetOpcode::G_SITOFP: 1809 if (TypeIdx != 1) 1810 return UnableToLegalize; 1811 Observer.changingInstr(MI); 1812 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); 1813 Observer.changedInstr(MI); 1814 return Legalized; 1815 1816 case TargetOpcode::G_UITOFP: 1817 if (TypeIdx != 1) 1818 return UnableToLegalize; 1819 Observer.changingInstr(MI); 1820 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 1821 Observer.changedInstr(MI); 1822 return Legalized; 1823 1824 case TargetOpcode::G_LOAD: 1825 case TargetOpcode::G_SEXTLOAD: 1826 case TargetOpcode::G_ZEXTLOAD: 1827 Observer.changingInstr(MI); 1828 widenScalarDst(MI, WideTy); 1829 Observer.changedInstr(MI); 1830 return Legalized; 1831 1832 case TargetOpcode::G_STORE: { 1833 if (TypeIdx != 0) 1834 return UnableToLegalize; 1835 1836 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 1837 if (!isPowerOf2_32(Ty.getSizeInBits())) 1838 return UnableToLegalize; 1839 1840 Observer.changingInstr(MI); 1841 1842 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? 
1843 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT; 1844 widenScalarSrc(MI, WideTy, 0, ExtType); 1845 1846 Observer.changedInstr(MI); 1847 return Legalized; 1848 } 1849 case TargetOpcode::G_CONSTANT: { 1850 MachineOperand &SrcMO = MI.getOperand(1); 1851 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1852 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant( 1853 MRI.getType(MI.getOperand(0).getReg())); 1854 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || 1855 ExtOpc == TargetOpcode::G_ANYEXT) && 1856 "Illegal Extend"); 1857 const APInt &SrcVal = SrcMO.getCImm()->getValue(); 1858 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT) 1859 ? SrcVal.sext(WideTy.getSizeInBits()) 1860 : SrcVal.zext(WideTy.getSizeInBits()); 1861 Observer.changingInstr(MI); 1862 SrcMO.setCImm(ConstantInt::get(Ctx, Val)); 1863 1864 widenScalarDst(MI, WideTy); 1865 Observer.changedInstr(MI); 1866 return Legalized; 1867 } 1868 case TargetOpcode::G_FCONSTANT: { 1869 MachineOperand &SrcMO = MI.getOperand(1); 1870 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); 1871 APFloat Val = SrcMO.getFPImm()->getValueAPF(); 1872 bool LosesInfo; 1873 switch (WideTy.getSizeInBits()) { 1874 case 32: 1875 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, 1876 &LosesInfo); 1877 break; 1878 case 64: 1879 Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, 1880 &LosesInfo); 1881 break; 1882 default: 1883 return UnableToLegalize; 1884 } 1885 1886 assert(!LosesInfo && "extend should always be lossless"); 1887 1888 Observer.changingInstr(MI); 1889 SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); 1890 1891 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); 1892 Observer.changedInstr(MI); 1893 return Legalized; 1894 } 1895 case TargetOpcode::G_IMPLICIT_DEF: { 1896 Observer.changingInstr(MI); 1897 widenScalarDst(MI, WideTy); 1898 Observer.changedInstr(MI); 1899 return Legalized; 1900 } 1901 case TargetOpcode::G_BRCOND: 1902 
Observer.changingInstr(MI); 1903 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false)); 1904 Observer.changedInstr(MI); 1905 return Legalized; 1906 1907 case TargetOpcode::G_FCMP: 1908 Observer.changingInstr(MI); 1909 if (TypeIdx == 0) 1910 widenScalarDst(MI, WideTy); 1911 else { 1912 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); 1913 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT); 1914 } 1915 Observer.changedInstr(MI); 1916 return Legalized; 1917 1918 case TargetOpcode::G_ICMP: 1919 Observer.changingInstr(MI); 1920 if (TypeIdx == 0) 1921 widenScalarDst(MI, WideTy); 1922 else { 1923 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>( 1924 MI.getOperand(1).getPredicate())) 1925 ? TargetOpcode::G_SEXT 1926 : TargetOpcode::G_ZEXT; 1927 widenScalarSrc(MI, WideTy, 2, ExtOpcode); 1928 widenScalarSrc(MI, WideTy, 3, ExtOpcode); 1929 } 1930 Observer.changedInstr(MI); 1931 return Legalized; 1932 1933 case TargetOpcode::G_PTR_ADD: 1934 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD"); 1935 Observer.changingInstr(MI); 1936 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1937 Observer.changedInstr(MI); 1938 return Legalized; 1939 1940 case TargetOpcode::G_PHI: { 1941 assert(TypeIdx == 0 && "Expecting only Idx 0"); 1942 1943 Observer.changingInstr(MI); 1944 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { 1945 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); 1946 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); 1947 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT); 1948 } 1949 1950 MachineBasicBlock &MBB = *MI.getParent(); 1951 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); 1952 widenScalarDst(MI, WideTy); 1953 Observer.changedInstr(MI); 1954 return Legalized; 1955 } 1956 case TargetOpcode::G_EXTRACT_VECTOR_ELT: { 1957 if (TypeIdx == 0) { 1958 Register VecReg = MI.getOperand(1).getReg(); 1959 LLT VecTy = MRI.getType(VecReg); 1960 Observer.changingInstr(MI); 1961 1962 
widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), 1963 WideTy.getSizeInBits()), 1964 1, TargetOpcode::G_SEXT); 1965 1966 widenScalarDst(MI, WideTy, 0); 1967 Observer.changedInstr(MI); 1968 return Legalized; 1969 } 1970 1971 if (TypeIdx != 2) 1972 return UnableToLegalize; 1973 Observer.changingInstr(MI); 1974 // TODO: Probably should be zext 1975 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); 1976 Observer.changedInstr(MI); 1977 return Legalized; 1978 } 1979 case TargetOpcode::G_INSERT_VECTOR_ELT: { 1980 if (TypeIdx == 1) { 1981 Observer.changingInstr(MI); 1982 1983 Register VecReg = MI.getOperand(1).getReg(); 1984 LLT VecTy = MRI.getType(VecReg); 1985 LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy); 1986 1987 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT); 1988 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); 1989 widenScalarDst(MI, WideVecTy, 0); 1990 Observer.changedInstr(MI); 1991 return Legalized; 1992 } 1993 1994 if (TypeIdx == 2) { 1995 Observer.changingInstr(MI); 1996 // TODO: Probably should be zext 1997 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); 1998 Observer.changedInstr(MI); 1999 } 2000 2001 return Legalized; 2002 } 2003 case TargetOpcode::G_FADD: 2004 case TargetOpcode::G_FMUL: 2005 case TargetOpcode::G_FSUB: 2006 case TargetOpcode::G_FMA: 2007 case TargetOpcode::G_FMAD: 2008 case TargetOpcode::G_FNEG: 2009 case TargetOpcode::G_FABS: 2010 case TargetOpcode::G_FCANONICALIZE: 2011 case TargetOpcode::G_FMINNUM: 2012 case TargetOpcode::G_FMAXNUM: 2013 case TargetOpcode::G_FMINNUM_IEEE: 2014 case TargetOpcode::G_FMAXNUM_IEEE: 2015 case TargetOpcode::G_FMINIMUM: 2016 case TargetOpcode::G_FMAXIMUM: 2017 case TargetOpcode::G_FDIV: 2018 case TargetOpcode::G_FREM: 2019 case TargetOpcode::G_FCEIL: 2020 case TargetOpcode::G_FFLOOR: 2021 case TargetOpcode::G_FCOS: 2022 case TargetOpcode::G_FSIN: 2023 case TargetOpcode::G_FLOG10: 2024 case TargetOpcode::G_FLOG: 2025 case TargetOpcode::G_FLOG2: 2026 case 
TargetOpcode::G_FRINT: 2027 case TargetOpcode::G_FNEARBYINT: 2028 case TargetOpcode::G_FSQRT: 2029 case TargetOpcode::G_FEXP: 2030 case TargetOpcode::G_FEXP2: 2031 case TargetOpcode::G_FPOW: 2032 case TargetOpcode::G_INTRINSIC_TRUNC: 2033 case TargetOpcode::G_INTRINSIC_ROUND: 2034 assert(TypeIdx == 0); 2035 Observer.changingInstr(MI); 2036 2037 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) 2038 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT); 2039 2040 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); 2041 Observer.changedInstr(MI); 2042 return Legalized; 2043 case TargetOpcode::G_INTTOPTR: 2044 if (TypeIdx != 1) 2045 return UnableToLegalize; 2046 2047 Observer.changingInstr(MI); 2048 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); 2049 Observer.changedInstr(MI); 2050 return Legalized; 2051 case TargetOpcode::G_PTRTOINT: 2052 if (TypeIdx != 0) 2053 return UnableToLegalize; 2054 2055 Observer.changingInstr(MI); 2056 widenScalarDst(MI, WideTy, 0); 2057 Observer.changedInstr(MI); 2058 return Legalized; 2059 case TargetOpcode::G_BUILD_VECTOR: { 2060 Observer.changingInstr(MI); 2061 2062 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType(); 2063 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) 2064 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT); 2065 2066 // Avoid changing the result vector type if the source element type was 2067 // requested. 
// (Continuation of LegalizerHelper::widenScalar: tail of the G_BUILD_VECTOR
// case, then G_SEXT_INREG, then the end of the function.)
    if (TypeIdx == 1) {
      // Only the source element type was widened: switch the opcode to
      // G_BUILD_VECTOR_TRUNC so the result vector type stays unchanged.
      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    // Any-extend the input; the G_SEXT_INREG still sign-extends from the same
    // bit position, and the result is truncated back to the original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

/// Unmerge \p Src into \p Ty sized pieces, appending each result register to
/// \p Pieces. (The last operand of the G_UNMERGE_VALUES is the source, hence
/// the getNumOperands() - 1 bound.)
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));
}

/// Lower a G_BITCAST between a vector and a scalar (in either direction) by
/// unmerging the source into element-sized pieces and re-merging them as the
/// destination type. Vector<->vector and scalar<->scalar bitcasts are not
/// handled here and report UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector() && !DstTy.isVector()) {
    // Vector source -> scalar destination: split into elements, merge to Dst.
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector() && !SrcTy.isVector()) {
    // Scalar source -> vector destination: split into the destination's
    // element size, merge to Dst.
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMerge(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Legalize \p MI by inserting G_BITCASTs so that the operation is performed
/// on type \p CastTy instead of the type at index \p TypeIdx
/// (via bitcastSrc/bitcastDst on the affected operands).
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    // Cast both select values and the result; the (scalar) condition operand
    // is untouched.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise ops are insensitive to the value interpretation, so casting all
    // operands is always valid.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  default:
    return UnableToLegalize;
  }
}

/// Lower \p MI (an instruction marked Lower for this type) into simpler,
/// already-legalizable operations. \p Ty is the type associated with type
/// index \p TypeIdx of \p MI.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = x - (x / y) * y
    auto Quot =
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ?
// (Continuation of LegalizerHelper::lower: rest of the G_SREM/G_UREM case.)
                              G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
    MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    // Rewrite MI in place into the plain G_MUL producing the low half, then
    // insert the high-half multiply and the overflow compare after it.
    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.RemoveOperand(1);
    Observer.changedInstr(MI);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned overflow iff the high half is non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
    if (!ZeroTy)
      return UnableToLegalize;
    // fneg x -> fsub -0.0, x (getZeroValueForNegation yields the negative
    // zero appropriate for the FP type).
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    Register SubByReg = MI.getOperand(1).getReg();
    Register ZeroReg = Zero.getReg(0);
    MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFNeg(Neg, RHS);
    MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain G_ATOMIC_CMPXCHG plus a compare of the loaded old
    // value against the expected value to produce the success flag.
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
        // This load needs splitting into power of 2 sized loads.
        if (DstTy.isVector())
          return UnableToLegalize;
        if (isPowerOf2_32(DstTy.getSizeInBits()))
          return UnableToLegalize; // Don't know what we're being asked to do.

        // Our strategy here is to generate anyextending loads for the smaller
        // types up to next power-2 result type, and then combine the two larger
        // result values together, before truncating back down to the non-pow-2
        // type.
        // E.g. v1 = i24 load =>
        // v2 = i32 zextload (2 byte)
        // v3 = i32 load (1 byte)
        // v4 = i32 shl v3, 16
        // v5 = i32 or v4, v2
        // v1 = i24 trunc v5
        // By doing this we generate the correct truncate which should get
        // combined away as an artifact with a matching extend.
        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

        MachineFunction &MF = MIRBuilder.getMF();
        MachineMemOperand *LargeMMO =
            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
        MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
            &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

        LLT PtrTy = MRI.getType(PtrReg);
        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
        LLT AnyExtTy = LLT::scalar(AnyExtSize);
        Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        auto LargeLoad = MIRBuilder.buildLoadInstr(
            TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);

        auto OffsetCst = MIRBuilder.buildConstant(
            LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
        Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
        auto SmallPtr =
            MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                              *SmallMMO);

        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
        MI.eraseFromParent();
        return Legalized;
      }
      // Result size equals memory size: an extending load degenerates into a
      // plain load.
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then extend to the result width with the
      // extension matching the original opcode.
      Register TmpReg =
          MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_STORE: {
    // Lower a non-power of 2 store into multiple pow-2 stores.
    // E.g. split an i24 store into an i16 store + i8 store.
    // We do this by first extending the stored value to the next largest power
    // of 2 type, and then using truncating stores to store the components.
    // By doing this, likewise with G_LOAD, generate an extend that can be
    // artifact-combined away instead of leaving behind extracts.
    Register SrcReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    MachineMemOperand &MMO = **MI.memoperands_begin();
    if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
      return UnableToLegalize;
    if (SrcTy.isVector())
      return UnableToLegalize;
    if (isPowerOf2_32(SrcTy.getSizeInBits()))
      return UnableToLegalize; // Don't know what we're being asked to do.

    // Extend to the next pow-2.
    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

    // Obtain the smaller value by shifting away the larger value.
    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

    // Generate the PtrAdd and truncating stores.
    LLT PtrTy = MRI.getType(PtrReg);
    auto OffsetCst = MIRBuilder.buildConstant(
        LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
    Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
    auto SmallPtr =
        MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));

    MachineFunction &MF = MIRBuilder.getMF();
    MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
    MachineMemOperand *SmallMMO =
        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDO: {
    // res = lhs + rhs; carry = res <u rhs (unsigned wrap check).
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    // res = lhs + rhs + zext(carry_in); carry_out = res <u lhs.
    // NOTE(review): this carry-out looks like it misses the wrap case where
    // CarryIn == 1 and RHS is all-ones (then Res == LHS but a carry
    // occurred) — verify against current upstream lowering.
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();
    LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
    auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    // res = lhs - rhs; borrow = lhs <u rhs.
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    // res = lhs - rhs - zext(borrow_in);
    // borrow_out = (lhs == rhs) ? borrow_in : (lhs <u rhs).
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(BorrowOut);
    const LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
    auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);

    auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
    auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UITOFP:
    return lowerUITOFP(MI, TypeIdx, Ty);
  case G_SITOFP:
    return lowerSITOFP(MI, TypeIdx, Ty);
  case G_FPTOUI:
    return lowerFPTOUI(MI, TypeIdx, Ty);
  case G_FPTOSI:
    return lowerFPTOSI(MI);
  case G_FPTRUNC:
    return lowerFPTRUNC(MI, TypeIdx, Ty);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI, TypeIdx, Ty);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI, TypeIdx, Ty);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    // sext_inreg x, bits -> ashr (shl x, width - bits), width - bits.
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
    MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
    MI.eraseFromParent();
    return Legalized;
  }
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  case G_EXTRACT:
    return lowerExtract(MI);
  case G_INSERT:
    return lowerInsert(MI);
  case G_BSWAP:
    return lowerBswap(MI);
  case G_BITREVERSE:
    return lowerBitreverse(MI);
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    return lowerReadWriteRegister(MI);
  }
}

/// Split a G_IMPLICIT_DEF of a vector type into NarrowTy-sized undefs and
/// reassemble them into the original result register.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<Register, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  Register DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  for (int i = 0; i < NumParts; ++i) {
    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(TmpReg);
    DstRegs.push_back(TmpReg);
  }

  // Vector pieces are concatenated; scalar pieces become build_vector
  // elements.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Handles operands with different types, but all must have the same number of
// elements. There will be multiple type indexes. NarrowTy is expected to have
// the result element type.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  assert(TypeIdx == 0 && "only one type index expected");

  const unsigned Opc = MI.getOpcode();
  const int NumOps = MI.getNumOperands() - 1;
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();

  assert(NumOps <= 3 && "expected instrution with 1 result and 1-3 sources");

  SmallVector<Register, 8> ExtractedRegs[3];
  SmallVector<Register, 8> Parts;

  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  // Break down all the sources into NarrowTy pieces we can operate on. This may
  // involve creating merges to a wider type, padded with undef.
  for (int I = 0; I != NumOps; ++I) {
    Register SrcReg = MI.getOperand(I + 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    // Each operand may have its own type, but only the number of elements
    // matters.
    LLT OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);

    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy,
                        ExtractedRegs[I], TargetOpcode::G_ANYEXT);
  }

  SmallVector<Register, 8> ResultRegs;

  // Input operands for each sub-instruction.
  SmallVector<SrcOp, 4> InputRegs(NumOps, Register());

  int NumParts = ExtractedRegs[0].size();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned DstSize = DstTy.getSizeInBits();
  LLT DstLCMTy = getLCMType(DstTy, NarrowTy);

  const unsigned NarrowSize = NarrowTy.getSizeInBits();

  // We widened the source registers to satisfy merge/unmerge size
  // constraints. We'll have some extra fully undef parts.
  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;

  for (int I = 0; I != NumRealParts; ++I) {
    // Emit this instruction on each of the split pieces.
    for (int J = 0; J != NumOps; ++J)
      InputRegs[J] = ExtractedRegs[J][I];

    auto Inst = MIRBuilder.buildInstr(Opc, {NarrowTy}, InputRegs, Flags);
    ResultRegs.push_back(Inst.getReg(0));
  }

  // Fill out the widened result with undef instead of creating instructions
  // with undef inputs.
  int NumUndefParts = NumParts - NumRealParts;
  if (NumUndefParts != 0)
    ResultRegs.append(NumUndefParts, MIRBuilder.buildUndef(NarrowTy).getReg(0));

  // Extract the possibly padded result to the original result register.
  buildWidenedRemergeToDst(DstReg, DstLCMTy, ResultRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    LLT LeftoverTy;
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Per-operand narrow type: same element count, the operand's own scalar
    // type.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Split a conversion-like operation (one result, one source, potentially
/// different element types) into NarrowTy-element pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // NOTE(review): the source pieces are given NumParts elements each, not
    // NarrowTy.getNumElements() — these coincide only when the vector is split
    // exactly in half. Confirm against callers that other splits don't reach
    // here.
    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Scalar narrowing: one operation per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst =
        MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});

    // Preserve the original fast-math/exact flags on each piece.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split a vector G_ICMP/G_FCMP into NumParts narrower compares. The result
// (boolean vector) and source element types differ, so the narrow type for
// the non-requested index is derived from the requested one.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;  // NarrowTy0 = result pieces, NarrowTy1 = source pieces.

  if (TypeIdx == 0) {
    // Result type was requested to be narrowed.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Source type was requested to be narrowed.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      // FCmp pieces keep the original instruction's flags (e.g. fast-math).
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split a vector G_SELECT. Handles both a scalar condition (reused for every
// piece) and a vector condition (split alongside the value operands).
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;  // NarrowTy0 = value pieces, NarrowTy1 = condition pieces.

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  // TypeIdx 1 (the condition) only makes sense for a vector condition.
  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // Vector condition pieces are not implemented yet; bail out
      // unconditionally.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // Only split the condition if it is a vector; a scalar condition is shared.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

// Split a G_PHI into narrower phis. New phis are created in the result block,
// and the incoming values are split in each predecessor block.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    // NarrowTy pieces first, then any leftover-typed piece.
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Rebuild the full-width value after the phis, at the first non-phi point.
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // Split each incoming value at the end of its predecessor block, before
    // any terminators.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

// Narrow the source (TypeIdx 1) of a G_UNMERGE_VALUES by first unmerging to
// the GCD type and then re-unmerging each of those pieces into the original
// destinations.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
    return UnableToLegalize;
  }

  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  // Each GCD-typed piece feeds a smaller unmerge producing a contiguous run
  // of the original destination registers.
  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}

// Split a G_BUILD_VECTOR into NarrowTy-sized sub-build-vectors, padding the
// last one with undef elements if the element count doesn't divide evenly.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
                                                unsigned TypeIdx,
                                                LLT NarrowTy) {
  assert(TypeIdx == 0 && "not a vector type index");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = DstTy.getElementType();

  int DstNumElts = DstTy.getNumElements();
  int NarrowNumElts = NarrowTy.getNumElements();
  // Round the piece count up so all source elements are covered.
  int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
  LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);

  SmallVector<Register, 8> ConcatOps;
  SmallVector<Register, 8> SubBuildVector;

  // Only needed when padding is required (widened type differs from DstTy).
  Register UndefReg;
  if (WidenedDstTy != DstTy)
    UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);

  // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
  // necessary.
  //
  // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
  //   -> <2 x s16>
  //
  // %4:_(s16) = G_IMPLICIT_DEF
  // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
  // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
  // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
  // %3:_(<3 x s16>) = G_EXTRACT %7, 0
  for (int I = 0; I != NumConcat; ++I) {
    for (int J = 0; J != NarrowNumElts; ++J) {
      int SrcIdx = NarrowNumElts * I + J;

      if (SrcIdx < DstNumElts) {
        Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
        SubBuildVector.push_back(SrcIdx < DstNumElts ? SrcReg : SrcReg);
      } else
        SubBuildVector.push_back(UndefReg);
    }

    auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
    ConcatOps.push_back(BuildVec.getReg(0));
    SubBuildVector.clear();
  }

  if (DstTy == WidenedDstTy)
    MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
  else {
    // Padded: concat to the widened type, then extract the original width.
    auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
    MIRBuilder.buildExtract(DstReg, Concat, 0);
  }

  MI.eraseFromParent();
  return Legalized;
}

// Break a non-atomic G_LOAD/G_STORE into NarrowTy-sized memory accesses (plus
// a leftover-sized access for uneven breakdowns), offsetting the pointer for
// each piece.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // FIXME: Do we need a distinct NarrowMemory legalize action?
  // Extending loads / truncating stores (value size != memory size) are not
  // handled by this splitting scheme.
  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For a load only the breakdown counts are needed; registers are created
    // as the pieces are loaded.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store the value must be split up front into the pieces to store.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // -1 means the breakdown (or extraction) failed.
  if (NumParts == -1)
    return UnableToLegalize;

  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Compute the piece's address; materializePtrAdd defines NewAddrReg.
      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a smaller memory operand at the adjusted offset.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  if (IsLoad) {
    // Reassemble the loaded pieces into the original full-width value.
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}

// Split a G_SEXT_INREG over NarrowTy pieces using the GCD/LCM merge helpers,
// applying the sign-extension to each piece.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
                                              LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  int64_t Imm = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);

  // Sign-extend each piece in place.
  for (Register &R : Parts)
    R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);

  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

// Top-level dispatch for the FewerElements action: route each opcode to the
// splitting strategy matching its operand structure.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Operations where every operand shares one type.
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case
    G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  // Operations whose type indices may have different element types but must
  // break into the same number of pieces.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversion-style operations: one result, one source.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}

// Expand a shift by a known-constant amount into operations on the two
// half-width registers (InL = low half, InH = high half), mirroring the
// SelectionDAG expansion. Amt is the constant shift amount; HalfTy is the
// half-width type; AmtTy is the shift-amount type.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // Shift by zero: just reassemble the input.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Shift amount exceeds the full width: result is all zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // More than a half-width shift: low half is zero, high half comes from
      // the low input shifted by the excess.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exactly half-width: halves swap, low half becomes zero.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // Short shift: bits shifted out of the low half feed the high half.
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
        MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
        NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Shift amount exceeds the full width: result is all zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR but the vacated high bits take the sign of InH.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}

// TODO: Optimize if constant shift amount.
// Narrow a scalar shift to half-width pieces. TypeIdx 1 just narrows the
// shift-amount operand; TypeIdx 0 expands the shift itself.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    // Only the shift amount needs narrowing; truncate it in place.
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // If the shift amount is a known constant, use the cheaper fixed expansion.
  if (const MachineInstr *KShiftAmt =
      getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
      MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize (used when the shift crosses the halves);
  // AmtLack = NewBitSize - Amt (bits carried between halves on short shifts).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // Select between the short/long variants; a zero amount must pass the
    // input through unchanged (the "lack"/"excess" amounts are invalid then).
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  // Reassemble the half-width results into the full-width destination.
  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}

// Widen a G_PHI's type by padding. Each incoming value is widened at the end
// of its predecessor block; the result is narrowed back after the phis.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  // Insert just before the first non-phi so the narrowing copy follows all
  // phis in the block.
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}

// Top-level dispatch for the MoreElements action: pad operands/results of the
// supported opcodes out to MoreTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  // Binary operations: widen both sources and the destination.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions are not handled here.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    // Widen the source, then rebuild the unmerge with extra (dead) results
    // covering the padding.
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}

// Schoolbook multiplication of multi-part operands. Src1Regs/Src2Regs hold
// the NarrowTy-sized parts of the two factors (low part first); DstRegs is
// pre-sized to the number of product parts to produce and is filled in place.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  // Carry accumulated while summing the partial products of the previous
  // result index; folded into the next index's sum.
  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
3667 if (DstIdx != DstParts - 1) { 3668 MachineInstrBuilder Uaddo = 3669 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]); 3670 FactorSum = Uaddo.getReg(0); 3671 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0); 3672 for (unsigned i = 2; i < Factors.size(); ++i) { 3673 MachineInstrBuilder Uaddo = 3674 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]); 3675 FactorSum = Uaddo.getReg(0); 3676 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)); 3677 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0); 3678 } 3679 } else { 3680 // Since value for the next index is not calculated, neither is CarrySum. 3681 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0); 3682 for (unsigned i = 2; i < Factors.size(); ++i) 3683 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0); 3684 } 3685 3686 CarrySumPrevDstIdx = CarrySum; 3687 DstRegs[DstIdx] = FactorSum; 3688 Factors.clear(); 3689 } 3690 } 3691 3692 LegalizerHelper::LegalizeResult 3693 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { 3694 Register DstReg = MI.getOperand(0).getReg(); 3695 Register Src1 = MI.getOperand(1).getReg(); 3696 Register Src2 = MI.getOperand(2).getReg(); 3697 3698 LLT Ty = MRI.getType(DstReg); 3699 if (Ty.isVector()) 3700 return UnableToLegalize; 3701 3702 unsigned SrcSize = MRI.getType(Src1).getSizeInBits(); 3703 unsigned DstSize = Ty.getSizeInBits(); 3704 unsigned NarrowSize = NarrowTy.getSizeInBits(); 3705 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0) 3706 return UnableToLegalize; 3707 3708 unsigned NumDstParts = DstSize / NarrowSize; 3709 unsigned NumSrcParts = SrcSize / NarrowSize; 3710 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH; 3711 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 
      2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts;
  SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only high half of registers if this is high mul.
  ArrayRef<Register> DstRegs(
      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
  MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

/// Narrow the source (TypeIdx 1) of a G_EXTRACT by splitting the source into
/// NarrowTy pieces, extracting the sub-segment that overlaps the requested
/// bit range from each affected piece, and re-merging the segments into the
/// destination.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg is the extract's destination; OpStart/OpSize describe the bit
  // range being pulled out of the wide source.
  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // The requested range begins before this piece: take from the piece's
      // low end, clipped to however much of the range remains.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The requested range begins inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the destination.
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}

/// Narrow the result (TypeIdx 0) of a G_INSERT by splitting the wide value
/// into NarrowTy pieces, splicing the overlapping part of the inserted value
/// into each affected piece, and re-merging the pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg is the value being inserted; OpStart/OpSize give its destination
  // bit range within the wide value.
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    // Splice the segment into this piece of the wide value.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

/// Narrow a simple two-operand instruction (TypeIdx 0) by splitting both
/// sources into NarrowTy pieces (plus an optional leftover piece), applying
/// the same opcode piecewise, and re-merging the results with insertParts.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  // Both sources have the same type, so the second split cannot fail if the
  // first one succeeded.
  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(),
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Narrow the result (TypeIdx 0) of an extension by splitting the source on
/// the GCD type and re-merging with the extension opcode to pad out to the
/// widened (LCM) type before truncating back down to the destination.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

/// Narrow the result (TypeIdx 0) of a G_SELECT with a scalar condition by
/// splitting both value operands into NarrowTy pieces (plus leftover) and
/// selecting piecewise on the shared condition.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  // The false operand has the same type, so this split cannot fail if the
  // first one succeeded.
  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

/// Narrow the source (TypeIdx 1) of G_CTLZ/G_CTLZ_ZERO_UNDEF when the source
/// is a scalar of exactly twice NarrowTy: unmerge into Hi:Lo halves and
/// combine per-half counts with a select on Hi == 0.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    // When Hi == 0 the Lo count may legitimately hit NarrowSize, so the
    // zero-undef form is only usable if the original op was zero-undef.
    auto LoCTLZ = IsUndef ?
        B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
        B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    // Hi is known non-zero on this path, so zero-undef is always safe here.
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Narrow the source (TypeIdx 1) of G_CTTZ/G_CTTZ_ZERO_UNDEF when the source
/// is a scalar of exactly twice NarrowTy: unmerge into Hi:Lo halves and
/// combine per-half counts with a select on Lo == 0.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    // When Lo == 0 the Hi count may legitimately hit NarrowSize, so the
    // zero-undef form is only usable if the original op was zero-undef.
    auto HiCTTZ = IsUndef ?
        B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
        B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    // Lo is known non-zero on this path, so zero-undef is always safe here.
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Narrow the source (TypeIdx 1) of G_CTPOP when the source is a scalar of
/// exactly twice NarrowTy: population counts of the two halves simply add.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

    auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
    auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
    MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Lower the bit-counting opcodes (CTLZ/CTTZ and their ZERO_UNDEF variants,
/// and CTPOP) into sequences of simpler operations, preferring forms the
/// target declares Legal/Libcall/Custom.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // An operation is usable in an expansion if the target can handle it in
  // some way other than needing further lowering.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the topmost set bit into every lower position so that
    // popcount(Op) equals the number of non-leading-zero bits.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      // NOTE(review): the equivalent compare in the G_CTLZ case above uses
      // SrcTy.changeElementSize(1); for scalars both forms produce s1, but
      // confirm the intended type if vector types ever reach this path.
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(Ty, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse the original instruction as a CTPOP of the masked value.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
4212 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F 4213 auto C_4 = B.buildConstant(Ty, 4); 4214 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4); 4215 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count); 4216 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F)); 4217 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0); 4218 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0); 4219 4220 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm"); 4221 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this 4222 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks. 4223 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01))); 4224 auto ResTmp = B.buildMul(Ty, B8Count, MulMask); 4225 4226 // Shift count result from 8 high bits to low bits. 4227 auto C_SizeM8 = B.buildConstant(Ty, Size - 8); 4228 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); 4229 4230 MI.eraseFromParent(); 4231 return Legalized; 4232 } 4233 } 4234 } 4235 4236 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float 4237 // representation. 4238 LegalizerHelper::LegalizeResult 4239 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { 4240 Register Dst = MI.getOperand(0).getReg(); 4241 Register Src = MI.getOperand(1).getReg(); 4242 const LLT S64 = LLT::scalar(64); 4243 const LLT S32 = LLT::scalar(32); 4244 const LLT S1 = LLT::scalar(1); 4245 4246 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); 4247 4248 // unsigned cul2f(ulong u) { 4249 // uint lz = clz(u); 4250 // uint e = (u != 0) ? 127U + 63U - lz : 0; 4251 // u = (u << lz) & 0x7fffffffffffffffUL; 4252 // ulong t = u & 0xffffffffffUL; 4253 // uint v = (e << 23) | (uint)(u >> 40); 4254 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? 
v & 1U : 0U); 4255 // return as_float(v + r); 4256 // } 4257 4258 auto Zero32 = MIRBuilder.buildConstant(S32, 0); 4259 auto Zero64 = MIRBuilder.buildConstant(S64, 0); 4260 4261 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src); 4262 4263 auto K = MIRBuilder.buildConstant(S32, 127U + 63U); 4264 auto Sub = MIRBuilder.buildSub(S32, K, LZ); 4265 4266 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64); 4267 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32); 4268 4269 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1); 4270 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ); 4271 4272 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0); 4273 4274 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL); 4275 auto T = MIRBuilder.buildAnd(S64, U, Mask1); 4276 4277 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40)); 4278 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23)); 4279 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl)); 4280 4281 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL); 4282 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C); 4283 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C); 4284 auto One = MIRBuilder.buildConstant(S32, 1); 4285 4286 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One); 4287 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32); 4288 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0); 4289 MIRBuilder.buildAdd(Dst, V, R); 4290 4291 return Legalized; 4292 } 4293 4294 LegalizerHelper::LegalizeResult 4295 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 4296 Register Dst = MI.getOperand(0).getReg(); 4297 Register Src = MI.getOperand(1).getReg(); 4298 LLT DstTy = MRI.getType(Dst); 4299 LLT SrcTy = MRI.getType(Src); 4300 4301 if (SrcTy == LLT::scalar(1)) { 4302 auto True = MIRBuilder.buildFConstant(DstTy, 1.0); 4303 auto False = MIRBuilder.buildFConstant(DstTy, 0.0); 4304 
MIRBuilder.buildSelect(Dst, Src, True, False); 4305 MI.eraseFromParent(); 4306 return Legalized; 4307 } 4308 4309 if (SrcTy != LLT::scalar(64)) 4310 return UnableToLegalize; 4311 4312 if (DstTy == LLT::scalar(32)) { 4313 // TODO: SelectionDAG has several alternative expansions to port which may 4314 // be more reasonble depending on the available instructions. If a target 4315 // has sitofp, does not have CTLZ, or can efficiently use f64 as an 4316 // intermediate type, this is probably worse. 4317 return lowerU64ToF32BitOps(MI); 4318 } 4319 4320 return UnableToLegalize; 4321 } 4322 4323 LegalizerHelper::LegalizeResult 4324 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 4325 Register Dst = MI.getOperand(0).getReg(); 4326 Register Src = MI.getOperand(1).getReg(); 4327 LLT DstTy = MRI.getType(Dst); 4328 LLT SrcTy = MRI.getType(Src); 4329 4330 const LLT S64 = LLT::scalar(64); 4331 const LLT S32 = LLT::scalar(32); 4332 const LLT S1 = LLT::scalar(1); 4333 4334 if (SrcTy == S1) { 4335 auto True = MIRBuilder.buildFConstant(DstTy, -1.0); 4336 auto False = MIRBuilder.buildFConstant(DstTy, 0.0); 4337 MIRBuilder.buildSelect(Dst, Src, True, False); 4338 MI.eraseFromParent(); 4339 return Legalized; 4340 } 4341 4342 if (SrcTy != S64) 4343 return UnableToLegalize; 4344 4345 if (DstTy == S32) { 4346 // signed cl2f(long l) { 4347 // long s = l >> 63; 4348 // float r = cul2f((l + s) ^ s); 4349 // return s ? 
-r : r; 4350 // } 4351 Register L = Src; 4352 auto SignBit = MIRBuilder.buildConstant(S64, 63); 4353 auto S = MIRBuilder.buildAShr(S64, L, SignBit); 4354 4355 auto LPlusS = MIRBuilder.buildAdd(S64, L, S); 4356 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S); 4357 auto R = MIRBuilder.buildUITOFP(S32, Xor); 4358 4359 auto RNeg = MIRBuilder.buildFNeg(S32, R); 4360 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S, 4361 MIRBuilder.buildConstant(S64, 0)); 4362 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R); 4363 return Legalized; 4364 } 4365 4366 return UnableToLegalize; 4367 } 4368 4369 LegalizerHelper::LegalizeResult 4370 LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 4371 Register Dst = MI.getOperand(0).getReg(); 4372 Register Src = MI.getOperand(1).getReg(); 4373 LLT DstTy = MRI.getType(Dst); 4374 LLT SrcTy = MRI.getType(Src); 4375 const LLT S64 = LLT::scalar(64); 4376 const LLT S32 = LLT::scalar(32); 4377 4378 if (SrcTy != S64 && SrcTy != S32) 4379 return UnableToLegalize; 4380 if (DstTy != S32 && DstTy != S64) 4381 return UnableToLegalize; 4382 4383 // FPTOSI gives same result as FPTOUI for positive signed integers. 4384 // FPTOUI needs to deal with fp values that convert to unsigned integers 4385 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp. 4386 4387 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits()); 4388 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle() 4389 : APFloat::IEEEdouble(), 4390 APInt::getNullValue(SrcTy.getSizeInBits())); 4391 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven); 4392 4393 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); 4394 4395 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP); 4396 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on 4397 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1. 
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  // Select the direct FPTOSI result when Src < Threshold (ULT so NaN takes
  // the direct path too), otherwise the rebiased result.
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_FPTOSI with integer bit operations. Currently only handles
/// s32 (f32) -> s64 conversions.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Isolate the biased exponent field (bits 23..30 of an IEEE f32).
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Arithmetic shift of the sign bit yields 0 or -1, then sign-extended to
  // the destination width for the final negate-by-xor/sub.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);
  // Extract the mantissa and restore the implicit leading one:
  // R = (Src & 0x007FFFFF) | 0x00800000.
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Un-bias the exponent (f32 bias is 127). R currently has its radix point
  // 23 bits up, so shift left by Exponent - 23 when the magnitude needs more
  // integer bits, or right by 23 - Exponent otherwise.
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (R ^ Sign) - Sign negates R exactly when Sign is
  // all-ones.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // A negative unbiased exponent means |Src| < 1.0, which truncates to 0.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  // Split the f64 into its low (U) and high (UH) 32-bit halves; all bit
  // manipulation below is done in s32.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // The f64 exponent field starts at bit 20 of the high half.
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // M holds the top mantissa bits shifted toward the f16 position, keeping
  // extra low bits for rounding.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Gather the mantissa bits dropped by the truncation...
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  // ...and fold their disjunction into M's sticky bit so round-to-nearest-
  // even can see them.
  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ?
  // I = (M != 0 ? 0x0200 : 0) | 0x7c00;
  // i.e. the f16 Inf/NaN pattern: set a quiet-NaN payload bit when any
  // mantissa bits survive, and always set the all-ones exponent field.
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);  -- the candidate normal-number encoding.
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);  -- denormal shift amount.
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // Denormal path: restore the implicit leading one...
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  // ...shift it right by B, and detect lost bits via D0 = (D << B) != the
  // pre-shift value; fold that into D as a sticky bit.
  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Select the denormal value D when E < 1, the normal encoding N otherwise.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest, ties to even, driven by the low three bits of V.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  auto CmpEGt30 =
MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, 4566 E, MIRBuilder.buildConstant(S32, 30)); 4567 V = MIRBuilder.buildSelect(S32, CmpEGt30, 4568 MIRBuilder.buildConstant(S32, 0x7c00), V); 4569 4570 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, 4571 E, MIRBuilder.buildConstant(S32, 1039)); 4572 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V); 4573 4574 // Extract the sign bit. 4575 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16)); 4576 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000)); 4577 4578 // Insert the sign bit 4579 V = MIRBuilder.buildOr(S32, Sign, V); 4580 4581 MIRBuilder.buildTrunc(Dst, V); 4582 MI.eraseFromParent(); 4583 return Legalized; 4584 } 4585 4586 LegalizerHelper::LegalizeResult 4587 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 4588 Register Dst = MI.getOperand(0).getReg(); 4589 Register Src = MI.getOperand(1).getReg(); 4590 4591 LLT DstTy = MRI.getType(Dst); 4592 LLT SrcTy = MRI.getType(Src); 4593 const LLT S64 = LLT::scalar(64); 4594 const LLT S16 = LLT::scalar(16); 4595 4596 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64) 4597 return lowerFPTRUNC_F64_TO_F16(MI); 4598 4599 return UnableToLegalize; 4600 } 4601 4602 static CmpInst::Predicate minMaxToCompare(unsigned Opc) { 4603 switch (Opc) { 4604 case TargetOpcode::G_SMIN: 4605 return CmpInst::ICMP_SLT; 4606 case TargetOpcode::G_SMAX: 4607 return CmpInst::ICMP_SGT; 4608 case TargetOpcode::G_UMIN: 4609 return CmpInst::ICMP_ULT; 4610 case TargetOpcode::G_UMAX: 4611 return CmpInst::ICMP_UGT; 4612 default: 4613 llvm_unreachable("not in integer min/max"); 4614 } 4615 } 4616 4617 LegalizerHelper::LegalizeResult 4618 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 4619 Register Dst = MI.getOperand(0).getReg(); 4620 Register Src0 = MI.getOperand(1).getReg(); 4621 Register Src1 = MI.getOperand(2).getReg(); 4622 4623 const CmpInst::Predicate Pred = 
minMaxToCompare(MI.getOpcode()); 4624 LLT CmpType = MRI.getType(Dst).changeElementSize(1); 4625 4626 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1); 4627 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1); 4628 4629 MI.eraseFromParent(); 4630 return Legalized; 4631 } 4632 4633 LegalizerHelper::LegalizeResult 4634 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { 4635 Register Dst = MI.getOperand(0).getReg(); 4636 Register Src0 = MI.getOperand(1).getReg(); 4637 Register Src1 = MI.getOperand(2).getReg(); 4638 4639 const LLT Src0Ty = MRI.getType(Src0); 4640 const LLT Src1Ty = MRI.getType(Src1); 4641 4642 const int Src0Size = Src0Ty.getScalarSizeInBits(); 4643 const int Src1Size = Src1Ty.getScalarSizeInBits(); 4644 4645 auto SignBitMask = MIRBuilder.buildConstant( 4646 Src0Ty, APInt::getSignMask(Src0Size)); 4647 4648 auto NotSignBitMask = MIRBuilder.buildConstant( 4649 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1)); 4650 4651 auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask); 4652 MachineInstr *Or; 4653 4654 if (Src0Ty == Src1Ty) { 4655 auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask); 4656 Or = MIRBuilder.buildOr(Dst, And0, And1); 4657 } else if (Src0Size > Src1Size) { 4658 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size); 4659 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1); 4660 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt); 4661 auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask); 4662 Or = MIRBuilder.buildOr(Dst, And0, And1); 4663 } else { 4664 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size); 4665 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt); 4666 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift); 4667 auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask); 4668 Or = MIRBuilder.buildOr(Dst, And0, And1); 4669 } 4670 4671 // Be careful about setting nsz/nnan/ninf on every instruction, since the 4672 // constants are a nan and 
  // -0.0, but the final result should preserve everything.
  if (unsigned Flags = MI.getFlags())
    Or->setFlags(Flags);

  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_FMINNUM/G_FMAXNUM to the IEEE variant, first inserting
/// G_FCANONICALIZE on any operand that may be a signaling NaN so the
/// required sNaN quieting still happens.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicate quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_FMAD (fused-in-IR but unfused-in-lowering multiply-add).
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  // Propagate the original FP flags onto both halves of the expansion.
  unsigned Flags = MI.getFlags();

  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
                                  Flags);
  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_INTRINSIC_ROUND (round half away from zero) in terms of
/// trunc/fabs/copysign/select.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  // i1 (or vector-of-i1) condition type matching the value type's shape.
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ?
  //    o : 0.0);

  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);

  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  // +/-1.0 with the sign of the input, so the correction rounds away from
  // zero.
  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);

  // d >= 0.5 (ordered compare, so a NaN diff selects the 0.0 arm).
  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
                                  Flags);
  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);

  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);

  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_FFLOOR in terms of trunc: subtract one when the source was
/// negative and not already an integer.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.
  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  // SITOFP of the i1 condition gives 0.0 or -1.0 (true sign-extends to -1),
  // which is exactly the required correction term.
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_UNMERGE_VALUES. Only the scalarizing case (vector source,
/// element-typed results) is handled, via a bitcast to one wide integer
/// plus shift/trunc per element.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  // All operands but the last are defs; the last is the source.
  const unsigned NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);


  // Expand scalarizing unmerge as bitcast to integer and shift.
  if (!DstTy.isVector() && SrcTy.isVector() &&
      SrcTy.getElementType() == DstTy) {
    LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
    Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);

    // Element 0 occupies the low bits, so a plain truncate suffices.
    MIRBuilder.buildTrunc(Dst0Reg, Cast);

    // Each later element is shifted down by one more element width before
    // truncating.
    const unsigned DstSize = DstTy.getSizeInBits();
    unsigned Offset = DstSize;
    for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
      auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
      auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Lower G_SHUFFLE_VECTOR: a select/copy for the scalar-result case,
/// otherwise per-element extracts feeding a G_BUILD_VECTOR.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy =
      MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    // An out-of-range (e.g. -1) mask element means the result is undef.
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      // Negative mask entries produce undef elements; materialize one
      // shared G_IMPLICIT_DEF lazily and reuse it.
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Mask indices >= the first source's element count select from the
      // second source vector (re-based to its own element numbering).
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ?
          Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

/// Lower a dynamic stack allocation by explicitly adjusting the target's
/// stack pointer register.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  // Requested alignment in bytes; 0 means no extra alignment needed.
  unsigned Align = MI.getOperand(2).getImm();

  const auto &MF = *MI.getMF();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  // Work on an integer view of SP so the arithmetic below is well-typed.
  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Align) {
    // Round the new SP down to the requested alignment by AND-ing with
    // -Align (all ones above the alignment bits).
    APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  // Write the adjusted value back to SP, and also return it as the
  // allocated pointer.
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_EXTRACT as a shift + truncate on an integer view of the source.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  // Bit offset of the extracted field within the source.
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  // Only scalar results are handled, taken either from a scalar source or
  // from a vector whose element type matches the result.
  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

/// Lower G_INSERT as mask/shift/or on an integer view of the operands.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  // The container type is read from Src (NOTE(review): this presumes Dst
  // and Src share a type for G_INSERT -- confirm against the verifier).
  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  const
      DataLayout &DL = MIRBuilder.getDataLayout();
  // Pointers in non-integral address spaces cannot be round-tripped through
  // integers, so the cast-based expansion below is not valid for them.
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  // Do all the bit manipulation on a scalar integer of the container's
  // width, casting pointer operands as needed.
  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  // Widen the inserted value and shift it into its field position.
  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  // MaskVal selects every container bit *outside* the inserted field
  // (set from the end of the field, wrapping around to its start).
  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_SADDO/G_SSUBO: emit the plain add/sub for the value result and
/// derive the overflow bit from sign comparisons.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal,
  // compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  // Overflow happened exactly when the two conditions disagree.
  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}

/// Lower G_BSWAP with shift/and/or sequences, swapping one byte pair per
/// loop iteration.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  // Round the scalar width up to whole bytes.
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
5047 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8)); 5048 auto Mask = MIRBuilder.buildConstant(Ty, APMask); 5049 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i); 5050 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt. 5051 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask); 5052 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt); 5053 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft); 5054 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask. 5055 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt); 5056 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask); 5057 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight); 5058 } 5059 Res.getInstr()->getOperand(0).setReg(Dst); 5060 5061 MI.eraseFromParent(); 5062 return Legalized; 5063 } 5064 5065 //{ (Src & Mask) >> N } | { (Src << N) & Mask } 5066 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, 5067 MachineInstrBuilder Src, APInt Mask) { 5068 const LLT Ty = Dst.getLLTTy(*B.getMRI()); 5069 MachineInstrBuilder C_N = B.buildConstant(Ty, N); 5070 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask); 5071 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N); 5072 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0); 5073 return B.buildOr(Dst, LHS, RHS); 5074 } 5075 5076 LegalizerHelper::LegalizeResult 5077 LegalizerHelper::lowerBitreverse(MachineInstr &MI) { 5078 Register Dst = MI.getOperand(0).getReg(); 5079 Register Src = MI.getOperand(1).getReg(); 5080 const LLT Ty = MRI.getType(Src); 5081 unsigned Size = Ty.getSizeInBits(); 5082 5083 MachineInstrBuilder BSWAP = 5084 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src}); 5085 5086 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654 5087 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4] 5088 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0] 5089 MachineInstrBuilder Swap4 = 5090 SwapN(4, 
Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0))); 5091 5092 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76 5093 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2] 5094 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC] 5095 MachineInstrBuilder Swap2 = 5096 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC))); 5097 5098 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7 5099 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1] 5100 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA] 5101 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA))); 5102 5103 MI.eraseFromParent(); 5104 return Legalized; 5105 } 5106 5107 LegalizerHelper::LegalizeResult 5108 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) { 5109 MachineFunction &MF = MIRBuilder.getMF(); 5110 const TargetSubtargetInfo &STI = MF.getSubtarget(); 5111 const TargetLowering *TLI = STI.getTargetLowering(); 5112 5113 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER; 5114 int NameOpIdx = IsRead ? 1 : 0; 5115 int ValRegIndex = IsRead ? 0 : 1; 5116 5117 Register ValReg = MI.getOperand(ValRegIndex).getReg(); 5118 const LLT Ty = MRI.getType(ValReg); 5119 const MDString *RegStr = cast<MDString>( 5120 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0)); 5121 5122 Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF); 5123 if (!PhysReg.isValid()) 5124 return UnableToLegalize; 5125 5126 if (IsRead) 5127 MIRBuilder.buildCopy(ValReg, PhysReg); 5128 else 5129 MIRBuilder.buildCopy(PhysReg, ValReg); 5130 5131 MI.eraseFromParent(); 5132 return Legalized; 5133 } 5134