//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));

CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, GISelKnownBits *KB,
                               MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
      KB(KB), MDT(MDT), LI(LI) {
  (void)this->KB;
}

bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
  return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
}

void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}

void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}

bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}

bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector in as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}
void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  // extract_vector_elt and so on. It is less clear that would
  // be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match,
  // we could still emit an extract_vector_elt in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}

void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMerge(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
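/// Returns either CurrentUse or a tuple describing the candidate, whichever
/// is preferred.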
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive, so there is more to gain by folding them into the load.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHIs is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases
/// may want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just
  // after the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace

bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or
  // just for performance.

  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
      MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
    return false;

  auto &LoadValue = MI.getOperand(0);
  assert(LoadValue.isReg() && "Result wasn't a register?");

  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize <s8 loads into at least a 1 byte
  // load, and the MMOs can only describe memory accesses in multiples of
  // bytes. If we try to perform extload combining on those, we can end up with
  //   %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // Non-power-of-2 types will very likely be legalized into multiple loads.
  // Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
                                 ? TargetOpcode::G_ANYEXT
                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
                                       ? TargetOpcode::G_SEXT
                                       : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      // Check for legality.
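      // The query below includes the load's memory description (size,
      // alignment and atomic ordering) so the target can reject extending
      // loads that would not be legal for it.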
      if (LI) {
        LegalityQuery::MemDesc MMDesc;
        const auto &MMO = **MI.memoperands_begin();
        MMDesc.SizeInBits = MMO.getSizeInBits();
        MMDesc.AlignInBits = MMO.getAlign().value() * 8;
        MMDesc.Ordering = MMO.getOrdering();
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
        if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
            LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}

void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s32) = G_SEXT %1(s8)
          //   %3:_(s32) = G_ANYEXT %1(s8)
          //   ... = ... %3(s32)
          // rewrites to:
          //   %2:_(s32) = G_SEXTLOAD ...
          //   ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s32) = G_SEXT %1(s8)
          //   %3:_(s64) = G_ANYEXT %1(s8)
          //   ... = ... %3(s64)
          // rewrites to:
          //   %2:_(s32) = G_SEXTLOAD ...
          //   %3:_(s64) = G_ANYEXT %2:_(s32)
          //   ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is large, then insert a truncate. For
          // example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s64) = G_SEXT %1(s8)
          //   %3:_(s32) = G_ZEXT %1(s8)
          //   ... = ... %3(s32)
          // rewrites to:
          //   %2:_(s64) = G_SEXTLOAD ...
          //   %4:_(s8) = G_TRUNC %2:_(s64)
          //   %3:_(s32) = G_ZEXT %4:_(s8)
          //   ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return false;

  // Loop through the basic block until we find one of the instructions.
  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
  for (; I != DefMI.getParent()->end(); ++I)
    if (&*I == &DefMI || &*I == &UseMI)
      return &*I == &DefMI;

  llvm_unreachable("Block must contain instructions");
}

bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}

bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;

  if (MRI.getType(SrcReg).isVector())
    return false;

  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;

  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
  if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
    const auto &MMO = **LoadMI->memoperands_begin();
    // If truncating more than the original extended value, abort.
    if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
      return false;
    if (MMO.getSizeInBits() == SizeInBits)
      return true;
  }
  return false;
}

bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);

  // Only supports scalars for now.
  if (MRI.getType(MI.getOperand(0).getReg()).isVector())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
  if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
    return false;

  // If the sign extend extends from a narrower width than the load's width,
  // then we can narrow the load width when we combine to a G_SEXTLOAD.
  auto &MMO = **LoadDef->memoperands_begin();
  // Don't do this for non-simple loads.
  if (MMO.isAtomic() || MMO.isVolatile())
    return false;

  // Avoid widening the load at all.
  unsigned NewSizeBits =
      std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return false;
  // Don't bother creating a non-power-2 sextload, it will likely be broken up
  // anyway for most targets.
  if (!isPowerOf2_32(NewSizeBits))
    return false;
  MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
  return true;
}

bool CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register LoadReg;
  unsigned ScalarSizeBits;
  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  auto *LoadDef = MRI.getVRegDef(LoadReg);
  assert(LoadDef && "Expected a load reg");

  // If we have the following:
  //   %ld = G_LOAD %ptr, (load 2)
  //   %ext = G_SEXT_INREG %ld, 8
  // ==>
  //   %ld = G_SEXTLOAD %ptr (load 1)

  auto &MMO = **LoadDef->memoperands_begin();
  Builder.setInstrAndDebugLoc(MI);
  auto &MF = Builder.getMF();
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                         LoadDef->getOperand(1).getReg(), *NewMMO);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                            Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Base = MI.getOperand(1).getReg();
  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);

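  // Walk the G_PTR_ADD users of the base pointer; any of them can supply the
  // address to write back, provided it passes the legality and dominance
  // checks below.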
  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
      continue;

    Offset = Use.getOperand(2).getReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }

    // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation
    // might be movable.
    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }

    // FIXME: check whether all uses of Base are load/store with foldable
    // addressing modes. If so, using the normal addr-modes is better than
    // forming an indexed one.

    bool MemOpDominatesAddrUses = true;
    for (auto &PtrAddUse :
         MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
      if (!dominates(MI, PtrAddUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }

    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}

bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Addr = MI.getOperand(1).getReg();
  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
    return false;

  Base = AddrDef->getOperand(1).getReg();
  Offset = AddrDef->getOperand(2).getReg();

  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Would require a copy.
    if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.

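  // The written-back address only becomes available once MI has executed, so
  // every other use of Addr must be dominated by MI for pre-indexing to be
  // safe.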
  for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}

bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  IndexedLoadStoreMatchInfo MatchInfo;
  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
    applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset))
    return false;

  return true;
}

void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  MachineIRBuilder MIRBuilder(MI);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }

  auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
  MIB.addUse(MatchInfo.Offset);
  MIB.addImm(MatchInfo.IsPre);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}

bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_BR)
    return false;

  // Try to match the following:
  // bb1:
  //   %c(s32) = G_ICMP pred, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2,
  // always resulting in a branch no matter which path is taken. Here we try
  // to find and replace that pattern with a conditional branch to bb3 and a
  // fallthrough to bb2.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  MachineInstr *BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target.
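  // Otherwise, inverting the condition and erasing the G_BR would not leave a
  // fallthrough to the original conditional target.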
  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
    return false;

  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
      !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
    return false;
  return true;
}

bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
  if (!matchElideBrByInvertingCond(MI))
    return false;
  applyElideBrByInvertingCond(MI);
  return true;
}

void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  MachineBasicBlock::iterator BrIt(MI);
  MachineInstr *BrCond = &*std::prev(BrIt);
  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());

  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());

  // Invert the G_ICMP condition.
  Observer.changingInstr(*CmpMI);
  CmpMI->getOperand(1).setPredicate(InversePred);
  Observer.changedInstr(*CmpMI);

  // Change the conditional branch target.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
  MI.eraseFromParent();
}

static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  if (MF.getTarget().getTargetTriple().isOSDarwin())
    return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}

// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector loads/stores for the leftover pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}

// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned NumBits = Ty.getScalarSizeInBits();
  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
  if (!Ty.isVector() && ValVRegAndVal) {
    unsigned KnownVal = ValVRegAndVal->Value;
    APInt Scalar = APInt(8, KnownVal);
    APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }

  // Extend the byte value to the larger type, and then multiply by a magic
  // value 0x010101... in order to replicate it across every byte.
  // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }

  LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }

  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);

  return Val;
}

bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
                                    Register Val, unsigned KnownLen,
                                    Align Alignment, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return false;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return false;
    }

    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
                                    Register Src, unsigned KnownLen,
                                    Align DstAlign, Align SrcAlign,
                                    bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and always
  // inlining if the memcpy is in a tail call position.

  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->needsStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a load/store pair for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest
  // buffer of that value loaded. This can result in a sequence of loads and
  // stores of mixed types, depending on what the target specifies as good
  // types to use.
  unsigned CurrOffset = 0;
  LLT PtrTy = MRI.getType(Src);
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr =
        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
                                     Register Src, unsigned KnownLen,
                                     Align DstAlign, Align SrcAlign,
                                     bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->needsStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the
  // stores. Apart from that, this loop is pretty much doing the same thing as
  // the memcpy codegen function.
  unsigned CurrOffset = 0;
  LLT PtrTy = MRI.getType(Src);
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
  assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
          ID == Intrinsic::memset) &&
         "Expected a memcpy like intrinsic");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;
  bool IsVolatile = MemOp->isVolatile();
  // Don't try to optimize volatile.
  if (IsVolatile)
    return false;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  Register Dst = MI.getOperand(1).getReg();
  Register Src = MI.getOperand(2).getReg();
  Register Len = MI.getOperand(3).getReg();

  if (ID != Intrinsic::memset) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy
  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return false; // Leave it to the legalizer to lower it to a libcall.
  unsigned KnownLen = LenVRegAndVal->Value;

  if (KnownLen == 0) {
    MI.eraseFromParent();
    return true;
  }

  if (MaxLen && KnownLen > MaxLen)
    return false;

  if (ID == Intrinsic::memcpy)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                          IsVolatile);
  if (ID == Intrinsic::memmove)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
  if (ID == Intrinsic::memset)
    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return false;
}

bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  // We're trying to match the following pattern:
  //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)

  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Add2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Base = Add2Def->getOperand(1).getReg();
  Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
  MatchInfo.Base = Base;
  return true;
}

bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  MachineIRBuilder MIB(MI);
  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
    return false;
  ShiftVal = Log2_64(MaybeImmVal->Value);
  return true;
}

bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO:
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value;
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    // dst = G_LSHR s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Narrowed, Zero });
  } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
    Register Narrowed = Unmerge.getReg(0);
    // dst = G_SHL s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
    auto Hi = Builder.buildAShr(
      HalfTy, Unmerge.getReg(1),
      Builder.buildConstant(HalfTy, HalfSize - 1));

    if (ShiftVal == HalfSize) {
      // (G_ASHR i64:x, 32) ->
      //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
    } else if (ShiftVal == Size - 1) {
      // Don't need a second shift.
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) &&
         "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO: Handle vector types.
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value;
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    //  dst = G_LSHR s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Narrowed, Zero });
  } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
    Register Narrowed = Unmerge.getReg(0);

    //  dst = G_SHL s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES 0, (G_SHL lo, C - 32)

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
    auto Hi = Builder.buildAShr(
      HalfTy, Unmerge.getReg(1),
      Builder.buildConstant(HalfTy, HalfSize - 1));

    if (ShiftVal == HalfSize) {
      // (G_ASHR i64:x, 32) ->
      //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
    } else if (ShiftVal == Size - 1) {
      // Don't need a second shift.
      // (G_ASHR i64:x, 63) ->
      //   %narrowed = (G_ASHR hi_32(x), 31)
      //   G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMerge(DstReg, { Hi, Hi });
    } else {
      auto Lo = Builder.buildAShr(
        HalfTy, Unmerge.getReg(1),
        Builder.buildConstant(HalfTy, ShiftVal - HalfSize));

      // (G_ASHR i64:x, C), for C >= 32 ->
      //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Lo, Hi });
    }
  }

  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI,
                  m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
}

bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildCopy(DstReg, Reg);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
}

bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildZExtOrTrunc(DstReg, Reg);
  MI.eraseFromParent();
  return true;
}

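// Illustrative sketch (not part of the original source): the two round-trip
// combines above, shown as before/after generic MIR. Register names and types
// are hypothetical.
//
//   G_INTTOPTR of a G_PTRTOINT whose source has the same pointer type folds
//   to a copy:
//     %i:_(s64) = G_PTRTOINT %p:_(p0)
//     %q:_(p0)  = G_INTTOPTR %i:_(s64)   -->   %q:_(p0) = COPY %p:_(p0)
//
//   G_PTRTOINT of a G_INTTOPTR folds to a zext/trunc (or copy) of the
//   original integer, whatever buildZExtOrTrunc picks for the widths:
//     %p:_(p0)  = G_INTTOPTR %i:_(s64)
//     %j:_(s32) = G_PTRTOINT %p:_(p0)    -->   %j:_(s32) = G_TRUNC %i:_(s64)
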
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
  return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
  return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}

bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}

bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                    const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
  if (!I1)
    return false;
  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
  if (!I2)
    return false;

  // Handle a case like this:
  //
  //   %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  //
  // Even though %0 and %1 are produced by the same instruction, they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();

  // If we have an instruction which loads or stores, we can't guarantee that
  // it is identical.
  //
  // For example, we may have
  //
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
  // %x2 = G_LOAD %addr (load N from @somewhere)
  // ...
  // %or = G_OR %x1, %x2
  //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;

  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
  // %a = COPY $physreg
  // ...
  // SOMETHING implicit-def $physreg
  // ...
  // %b = COPY $physreg
  //
  // These copies are not equivalent.
  if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
    // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
    // From that, we know that they must have the same value, since they must
    // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }

  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
  // On the off-chance that there's some target instruction feeding into the
  // instruction, let's use produceSameValue instead of isIdenticalTo.
  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
}

bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
  // MIPatternMatch doesn't let us look through G_ZEXT etc.
  auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
  return ValAndVReg && ValAndVReg->Value == C;
}

bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  Register Replacement = MI.getOperand(OpIdx).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                 Register Replacement) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Match (cond ? x : x)
  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}

bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}

bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}

bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchSimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register &NewLHS = std::get<0>(MatchInfo);
  Register &NewRHS = std::get<1>(MatchInfo);

  // Helper lambda to check for opportunities for
  // ((0-A) + B) -> B - A
  // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    int64_t Cst;
    if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
        Cst != 0)
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };

  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}

bool CombinerHelper::applySimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register SubLHS, SubRHS;
  std::tie(SubLHS, SubRHS) = MatchInfo;
  Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
  return true;
}

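// Illustrative sketch (not part of the original source): the add-of-negation
// combine above, shown as before/after generic MIR. Register names are
// hypothetical.
//
//   Before:
//     %zero:_(s32) = G_CONSTANT i32 0
//     %neg:_(s32)  = G_SUB %zero:_(s32), %a:_(s32)
//     %dst:_(s32)  = G_ADD %neg:_(s32), %b:_(s32)
//   After applySimplifyAddToSub (MatchInfo = {%b, %a}):
//     %dst:_(s32)  = G_SUB %b:_(s32), %a:_(s32)
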
bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
  //
  // Creates the new hand and logic instructions (but does not insert them).
  //
  // On success, MatchInfo is populated with the new instructions. These are
  // inserted in applyHoistLogicOpWithSameOpcodeHands.
  unsigned LogicOpcode = MI.getOpcode();
  assert(LogicOpcode == TargetOpcode::G_AND ||
         LogicOpcode == TargetOpcode::G_OR ||
         LogicOpcode == TargetOpcode::G_XOR);
  MachineIRBuilder MIB(MI);
  Register Dst = MI.getOperand(0).getReg();
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  // Don't recompute anything.
  if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
    return false;

  // Make sure we have (hand x, ...), (hand y, ...)
  MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
  MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
  if (!LeftHandInst || !RightHandInst)
    return false;
  unsigned HandOpcode = LeftHandInst->getOpcode();
  if (HandOpcode != RightHandInst->getOpcode())
    return false;
  if (!LeftHandInst->getOperand(1).isReg() ||
      !RightHandInst->getOperand(1).isReg())
    return false;

  // Make sure the types match up, and if we're doing this post-legalization,
  // we end up with legal types.
  Register X = LeftHandInst->getOperand(1).getReg();
  Register Y = RightHandInst->getOperand(1).getReg();
  LLT XTy = MRI.getType(X);
  LLT YTy = MRI.getType(Y);
  if (XTy != YTy)
    return false;
  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
    return false;

  // Optional extra source register.
  Register ExtraHandOpSrcReg;
  switch (HandOpcode) {
  default:
    return false;
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT: {
    // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
    break;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
    MachineOperand &ZOp = LeftHandInst->getOperand(2);
    if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
      return false;
    ExtraHandOpSrcReg = ZOp.getReg();
    break;
  }
  }

  // Record the steps to build the new instructions.
  //
  // Steps to build (logic x, y)
  auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
  OperandBuildSteps LogicBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
  InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);

  // Steps to build hand (logic x, y), ...z
  OperandBuildSteps HandBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
  if (ExtraHandOpSrcReg.isValid())
    HandBuildSteps.push_back(
        [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
  InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);

  MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
  return true;
}

bool CombinerHelper::applyBuildInstructionSteps(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  assert(MatchInfo.InstrsToBuild.size() &&
         "Expected at least one instr to build?");
  Builder.setInstr(MI);
  for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
    assert(InstrToBuild.Opcode && "Expected a valid opcode?");
    assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
    MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
    for (auto &OperandFn : InstrToBuild.OperandFns)
      OperandFn(Instr);
  }
  MI.eraseFromParent();
  return true;
}

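// Illustrative sketch (not part of the original source): one concrete instance
// of the hoisting combine above, using the ext case. Register names are
// hypothetical; each G_SEXT result is assumed to have exactly one non-debug
// use, as the match requires.
//
//   Before:
//     %xe:_(s64)  = G_SEXT %x:_(s32)
//     %ye:_(s64)  = G_SEXT %y:_(s32)
//     %dst:_(s64) = G_AND %xe:_(s64), %ye:_(s64)
//   After applyBuildInstructionSteps:
//     %and:_(s32) = G_AND %x:_(s32), %y:_(s32)
//     %dst:_(s64) = G_SEXT %and:_(s32)
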
bool CombinerHelper::matchAshrShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  int64_t ShlCst, AshrCst;
  Register Src;
  // FIXME: detect splat constant vectors.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
    return false;
  if (ShlCst != AshrCst)
    return false;
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
    return false;
  MatchInfo = std::make_tuple(Src, ShlCst);
  return true;
}

bool CombinerHelper::applyAshShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  Register Src;
  int64_t ShiftAmt;
  std::tie(Src, ShiftAmt) = MatchInfo;
  unsigned Size = MRI.getType(Src).getScalarSizeInBits();
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
  MI.eraseFromParent();
  return true;
}

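// Illustrative sketch (not part of the original source): the shl/ashr pair
// above is a common way of sign-extending the low bits of a value, and the
// combine turns it into a single G_SEXT_INREG. Register names are
// hypothetical.
//
//   Before:
//     %c:_(s32)   = G_CONSTANT i32 24
//     %t:_(s32)   = G_SHL %x:_(s32), %c:_(s32)
//     %dst:_(s32) = G_ASHR %t:_(s32), %c:_(s32)
//   After applyAshShlToSextInreg (Size = 32, ShiftAmt = 24):
//     %dst:_(s32) = G_SEXT_INREG %x:_(s32), 8
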
bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
                                             Register &Replacement) {
  // Given
  //
  // %mask:_(sN) = G_CONSTANT iN 000...0111...1
  // %x:_(sN) = G_SOMETHING
  // %y:_(sN) = G_AND %x, %mask
  //
  // Eliminate the G_AND when it is known that x & mask == x.
  //
  // Patterns like this can appear as a result of legalization. E.g.
  //
  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
  // %one:_(s32) = G_CONSTANT i32 1
  // %and:_(s32) = G_AND %cmp, %one
  //
  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  if (!KB)
    return false;

  // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
  // LHS of the G_AND.
  Replacement = MI.getOperand(1).getReg();
  Register AndDst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(AndDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;
  if (!canReplaceReg(AndDst, Replacement, MRI))
    return false;

  // Check that we have a constant on the RHS of the G_AND, which is of the
  // form 000...0111...1.
  int64_t Cst;
  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
    return false;
  APInt Mask(DstTy.getSizeInBits(), Cst);
  if (!Mask.isMask())
    return false;

  // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
  return KB->maskedValueIsZero(Replacement, ~Mask);
}

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}
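
// Illustrative sketch (not part of the original source): how a target-specific
// combiner would typically drive this helper from its combine callback. The
// class name MyTargetCombinerInfo and the LI member below are hypothetical and
// are shown only to indicate the intended call pattern; kept as a comment so
// it does not affect this translation unit.
//
//   bool MyTargetCombinerInfo::combine(GISelChangeObserver &Observer,
//                                      MachineInstr &MI,
//                                      MachineIRBuilder &B) const {
//     CombinerHelper Helper(Observer, B, /*KB=*/nullptr, /*MDT=*/nullptr, LI);
//     // Run the generic combines; a target would usually try its own
//     // matchers after this call returns false.
//     return Helper.tryCombine(MI);
//   }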