//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The Cortex-A15 processor employs a tracking scheme in its register renaming
// in order to process each instruction's micro-ops speculatively and
// out-of-order with appropriate forwarding. The ARM architecture allows VFP
// instructions to read and write 32-bit S-registers. Each S-register
// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
//
// There are several instruction patterns which can be used to provide this
// capability which can provide higher performance than other, potentially more
// direct patterns, specifically around when one micro-op reads a D-register
// operand that has recently been written as one or more S-register results.
//
// This file defines a pre-regalloc pass which looks for SPR producers which
// are going to be used by DPR (or QPR) consumers and creates the more
// optimized access pattern.
24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "ARM.h" 28 #include "ARMBaseInstrInfo.h" 29 #include "ARMBaseRegisterInfo.h" 30 #include "ARMSubtarget.h" 31 #include "llvm/ADT/Statistic.h" 32 #include "llvm/CodeGen/MachineFunction.h" 33 #include "llvm/CodeGen/MachineFunctionPass.h" 34 #include "llvm/CodeGen/MachineInstr.h" 35 #include "llvm/CodeGen/MachineInstrBuilder.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Target/TargetRegisterInfo.h" 39 #include "llvm/Target/TargetSubtargetInfo.h" 40 #include <map> 41 #include <set> 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "a15-sd-optimizer" 46 47 namespace { 48 struct A15SDOptimizer : public MachineFunctionPass { 49 static char ID; 50 A15SDOptimizer() : MachineFunctionPass(ID) {} 51 52 bool runOnMachineFunction(MachineFunction &Fn) override; 53 54 const char *getPassName() const override { 55 return "ARM A15 S->D optimizer"; 56 } 57 58 private: 59 const ARMBaseInstrInfo *TII; 60 const TargetRegisterInfo *TRI; 61 MachineRegisterInfo *MRI; 62 63 bool runOnInstruction(MachineInstr *MI); 64 65 // 66 // Instruction builder helpers 67 // 68 unsigned createDupLane(MachineBasicBlock &MBB, 69 MachineBasicBlock::iterator InsertBefore, 70 DebugLoc DL, 71 unsigned Reg, unsigned Lane, 72 bool QPR=false); 73 74 unsigned createExtractSubreg(MachineBasicBlock &MBB, 75 MachineBasicBlock::iterator InsertBefore, 76 DebugLoc DL, 77 unsigned DReg, unsigned Lane, 78 const TargetRegisterClass *TRC); 79 80 unsigned createVExt(MachineBasicBlock &MBB, 81 MachineBasicBlock::iterator InsertBefore, 82 DebugLoc DL, 83 unsigned Ssub0, unsigned Ssub1); 84 85 unsigned createRegSequence(MachineBasicBlock &MBB, 86 MachineBasicBlock::iterator InsertBefore, 87 DebugLoc DL, 88 unsigned Reg1, unsigned Reg2); 89 90 unsigned createInsertSubreg(MachineBasicBlock &MBB, 91 MachineBasicBlock::iterator InsertBefore, 92 DebugLoc DL, unsigned 
DReg, unsigned Lane, 93 unsigned ToInsert); 94 95 unsigned createImplicitDef(MachineBasicBlock &MBB, 96 MachineBasicBlock::iterator InsertBefore, 97 DebugLoc DL); 98 99 // 100 // Various property checkers 101 // 102 bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); 103 bool hasPartialWrite(MachineInstr *MI); 104 SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); 105 unsigned getDPRLaneFromSPR(unsigned SReg); 106 107 // 108 // Methods used for getting the definitions of partial registers 109 // 110 111 MachineInstr *elideCopies(MachineInstr *MI); 112 void elideCopiesAndPHIs(MachineInstr *MI, 113 SmallVectorImpl<MachineInstr*> &Outs); 114 115 // 116 // Pattern optimization methods 117 // 118 unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); 119 unsigned optimizeSDPattern(MachineInstr *MI); 120 unsigned getPrefSPRLane(unsigned SReg); 121 122 // 123 // Sanitizing method - used to make sure if don't leave dead code around. 124 // 125 void eraseInstrWithNoUses(MachineInstr *MI); 126 127 // 128 // A map used to track the changes done by this pass. 129 // 130 std::map<MachineInstr*, unsigned> Replacements; 131 std::set<MachineInstr *> DeadInstr; 132 }; 133 char A15SDOptimizer::ID = 0; 134 } // end anonymous namespace 135 136 // Returns true if this is a use of a SPR register. 
137 bool A15SDOptimizer::usesRegClass(MachineOperand &MO, 138 const TargetRegisterClass *TRC) { 139 if (!MO.isReg()) 140 return false; 141 unsigned Reg = MO.getReg(); 142 143 if (TargetRegisterInfo::isVirtualRegister(Reg)) 144 return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); 145 else 146 return TRC->contains(Reg); 147 } 148 149 unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { 150 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, 151 &ARM::DPRRegClass); 152 if (DReg != ARM::NoRegister) return ARM::ssub_1; 153 return ARM::ssub_0; 154 } 155 156 // Get the subreg type that is most likely to be coalesced 157 // for an SPR register that will be used in VDUP32d pseudo. 158 unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { 159 if (!TRI->isVirtualRegister(SReg)) 160 return getDPRLaneFromSPR(SReg); 161 162 MachineInstr *MI = MRI->getVRegDef(SReg); 163 if (!MI) return ARM::ssub_0; 164 MachineOperand *MO = MI->findRegisterDefOperand(SReg); 165 166 assert(MO->isReg() && "Non-register operand found!"); 167 if (!MO) return ARM::ssub_0; 168 169 if (MI->isCopy() && usesRegClass(MI->getOperand(1), 170 &ARM::SPRRegClass)) { 171 SReg = MI->getOperand(1).getReg(); 172 } 173 174 if (TargetRegisterInfo::isVirtualRegister(SReg)) { 175 if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; 176 return ARM::ssub_0; 177 } 178 return getDPRLaneFromSPR(SReg); 179 } 180 181 // MI is known to be dead. Figure out what instructions 182 // are also made dead by this and mark them for removal. 183 void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { 184 SmallVector<MachineInstr *, 8> Front; 185 DeadInstr.insert(MI); 186 187 DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); 188 Front.push_back(MI); 189 190 while (Front.size() != 0) { 191 MI = Front.back(); 192 Front.pop_back(); 193 194 // MI is already known to be dead. We need to see 195 // if other instructions can also be removed. 
196 for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { 197 MachineOperand &MO = MI->getOperand(i); 198 if ((!MO.isReg()) || (!MO.isUse())) 199 continue; 200 unsigned Reg = MO.getReg(); 201 if (!TRI->isVirtualRegister(Reg)) 202 continue; 203 MachineOperand *Op = MI->findRegisterDefOperand(Reg); 204 205 if (!Op) 206 continue; 207 208 MachineInstr *Def = Op->getParent(); 209 210 // We don't need to do anything if we have already marked 211 // this instruction as being dead. 212 if (DeadInstr.find(Def) != DeadInstr.end()) 213 continue; 214 215 // Check if all the uses of this instruction are marked as 216 // dead. If so, we can also mark this instruction as being 217 // dead. 218 bool IsDead = true; 219 for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { 220 MachineOperand &MODef = Def->getOperand(j); 221 if ((!MODef.isReg()) || (!MODef.isDef())) 222 continue; 223 unsigned DefReg = MODef.getReg(); 224 if (!TRI->isVirtualRegister(DefReg)) { 225 IsDead = false; 226 break; 227 } 228 for (MachineRegisterInfo::use_instr_iterator 229 II = MRI->use_instr_begin(Reg), EE = MRI->use_instr_end(); 230 II != EE; ++II) { 231 // We don't care about self references. 232 if (&*II == Def) 233 continue; 234 if (DeadInstr.find(&*II) == DeadInstr.end()) { 235 IsDead = false; 236 break; 237 } 238 } 239 } 240 241 if (!IsDead) continue; 242 243 DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); 244 DeadInstr.insert(Def); 245 } 246 } 247 } 248 249 // Creates the more optimized patterns and generally does all the code 250 // transformations in this pass. 
251 unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { 252 if (MI->isCopy()) { 253 return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); 254 } 255 256 if (MI->isInsertSubreg()) { 257 unsigned DPRReg = MI->getOperand(1).getReg(); 258 unsigned SPRReg = MI->getOperand(2).getReg(); 259 260 if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { 261 MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); 262 MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); 263 264 if (DPRMI && SPRMI) { 265 // See if the first operand of this insert_subreg is IMPLICIT_DEF 266 MachineInstr *ECDef = elideCopies(DPRMI); 267 if (ECDef && ECDef->isImplicitDef()) { 268 // Another corner case - if we're inserting something that is purely 269 // a subreg copy of a DPR, just use that DPR. 270 271 MachineInstr *EC = elideCopies(SPRMI); 272 // Is it a subreg copy of ssub_0? 273 if (EC && EC->isCopy() && 274 EC->getOperand(1).getSubReg() == ARM::ssub_0) { 275 DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); 276 277 // Find the thing we're subreg copying out of - is it of the same 278 // regclass as DPRMI? (i.e. a DPR or QPR). 279 unsigned FullReg = SPRMI->getOperand(1).getReg(); 280 const TargetRegisterClass *TRC = 281 MRI->getRegClass(MI->getOperand(1).getReg()); 282 if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { 283 DEBUG(dbgs() << "Subreg copy is compatible - returning "); 284 DEBUG(dbgs() << PrintReg(FullReg) << "\n"); 285 eraseInstrWithNoUses(MI); 286 return FullReg; 287 } 288 } 289 290 return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); 291 } 292 } 293 } 294 return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); 295 } 296 297 if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), 298 &ARM::SPRRegClass)) { 299 // See if all bar one of the operands are IMPLICIT_DEF and insert the 300 // optimizer pattern accordingly. 
301 unsigned NumImplicit = 0, NumTotal = 0; 302 unsigned NonImplicitReg = ~0U; 303 304 for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { 305 if (!MI->getOperand(I).isReg()) 306 continue; 307 ++NumTotal; 308 unsigned OpReg = MI->getOperand(I).getReg(); 309 310 if (!TRI->isVirtualRegister(OpReg)) 311 break; 312 313 MachineInstr *Def = MRI->getVRegDef(OpReg); 314 if (!Def) 315 break; 316 if (Def->isImplicitDef()) 317 ++NumImplicit; 318 else 319 NonImplicitReg = MI->getOperand(I).getReg(); 320 } 321 322 if (NumImplicit == NumTotal - 1) 323 return optimizeAllLanesPattern(MI, NonImplicitReg); 324 else 325 return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); 326 } 327 328 llvm_unreachable("Unhandled update pattern!"); 329 } 330 331 // Return true if this MachineInstr inserts a scalar (SPR) value into 332 // a D or Q register. 333 bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { 334 // The only way we can do a partial register update is through a COPY, 335 // INSERT_SUBREG or REG_SEQUENCE. 336 if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) 337 return true; 338 339 if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), 340 &ARM::SPRRegClass)) 341 return true; 342 343 if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) 344 return true; 345 346 return false; 347 } 348 349 // Looks through full copies to get the instruction that defines the input 350 // operand for MI. 351 MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { 352 if (!MI->isFullCopy()) 353 return MI; 354 if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) 355 return nullptr; 356 MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); 357 if (!Def) 358 return nullptr; 359 return elideCopies(Def); 360 } 361 362 // Look through full copies and PHIs to get the set of non-copy MachineInstrs 363 // that can produce MI. 
364 void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, 365 SmallVectorImpl<MachineInstr*> &Outs) { 366 // Looking through PHIs may create loops so we need to track what 367 // instructions we have visited before. 368 std::set<MachineInstr *> Reached; 369 SmallVector<MachineInstr *, 8> Front; 370 Front.push_back(MI); 371 while (Front.size() != 0) { 372 MI = Front.back(); 373 Front.pop_back(); 374 375 // If we have already explored this MachineInstr, ignore it. 376 if (Reached.find(MI) != Reached.end()) 377 continue; 378 Reached.insert(MI); 379 if (MI->isPHI()) { 380 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { 381 unsigned Reg = MI->getOperand(I).getReg(); 382 if (!TRI->isVirtualRegister(Reg)) { 383 continue; 384 } 385 MachineInstr *NewMI = MRI->getVRegDef(Reg); 386 if (!NewMI) 387 continue; 388 Front.push_back(NewMI); 389 } 390 } else if (MI->isFullCopy()) { 391 if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) 392 continue; 393 MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); 394 if (!NewMI) 395 continue; 396 Front.push_back(NewMI); 397 } else { 398 DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); 399 Outs.push_back(MI); 400 } 401 } 402 } 403 404 // Return the DPR virtual registers that are read by this machine instruction 405 // (if any). 
406 SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { 407 if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || 408 MI->isKill()) 409 return SmallVector<unsigned, 8>(); 410 411 SmallVector<unsigned, 8> Defs; 412 for (unsigned i = 0; i < MI->getNumOperands(); ++i) { 413 MachineOperand &MO = MI->getOperand(i); 414 415 if (!MO.isReg() || !MO.isUse()) 416 continue; 417 if (!usesRegClass(MO, &ARM::DPRRegClass) && 418 !usesRegClass(MO, &ARM::QPRRegClass) && 419 !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR 420 continue; 421 422 Defs.push_back(MO.getReg()); 423 } 424 return Defs; 425 } 426 427 // Creates a DPR register from an SPR one by using a VDUP. 428 unsigned 429 A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, 430 MachineBasicBlock::iterator InsertBefore, 431 DebugLoc DL, 432 unsigned Reg, unsigned Lane, bool QPR) { 433 unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : 434 &ARM::DPRRegClass); 435 AddDefaultPred(BuildMI(MBB, 436 InsertBefore, 437 DL, 438 TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), 439 Out) 440 .addReg(Reg) 441 .addImm(Lane)); 442 443 return Out; 444 } 445 446 // Creates a SPR register from a DPR by copying the value in lane 0. 447 unsigned 448 A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, 449 MachineBasicBlock::iterator InsertBefore, 450 DebugLoc DL, 451 unsigned DReg, unsigned Lane, 452 const TargetRegisterClass *TRC) { 453 unsigned Out = MRI->createVirtualRegister(TRC); 454 BuildMI(MBB, 455 InsertBefore, 456 DL, 457 TII->get(TargetOpcode::COPY), Out) 458 .addReg(DReg, 0, Lane); 459 460 return Out; 461 } 462 463 // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. 
464 unsigned 465 A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, 466 MachineBasicBlock::iterator InsertBefore, 467 DebugLoc DL, 468 unsigned Reg1, unsigned Reg2) { 469 unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); 470 BuildMI(MBB, 471 InsertBefore, 472 DL, 473 TII->get(TargetOpcode::REG_SEQUENCE), Out) 474 .addReg(Reg1) 475 .addImm(ARM::dsub_0) 476 .addReg(Reg2) 477 .addImm(ARM::dsub_1); 478 return Out; 479 } 480 481 // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) 482 // and merges them into one DPR register. 483 unsigned 484 A15SDOptimizer::createVExt(MachineBasicBlock &MBB, 485 MachineBasicBlock::iterator InsertBefore, 486 DebugLoc DL, 487 unsigned Ssub0, unsigned Ssub1) { 488 unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); 489 AddDefaultPred(BuildMI(MBB, 490 InsertBefore, 491 DL, 492 TII->get(ARM::VEXTd32), Out) 493 .addReg(Ssub0) 494 .addReg(Ssub1) 495 .addImm(1)); 496 return Out; 497 } 498 499 unsigned 500 A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, 501 MachineBasicBlock::iterator InsertBefore, 502 DebugLoc DL, unsigned DReg, unsigned Lane, 503 unsigned ToInsert) { 504 unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); 505 BuildMI(MBB, 506 InsertBefore, 507 DL, 508 TII->get(TargetOpcode::INSERT_SUBREG), Out) 509 .addReg(DReg) 510 .addReg(ToInsert) 511 .addImm(Lane); 512 513 return Out; 514 } 515 516 unsigned 517 A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, 518 MachineBasicBlock::iterator InsertBefore, 519 DebugLoc DL) { 520 unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); 521 BuildMI(MBB, 522 InsertBefore, 523 DL, 524 TII->get(TargetOpcode::IMPLICIT_DEF), Out); 525 return Out; 526 } 527 528 // This function inserts instructions in order to optimize interactions between 529 // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all 530 // lanes, and the using VEXT instructions to recompose the result. 
531 unsigned 532 A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { 533 MachineBasicBlock::iterator InsertPt(MI); 534 DebugLoc DL = MI->getDebugLoc(); 535 MachineBasicBlock &MBB = *MI->getParent(); 536 InsertPt++; 537 unsigned Out; 538 539 // DPair has the same length as QPR and also has two DPRs as subreg. 540 // Treat DPair as QPR. 541 if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) || 542 MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) { 543 unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, 544 ARM::dsub_0, &ARM::DPRRegClass); 545 unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, 546 ARM::dsub_1, &ARM::DPRRegClass); 547 548 unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); 549 unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); 550 Out = createVExt(MBB, InsertPt, DL, Out1, Out2); 551 552 unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); 553 unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); 554 Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); 555 556 Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); 557 558 } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { 559 unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); 560 unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); 561 Out = createVExt(MBB, InsertPt, DL, Out1, Out2); 562 563 } else { 564 assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && 565 "Found unexpected regclass!"); 566 567 unsigned PrefLane = getPrefSPRLane(Reg); 568 unsigned Lane; 569 switch (PrefLane) { 570 case ARM::ssub_0: Lane = 0; break; 571 case ARM::ssub_1: Lane = 1; break; 572 default: llvm_unreachable("Unknown preferred lane!"); 573 } 574 575 // Treat DPair as QPR 576 bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) || 577 usesRegClass(MI->getOperand(0), &ARM::DPairRegClass); 578 579 Out = createImplicitDef(MBB, InsertPt, DL); 580 Out = createInsertSubreg(MBB, 
InsertPt, DL, Out, PrefLane, Reg); 581 Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); 582 eraseInstrWithNoUses(MI); 583 } 584 return Out; 585 } 586 587 bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { 588 // We look for instructions that write S registers that are then read as 589 // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and 590 // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or 591 // merge two SPR values to form a DPR register. In order avoid false 592 // positives we make sure that there is an SPR producer so we look past 593 // COPY and PHI nodes to find it. 594 // 595 // The best code pattern for when an SPR producer is going to be used by a 596 // DPR or QPR consumer depends on whether the other lanes of the 597 // corresponding DPR/QPR are currently defined. 598 // 599 // We can handle these efficiently, depending on the type of 600 // pseudo-instruction that is producing the pattern 601 // 602 // * COPY: * VDUP all lanes and merge the results together 603 // using VEXTs. 604 // 605 // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR 606 // lane, and the other lane(s) of the DPR/QPR register 607 // that we are inserting in are undefined, use the 608 // original DPR/QPR value. 609 // * Otherwise, fall back on the same stategy as COPY. 610 // 611 // * REG_SEQUENCE: * If all except one of the input operands are 612 // IMPLICIT_DEFs, insert the VDUP pattern for just the 613 // defined input operand 614 // * Otherwise, fall back on the same stategy as COPY. 615 // 616 617 // First, get all the reads of D-registers done by this instruction. 618 SmallVector<unsigned, 8> Defs = getReadDPRs(MI); 619 bool Modified = false; 620 621 for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end(); 622 I != E; ++I) { 623 // Follow the def-use chain for this DPR through COPYs, and also through 624 // PHIs (which are essentially multi-way COPYs). 
It is because of PHIs that 625 // we can end up with multiple defs of this DPR. 626 627 SmallVector<MachineInstr *, 8> DefSrcs; 628 if (!TRI->isVirtualRegister(*I)) 629 continue; 630 MachineInstr *Def = MRI->getVRegDef(*I); 631 if (!Def) 632 continue; 633 634 elideCopiesAndPHIs(Def, DefSrcs); 635 636 for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(), 637 EE = DefSrcs.end(); II != EE; ++II) { 638 MachineInstr *MI = *II; 639 640 // If we've already analyzed and replaced this operand, don't do 641 // anything. 642 if (Replacements.find(MI) != Replacements.end()) 643 continue; 644 645 // Now, work out if the instruction causes a SPR->DPR dependency. 646 if (!hasPartialWrite(MI)) 647 continue; 648 649 // Collect all the uses of this MI's DPR def for updating later. 650 SmallVector<MachineOperand*, 8> Uses; 651 unsigned DPRDefReg = MI->getOperand(0).getReg(); 652 for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), 653 E = MRI->use_end(); I != E; ++I) 654 Uses.push_back(&*I); 655 656 // We can optimize this. 657 unsigned NewReg = optimizeSDPattern(MI); 658 659 if (NewReg != 0) { 660 Modified = true; 661 for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(), 662 E = Uses.end(); I != E; ++I) { 663 // Make sure to constrain the register class of the new register to 664 // match what we're replacing. Otherwise we can optimize a DPR_VFP2 665 // reference into a plain DPR, and that will end poorly. NewReg is 666 // always virtual here, so there will always be a matching subclass 667 // to find. 
668 MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg())); 669 670 DEBUG(dbgs() << "Replacing operand " 671 << **I << " with " 672 << PrintReg(NewReg) << "\n"); 673 (*I)->substVirtReg(NewReg, 0, *TRI); 674 } 675 } 676 Replacements[MI] = NewReg; 677 } 678 } 679 return Modified; 680 } 681 682 bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { 683 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); 684 // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be 685 // enabled when NEON is available. 686 if (!(STI.isCortexA15() && STI.hasNEON())) 687 return false; 688 TII = STI.getInstrInfo(); 689 TRI = STI.getRegisterInfo(); 690 MRI = &Fn.getRegInfo(); 691 bool Modified = false; 692 693 DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); 694 695 DeadInstr.clear(); 696 Replacements.clear(); 697 698 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 699 ++MFI) { 700 701 for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); 702 MI != ME;) { 703 Modified |= runOnInstruction(MI++); 704 } 705 706 } 707 708 for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), 709 E = DeadInstr.end(); 710 I != E; ++I) { 711 (*I)->eraseFromParent(); 712 } 713 714 return Modified; 715 } 716 717 FunctionPass *llvm::createA15SDOptimizerPass() { 718 return new A15SDOptimizer(); 719 } 720