1 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups  -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs peephole optimizations to cleanup ugly code sequences at
10 // MachineInstruction layer.
11 //
12 // Currently, there are two optimizations implemented:
13 //  - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
14 //    zero extend 32-bit subregisters to 64-bit registers, if the compiler
15 //    could prove the subregisters is defined by 32-bit operations in which
16 //    case the upper half of the underlying 64-bit registers were zeroed
17 //    implicitly.
18 //
19 //  - One post-RA PreEmit pass to do final cleanup on some redundant
20 //    instructions generated due to bad RA on subregister.
21 //===----------------------------------------------------------------------===//
22 
23 #include "BPF.h"
24 #include "BPFInstrInfo.h"
25 #include "BPFTargetMachine.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/Support/Debug.h"
31 #include <set>
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "bpf-mi-zext-elim"
36 
37 STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
38 
39 namespace {
40 
41 struct BPFMIPeephole : public MachineFunctionPass {
42 
43   static char ID;
44   const BPFInstrInfo *TII;
45   MachineFunction *MF;
46   MachineRegisterInfo *MRI;
47 
48   BPFMIPeephole() : MachineFunctionPass(ID) {
49     initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
50   }
51 
52 private:
53   // Initialize class variables.
54   void initialize(MachineFunction &MFParm);
55 
56   bool isCopyFrom32Def(MachineInstr *CopyMI);
57   bool isInsnFrom32Def(MachineInstr *DefInsn);
58   bool isPhiFrom32Def(MachineInstr *MovMI);
59   bool isMovFrom32Def(MachineInstr *MovMI);
60   bool eliminateZExtSeq();
61   bool eliminateZExt();
62 
63   std::set<MachineInstr *> PhiInsns;
64 
65 public:
66 
67   // Main entry point for this pass.
68   bool runOnMachineFunction(MachineFunction &MF) override {
69     if (skipFunction(MF.getFunction()))
70       return false;
71 
72     initialize(MF);
73 
74     // First try to eliminate (zext, lshift, rshift) and then
75     // try to eliminate zext.
76     bool ZExtSeqExist, ZExtExist;
77     ZExtSeqExist = eliminateZExtSeq();
78     ZExtExist = eliminateZExt();
79     return ZExtSeqExist || ZExtExist;
80   }
81 };
82 
83 // Initialize class variables.
84 void BPFMIPeephole::initialize(MachineFunction &MFParm) {
85   MF = &MFParm;
86   MRI = &MF->getRegInfo();
87   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
88   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
89 }
90 
91 bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
92 {
93   MachineOperand &opnd = CopyMI->getOperand(1);
94 
95   if (!opnd.isReg())
96     return false;
97 
98   // Return false if getting value from a 32bit physical register.
99   // Most likely, this physical register is aliased to
100   // function call return value or current function parameters.
101   Register Reg = opnd.getReg();
102   if (!Register::isVirtualRegister(Reg))
103     return false;
104 
105   if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
106     return false;
107 
108   MachineInstr *DefInsn = MRI->getVRegDef(Reg);
109   if (!isInsnFrom32Def(DefInsn))
110     return false;
111 
112   return true;
113 }
114 
115 bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
116 {
117   for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
118     MachineOperand &opnd = PhiMI->getOperand(i);
119 
120     if (!opnd.isReg())
121       return false;
122 
123     MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
124     if (!PhiDef)
125       return false;
126     if (PhiDef->isPHI()) {
127       if (PhiInsns.find(PhiDef) != PhiInsns.end())
128         return false;
129       PhiInsns.insert(PhiDef);
130       if (!isPhiFrom32Def(PhiDef))
131         return false;
132     }
133     if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
134       return false;
135   }
136 
137   return true;
138 }
139 
140 // The \p DefInsn instruction defines a virtual register.
141 bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
142 {
143   if (!DefInsn)
144     return false;
145 
146   if (DefInsn->isPHI()) {
147     if (PhiInsns.find(DefInsn) != PhiInsns.end())
148       return false;
149     PhiInsns.insert(DefInsn);
150     if (!isPhiFrom32Def(DefInsn))
151       return false;
152   } else if (DefInsn->getOpcode() == BPF::COPY) {
153     if (!isCopyFrom32Def(DefInsn))
154       return false;
155   }
156 
157   return true;
158 }
159 
160 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
161 {
162   MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
163 
164   LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
165   LLVM_DEBUG(DefInsn->dump());
166 
167   PhiInsns.clear();
168   if (!isInsnFrom32Def(DefInsn))
169     return false;
170 
171   LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
172 
173   return true;
174 }
175 
176 bool BPFMIPeephole::eliminateZExtSeq() {
177   MachineInstr* ToErase = nullptr;
178   bool Eliminated = false;
179 
180   for (MachineBasicBlock &MBB : *MF) {
181     for (MachineInstr &MI : MBB) {
182       // If the previous instruction was marked for elimination, remove it now.
183       if (ToErase) {
184         ToErase->eraseFromParent();
185         ToErase = nullptr;
186       }
187 
188       // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
189       //
190       //   MOV_32_64 rB, wA
191       //   SLL_ri    rB, rB, 32
192       //   SRL_ri    rB, rB, 32
193       if (MI.getOpcode() == BPF::SRL_ri &&
194           MI.getOperand(2).getImm() == 32) {
195         Register DstReg = MI.getOperand(0).getReg();
196         Register ShfReg = MI.getOperand(1).getReg();
197         MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
198 
199         LLVM_DEBUG(dbgs() << "Starting SRL found:");
200         LLVM_DEBUG(MI.dump());
201 
202         if (!SllMI ||
203             SllMI->isPHI() ||
204             SllMI->getOpcode() != BPF::SLL_ri ||
205             SllMI->getOperand(2).getImm() != 32)
206           continue;
207 
208         LLVM_DEBUG(dbgs() << "  SLL found:");
209         LLVM_DEBUG(SllMI->dump());
210 
211         MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
212         if (!MovMI ||
213             MovMI->isPHI() ||
214             MovMI->getOpcode() != BPF::MOV_32_64)
215           continue;
216 
217         LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
218         LLVM_DEBUG(MovMI->dump());
219 
220         Register SubReg = MovMI->getOperand(1).getReg();
221         if (!isMovFrom32Def(MovMI)) {
222           LLVM_DEBUG(dbgs()
223                      << "  One ZExt elim sequence failed qualifying elim.\n");
224           continue;
225         }
226 
227         BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
228           .addImm(0).addReg(SubReg).addImm(BPF::sub_32);
229 
230         SllMI->eraseFromParent();
231         MovMI->eraseFromParent();
232         // MI is the right shift, we can't erase it in it's own iteration.
233         // Mark it to ToErase, and erase in the next iteration.
234         ToErase = &MI;
235         ZExtElemNum++;
236         Eliminated = true;
237       }
238     }
239   }
240 
241   return Eliminated;
242 }
243 
244 bool BPFMIPeephole::eliminateZExt() {
245   MachineInstr* ToErase = nullptr;
246   bool Eliminated = false;
247 
248   for (MachineBasicBlock &MBB : *MF) {
249     for (MachineInstr &MI : MBB) {
250       // If the previous instruction was marked for elimination, remove it now.
251       if (ToErase) {
252         ToErase->eraseFromParent();
253         ToErase = nullptr;
254       }
255 
256       if (MI.getOpcode() != BPF::MOV_32_64)
257         continue;
258 
259       // Eliminate MOV_32_64 if possible.
260       //   MOV_32_64 rA, wB
261       //
262       // If wB has been zero extended, replace it with a SUBREG_TO_REG.
263       // This is to workaround BPF programs where pkt->{data, data_end}
264       // is encoded as u32, but actually the verifier populates them
265       // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
266       LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
267       LLVM_DEBUG(MI.dump());
268 
269       if (!isMovFrom32Def(&MI))
270         continue;
271 
272       LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
273 
274       Register dst = MI.getOperand(0).getReg();
275       Register src = MI.getOperand(1).getReg();
276 
277       // Build a SUBREG_TO_REG instruction.
278       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
279         .addImm(0).addReg(src).addImm(BPF::sub_32);
280 
281       ToErase = &MI;
282       Eliminated = true;
283     }
284   }
285 
286   return Eliminated;
287 }
288 
289 } // end default namespace
290 
291 INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
292                 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
293                 false, false)
294 
295 char BPFMIPeephole::ID = 0;
296 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
297 
298 STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
299 
300 namespace {
301 
302 struct BPFMIPreEmitPeephole : public MachineFunctionPass {
303 
304   static char ID;
305   MachineFunction *MF;
306   const TargetRegisterInfo *TRI;
307 
308   BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
309     initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
310   }
311 
312 private:
313   // Initialize class variables.
314   void initialize(MachineFunction &MFParm);
315 
316   bool eliminateRedundantMov();
317 
318 public:
319 
320   // Main entry point for this pass.
321   bool runOnMachineFunction(MachineFunction &MF) override {
322     if (skipFunction(MF.getFunction()))
323       return false;
324 
325     initialize(MF);
326 
327     return eliminateRedundantMov();
328   }
329 };
330 
331 // Initialize class variables.
332 void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
333   MF = &MFParm;
334   TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
335   LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
336 }
337 
338 bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
339   MachineInstr* ToErase = nullptr;
340   bool Eliminated = false;
341 
342   for (MachineBasicBlock &MBB : *MF) {
343     for (MachineInstr &MI : MBB) {
344       // If the previous instruction was marked for elimination, remove it now.
345       if (ToErase) {
346         LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
347         LLVM_DEBUG(ToErase->dump());
348         ToErase->eraseFromParent();
349         ToErase = nullptr;
350       }
351 
352       // Eliminate identical move:
353       //
354       //   MOV rA, rA
355       //
356       // Note that we cannot remove
357       //   MOV_32_64  rA, wA
358       //   MOV_rr_32  wA, wA
359       // as these two instructions having side effects, zeroing out
360       // top 32 bits of rA.
361       unsigned Opcode = MI.getOpcode();
362       if (Opcode == BPF::MOV_rr) {
363         Register dst = MI.getOperand(0).getReg();
364         Register src = MI.getOperand(1).getReg();
365 
366         if (dst != src)
367           continue;
368 
369         ToErase = &MI;
370         RedundantMovElemNum++;
371         Eliminated = true;
372       }
373     }
374   }
375 
376   return Eliminated;
377 }
378 
379 } // end default namespace
380 
381 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
382                 "BPF PreEmit Peephole Optimization", false, false)
383 
384 char BPFMIPreEmitPeephole::ID = 0;
385 FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
386 {
387   return new BPFMIPreEmitPeephole();
388 }
389 
390 STATISTIC(TruncElemNum, "Number of truncation eliminated");
391 
392 namespace {
393 
394 struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
395 
396   static char ID;
397   const BPFInstrInfo *TII;
398   MachineFunction *MF;
399   MachineRegisterInfo *MRI;
400 
401   BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
402     initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
403   }
404 
405 private:
406   // Initialize class variables.
407   void initialize(MachineFunction &MFParm);
408 
409   bool eliminateTruncSeq();
410 
411 public:
412 
413   // Main entry point for this pass.
414   bool runOnMachineFunction(MachineFunction &MF) override {
415     if (skipFunction(MF.getFunction()))
416       return false;
417 
418     initialize(MF);
419 
420     return eliminateTruncSeq();
421   }
422 };
423 
424 static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
425 {
426   if (TruncSize == 1)
427     return opcode == BPF::LDB || opcode == BPF::LDB32;
428 
429   if (TruncSize == 2)
430     return opcode == BPF::LDH || opcode == BPF::LDH32;
431 
432   if (TruncSize == 4)
433     return opcode == BPF::LDW || opcode == BPF::LDW32;
434 
435   return false;
436 }
437 
438 // Initialize class variables.
439 void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
440   MF = &MFParm;
441   MRI = &MF->getRegInfo();
442   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
443   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
444 }
445 
446 // Reg truncating is often the result of 8/16/32bit->64bit or
447 // 8/16bit->32bit conversion. If the reg value is loaded with
448 // masked byte width, the AND operation can be removed since
449 // BPF LOAD already has zero extension.
450 //
451 // This also solved a correctness issue.
452 // In BPF socket-related program, e.g., __sk_buff->{data, data_end}
453 // are 32-bit registers, but later on, kernel verifier will rewrite
454 // it with 64-bit value. Therefore, truncating the value after the
455 // load will result in incorrect code.
456 bool BPFMIPeepholeTruncElim::eliminateTruncSeq() {
457   MachineInstr* ToErase = nullptr;
458   bool Eliminated = false;
459 
460   for (MachineBasicBlock &MBB : *MF) {
461     for (MachineInstr &MI : MBB) {
462       // The second insn to remove if the eliminate candidate is a pair.
463       MachineInstr *MI2 = nullptr;
464       Register DstReg, SrcReg;
465       MachineInstr *DefMI;
466       int TruncSize = -1;
467 
468       // If the previous instruction was marked for elimination, remove it now.
469       if (ToErase) {
470         ToErase->eraseFromParent();
471         ToErase = nullptr;
472       }
473 
474       // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
475       // for BPF ANDI is i32, and this case only happens on ALU64.
476       if (MI.getOpcode() == BPF::SRL_ri &&
477           MI.getOperand(2).getImm() == 32) {
478         SrcReg = MI.getOperand(1).getReg();
479         if (!MRI->hasOneNonDBGUse(SrcReg))
480           continue;
481 
482         MI2 = MRI->getVRegDef(SrcReg);
483         DstReg = MI.getOperand(0).getReg();
484 
485         if (!MI2 ||
486             MI2->getOpcode() != BPF::SLL_ri ||
487             MI2->getOperand(2).getImm() != 32)
488           continue;
489 
490         // Update SrcReg.
491         SrcReg = MI2->getOperand(1).getReg();
492         DefMI = MRI->getVRegDef(SrcReg);
493         if (DefMI)
494           TruncSize = 4;
495       } else if (MI.getOpcode() == BPF::AND_ri ||
496                  MI.getOpcode() == BPF::AND_ri_32) {
497         SrcReg = MI.getOperand(1).getReg();
498         DstReg = MI.getOperand(0).getReg();
499         DefMI = MRI->getVRegDef(SrcReg);
500 
501         if (!DefMI)
502           continue;
503 
504         int64_t imm = MI.getOperand(2).getImm();
505         if (imm == 0xff)
506           TruncSize = 1;
507         else if (imm == 0xffff)
508           TruncSize = 2;
509       }
510 
511       if (TruncSize == -1)
512         continue;
513 
514       // The definition is PHI node, check all inputs.
515       if (DefMI->isPHI()) {
516         bool CheckFail = false;
517 
518         for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
519           MachineOperand &opnd = DefMI->getOperand(i);
520           if (!opnd.isReg()) {
521             CheckFail = true;
522             break;
523           }
524 
525           MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
526           if (!PhiDef || PhiDef->isPHI() ||
527               !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
528             CheckFail = true;
529             break;
530           }
531         }
532 
533         if (CheckFail)
534           continue;
535       } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
536         continue;
537       }
538 
539       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
540               .addReg(SrcReg);
541 
542       if (MI2)
543         MI2->eraseFromParent();
544 
545       // Mark it to ToErase, and erase in the next iteration.
546       ToErase = &MI;
547       TruncElemNum++;
548       Eliminated = true;
549     }
550   }
551 
552   return Eliminated;
553 }
554 
555 } // end default namespace
556 
557 INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
558                 "BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
559                 false, false)
560 
561 char BPFMIPeepholeTruncElim::ID = 0;
562 FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
563 {
564   return new BPFMIPeepholeTruncElim();
565 }
566