1 //===- Debugify.cpp - Attach synthetic debug info to everything -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file This pass attaches synthetic debug info to everything. It can be used
10 /// to create targeted tests for debug info preservation.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Utils/Debugify.h"
15 #include "llvm/ADT/BitVector.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/IR/DIBuilder.h"
18 #include "llvm/IR/DebugInfo.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Support/CommandLine.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 
30 cl::opt<bool> Quiet("debugify-quiet",
31                     cl::desc("Suppress verbose debugify output"));
32 
33 enum class Level {
34   Locations,
35   LocationsAndVariables
36 };
37 cl::opt<Level> DebugifyLevel(
38     "debugify-level", cl::desc("Kind of debug info to add"),
39     cl::values(clEnumValN(Level::Locations, "locations", "Locations only"),
40                clEnumValN(Level::LocationsAndVariables, "location+variables",
41                           "Locations and Variables")),
42     cl::init(Level::LocationsAndVariables));
43 
44 raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
45 
46 uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
47   return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
48 }
49 
50 bool isFunctionSkipped(Function &F) {
51   return F.isDeclaration() || !F.hasExactDefinition();
52 }
53 
54 /// Find the basic block's terminating instruction.
55 ///
56 /// Special care is needed to handle musttail and deopt calls, as these behave
57 /// like (but are in fact not) terminators.
58 Instruction *findTerminatingInstruction(BasicBlock &BB) {
59   if (auto *I = BB.getTerminatingMustTailCall())
60     return I;
61   if (auto *I = BB.getTerminatingDeoptimizeCall())
62     return I;
63   return BB.getTerminator();
64 }
65 } // end anonymous namespace
66 
67 bool llvm::applyDebugifyMetadata(
68     Module &M, iterator_range<Module::iterator> Functions, StringRef Banner,
69     std::function<bool(DIBuilder &DIB, Function &F)> ApplyToMF) {
70   // Skip modules with debug info.
71   if (M.getNamedMetadata("llvm.dbg.cu")) {
72     dbg() << Banner << "Skipping module with debug info\n";
73     return false;
74   }
75 
76   DIBuilder DIB(M);
77   LLVMContext &Ctx = M.getContext();
78   auto *Int32Ty = Type::getInt32Ty(Ctx);
79 
80   // Get a DIType which corresponds to Ty.
81   DenseMap<uint64_t, DIType *> TypeCache;
82   auto getCachedDIType = [&](Type *Ty) -> DIType * {
83     uint64_t Size = getAllocSizeInBits(M, Ty);
84     DIType *&DTy = TypeCache[Size];
85     if (!DTy) {
86       std::string Name = "ty" + utostr(Size);
87       DTy = DIB.createBasicType(Name, Size, dwarf::DW_ATE_unsigned);
88     }
89     return DTy;
90   };
91 
92   unsigned NextLine = 1;
93   unsigned NextVar = 1;
94   auto File = DIB.createFile(M.getName(), "/");
95   auto CU = DIB.createCompileUnit(dwarf::DW_LANG_C, File, "debugify",
96                                   /*isOptimized=*/true, "", 0);
97 
98   // Visit each instruction.
99   for (Function &F : Functions) {
100     if (isFunctionSkipped(F))
101       continue;
102 
103     bool InsertedDbgVal = false;
104     auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
105     DISubprogram::DISPFlags SPFlags =
106         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized;
107     if (F.hasPrivateLinkage() || F.hasInternalLinkage())
108       SPFlags |= DISubprogram::SPFlagLocalToUnit;
109     auto SP = DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine,
110                                  SPType, NextLine, DINode::FlagZero, SPFlags);
111     F.setSubprogram(SP);
112 
113     // Helper that inserts a dbg.value before \p InsertBefore, copying the
114     // location (and possibly the type, if it's non-void) from \p TemplateInst.
115     auto insertDbgVal = [&](Instruction &TemplateInst,
116                             Instruction *InsertBefore) {
117       std::string Name = utostr(NextVar++);
118       Value *V = &TemplateInst;
119       if (TemplateInst.getType()->isVoidTy())
120         V = ConstantInt::get(Int32Ty, 0);
121       const DILocation *Loc = TemplateInst.getDebugLoc().get();
122       auto LocalVar = DIB.createAutoVariable(SP, Name, File, Loc->getLine(),
123                                              getCachedDIType(V->getType()),
124                                              /*AlwaysPreserve=*/true);
125       DIB.insertDbgValueIntrinsic(V, LocalVar, DIB.createExpression(), Loc,
126                                   InsertBefore);
127     };
128 
129     for (BasicBlock &BB : F) {
130       // Attach debug locations.
131       for (Instruction &I : BB)
132         I.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP));
133 
134       if (DebugifyLevel < Level::LocationsAndVariables)
135         continue;
136 
137       // Inserting debug values into EH pads can break IR invariants.
138       if (BB.isEHPad())
139         continue;
140 
141       // Find the terminating instruction, after which no debug values are
142       // attached.
143       Instruction *LastInst = findTerminatingInstruction(BB);
144       assert(LastInst && "Expected basic block with a terminator");
145 
146       // Maintain an insertion point which can't be invalidated when updates
147       // are made.
148       BasicBlock::iterator InsertPt = BB.getFirstInsertionPt();
149       assert(InsertPt != BB.end() && "Expected to find an insertion point");
150       Instruction *InsertBefore = &*InsertPt;
151 
152       // Attach debug values.
153       for (Instruction *I = &*BB.begin(); I != LastInst; I = I->getNextNode()) {
154         // Skip void-valued instructions.
155         if (I->getType()->isVoidTy())
156           continue;
157 
158         // Phis and EH pads must be grouped at the beginning of the block.
159         // Only advance the insertion point when we finish visiting these.
160         if (!isa<PHINode>(I) && !I->isEHPad())
161           InsertBefore = I->getNextNode();
162 
163         insertDbgVal(*I, InsertBefore);
164         InsertedDbgVal = true;
165       }
166     }
167     // Make sure we emit at least one dbg.value, otherwise MachineDebugify may
168     // not have anything to work with as it goes about inserting DBG_VALUEs.
169     // (It's common for MIR tests to be written containing skeletal IR with
170     // empty functions -- we're still interested in debugifying the MIR within
171     // those tests, and this helps with that.)
172     if (DebugifyLevel == Level::LocationsAndVariables && !InsertedDbgVal) {
173       auto *Term = findTerminatingInstruction(F.getEntryBlock());
174       insertDbgVal(*Term, Term);
175     }
176     if (ApplyToMF)
177       ApplyToMF(DIB, F);
178     DIB.finalizeSubprogram(SP);
179   }
180   DIB.finalize();
181 
182   // Track the number of distinct lines and variables.
183   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.debugify");
184   auto addDebugifyOperand = [&](unsigned N) {
185     NMD->addOperand(MDNode::get(
186         Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N))));
187   };
188   addDebugifyOperand(NextLine - 1); // Original number of lines.
189   addDebugifyOperand(NextVar - 1);  // Original number of variables.
190   assert(NMD->getNumOperands() == 2 &&
191          "llvm.debugify should have exactly 2 operands!");
192 
193   // Claim that this synthetic debug info is valid.
194   StringRef DIVersionKey = "Debug Info Version";
195   if (!M.getModuleFlag(DIVersionKey))
196     M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION);
197 
198   return true;
199 }
200 
201 static bool applyDebugify(Function &F) {
202   Module &M = *F.getParent();
203   auto FuncIt = F.getIterator();
204   return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
205                                "FunctionDebugify: ", /*ApplyToMF=*/nullptr);
206 }
207 
208 static bool applyDebugify(Module &M) {
209   return applyDebugifyMetadata(M, M.functions(),
210                                "ModuleDebugify: ", /*ApplyToMF=*/nullptr);
211 }
212 
213 bool llvm::stripDebugifyMetadata(Module &M) {
214   bool Changed = false;
215 
216   // Remove the llvm.debugify module-level named metadata.
217   NamedMDNode *DebugifyMD = M.getNamedMetadata("llvm.debugify");
218   if (DebugifyMD) {
219     M.eraseNamedMetadata(DebugifyMD);
220     Changed = true;
221   }
222 
223   // Strip out all debug intrinsics and supporting metadata (subprograms, types,
224   // variables, etc).
225   Changed |= StripDebugInfo(M);
226 
227   // Strip out the dead dbg.value prototype.
228   Function *DbgValF = M.getFunction("llvm.dbg.value");
229   if (DbgValF) {
230     assert(DbgValF->isDeclaration() && DbgValF->use_empty() &&
231            "Not all debug info stripped?");
232     DbgValF->eraseFromParent();
233     Changed = true;
234   }
235 
236   // Strip out the module-level Debug Info Version metadata.
237   // FIXME: There must be an easier way to remove an operand from a NamedMDNode.
238   NamedMDNode *NMD = M.getModuleFlagsMetadata();
239   if (!NMD)
240     return Changed;
241   SmallVector<MDNode *, 4> Flags;
242   for (MDNode *Flag : NMD->operands())
243     Flags.push_back(Flag);
244   NMD->clearOperands();
245   for (MDNode *Flag : Flags) {
246     MDString *Key = dyn_cast_or_null<MDString>(Flag->getOperand(1));
247     if (Key->getString() == "Debug Info Version") {
248       Changed = true;
249       continue;
250     }
251     NMD->addOperand(Flag);
252   }
253   // If we left it empty we might as well remove it.
254   if (NMD->getNumOperands() == 0)
255     NMD->eraseFromParent();
256 
257   return Changed;
258 }
259 
260 namespace {
261 /// Return true if a mis-sized diagnostic is issued for \p DVI.
262 bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
263   // The size of a dbg.value's value operand should match the size of the
264   // variable it corresponds to.
265   //
266   // TODO: This, along with a check for non-null value operands, should be
267   // promoted to verifier failures.
268   Value *V = DVI->getValue();
269   if (!V)
270     return false;
271 
272   // For now, don't try to interpret anything more complicated than an empty
273   // DIExpression. Eventually we should try to handle OP_deref and fragments.
274   if (DVI->getExpression()->getNumElements())
275     return false;
276 
277   Type *Ty = V->getType();
278   uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty);
279   Optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits();
280   if (!ValueOperandSize || !DbgVarSize)
281     return false;
282 
283   bool HasBadSize = false;
284   if (Ty->isIntegerTy()) {
285     auto Signedness = DVI->getVariable()->getSignedness();
286     if (Signedness && *Signedness == DIBasicType::Signedness::Signed)
287       HasBadSize = ValueOperandSize < *DbgVarSize;
288   } else {
289     HasBadSize = ValueOperandSize != *DbgVarSize;
290   }
291 
292   if (HasBadSize) {
293     dbg() << "ERROR: dbg.value operand has size " << ValueOperandSize
294           << ", but its variable has size " << *DbgVarSize << ": ";
295     DVI->print(dbg());
296     dbg() << "\n";
297   }
298   return HasBadSize;
299 }
300 
301 bool checkDebugifyMetadata(Module &M,
302                            iterator_range<Module::iterator> Functions,
303                            StringRef NameOfWrappedPass, StringRef Banner,
304                            bool Strip, DebugifyStatsMap *StatsMap) {
305   // Skip modules without debugify metadata.
306   NamedMDNode *NMD = M.getNamedMetadata("llvm.debugify");
307   if (!NMD) {
308     dbg() << Banner << ": Skipping module without debugify metadata\n";
309     return false;
310   }
311 
312   auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
313     return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
314         ->getZExtValue();
315   };
316   assert(NMD->getNumOperands() == 2 &&
317          "llvm.debugify should have exactly 2 operands!");
318   unsigned OriginalNumLines = getDebugifyOperand(0);
319   unsigned OriginalNumVars = getDebugifyOperand(1);
320   bool HasErrors = false;
321 
322   // Track debug info loss statistics if able.
323   DebugifyStatistics *Stats = nullptr;
324   if (StatsMap && !NameOfWrappedPass.empty())
325     Stats = &StatsMap->operator[](NameOfWrappedPass);
326 
327   BitVector MissingLines{OriginalNumLines, true};
328   BitVector MissingVars{OriginalNumVars, true};
329   for (Function &F : Functions) {
330     if (isFunctionSkipped(F))
331       continue;
332 
333     // Find missing lines.
334     for (Instruction &I : instructions(F)) {
335       if (isa<DbgValueInst>(&I) || isa<PHINode>(&I))
336         continue;
337 
338       auto DL = I.getDebugLoc();
339       if (DL && DL.getLine() != 0) {
340         MissingLines.reset(DL.getLine() - 1);
341         continue;
342       }
343 
344       if (!DL) {
345         dbg() << "WARNING: Instruction with empty DebugLoc in function ";
346         dbg() << F.getName() << " --";
347         I.print(dbg());
348         dbg() << "\n";
349       }
350     }
351 
352     // Find missing variables and mis-sized debug values.
353     for (Instruction &I : instructions(F)) {
354       auto *DVI = dyn_cast<DbgValueInst>(&I);
355       if (!DVI)
356         continue;
357 
358       unsigned Var = ~0U;
359       (void)to_integer(DVI->getVariable()->getName(), Var, 10);
360       assert(Var <= OriginalNumVars && "Unexpected name for DILocalVariable");
361       bool HasBadSize = diagnoseMisSizedDbgValue(M, DVI);
362       if (!HasBadSize)
363         MissingVars.reset(Var - 1);
364       HasErrors |= HasBadSize;
365     }
366   }
367 
368   // Print the results.
369   for (unsigned Idx : MissingLines.set_bits())
370     dbg() << "WARNING: Missing line " << Idx + 1 << "\n";
371 
372   for (unsigned Idx : MissingVars.set_bits())
373     dbg() << "WARNING: Missing variable " << Idx + 1 << "\n";
374 
375   // Update DI loss statistics.
376   if (Stats) {
377     Stats->NumDbgLocsExpected += OriginalNumLines;
378     Stats->NumDbgLocsMissing += MissingLines.count();
379     Stats->NumDbgValuesExpected += OriginalNumVars;
380     Stats->NumDbgValuesMissing += MissingVars.count();
381   }
382 
383   dbg() << Banner;
384   if (!NameOfWrappedPass.empty())
385     dbg() << " [" << NameOfWrappedPass << "]";
386   dbg() << ": " << (HasErrors ? "FAIL" : "PASS") << '\n';
387 
388   // Strip debugify metadata if required.
389   if (Strip)
390     return stripDebugifyMetadata(M);
391 
392   return false;
393 }
394 
395 /// ModulePass for attaching synthetic debug info to everything, used with the
396 /// legacy module pass manager.
397 struct DebugifyModulePass : public ModulePass {
398   bool runOnModule(Module &M) override { return applyDebugify(M); }
399 
400   DebugifyModulePass() : ModulePass(ID) {}
401 
402   void getAnalysisUsage(AnalysisUsage &AU) const override {
403     AU.setPreservesAll();
404   }
405 
406   static char ID; // Pass identification.
407 };
408 
409 /// FunctionPass for attaching synthetic debug info to instructions within a
410 /// single function, used with the legacy module pass manager.
411 struct DebugifyFunctionPass : public FunctionPass {
412   bool runOnFunction(Function &F) override { return applyDebugify(F); }
413 
414   DebugifyFunctionPass() : FunctionPass(ID) {}
415 
416   void getAnalysisUsage(AnalysisUsage &AU) const override {
417     AU.setPreservesAll();
418   }
419 
420   static char ID; // Pass identification.
421 };
422 
423 /// ModulePass for checking debug info inserted by -debugify, used with the
424 /// legacy module pass manager.
425 struct CheckDebugifyModulePass : public ModulePass {
426   bool runOnModule(Module &M) override {
427     return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
428                                  "CheckModuleDebugify", Strip, StatsMap);
429   }
430 
431   CheckDebugifyModulePass(bool Strip = false, StringRef NameOfWrappedPass = "",
432                           DebugifyStatsMap *StatsMap = nullptr)
433       : ModulePass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass),
434         StatsMap(StatsMap) {}
435 
436   void getAnalysisUsage(AnalysisUsage &AU) const override {
437     AU.setPreservesAll();
438   }
439 
440   static char ID; // Pass identification.
441 
442 private:
443   bool Strip;
444   StringRef NameOfWrappedPass;
445   DebugifyStatsMap *StatsMap;
446 };
447 
448 /// FunctionPass for checking debug info inserted by -debugify-function, used
449 /// with the legacy module pass manager.
450 struct CheckDebugifyFunctionPass : public FunctionPass {
451   bool runOnFunction(Function &F) override {
452     Module &M = *F.getParent();
453     auto FuncIt = F.getIterator();
454     return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
455                                  NameOfWrappedPass, "CheckFunctionDebugify",
456                                  Strip, StatsMap);
457   }
458 
459   CheckDebugifyFunctionPass(bool Strip = false,
460                             StringRef NameOfWrappedPass = "",
461                             DebugifyStatsMap *StatsMap = nullptr)
462       : FunctionPass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass),
463         StatsMap(StatsMap) {}
464 
465   void getAnalysisUsage(AnalysisUsage &AU) const override {
466     AU.setPreservesAll();
467   }
468 
469   static char ID; // Pass identification.
470 
471 private:
472   bool Strip;
473   StringRef NameOfWrappedPass;
474   DebugifyStatsMap *StatsMap;
475 };
476 
477 } // end anonymous namespace
478 
479 void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) {
480   std::error_code EC;
481   raw_fd_ostream OS{Path, EC};
482   if (EC) {
483     errs() << "Could not open file: " << EC.message() << ", " << Path << '\n';
484     return;
485   }
486 
487   OS << "Pass Name" << ',' << "# of missing debug values" << ','
488      << "# of missing locations" << ',' << "Missing/Expected value ratio" << ','
489      << "Missing/Expected location ratio" << '\n';
490   for (const auto &Entry : Map) {
491     StringRef Pass = Entry.first;
492     DebugifyStatistics Stats = Entry.second;
493 
494     OS << Pass << ',' << Stats.NumDbgValuesMissing << ','
495        << Stats.NumDbgLocsMissing << ',' << Stats.getMissingValueRatio() << ','
496        << Stats.getEmptyLocationRatio() << '\n';
497   }
498 }
499 
500 ModulePass *llvm::createDebugifyModulePass() {
501   return new DebugifyModulePass();
502 }
503 
504 FunctionPass *llvm::createDebugifyFunctionPass() {
505   return new DebugifyFunctionPass();
506 }
507 
508 PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
509   applyDebugifyMetadata(M, M.functions(),
510                         "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
511   return PreservedAnalyses::all();
512 }
513 
514 ModulePass *llvm::createCheckDebugifyModulePass(bool Strip,
515                                                 StringRef NameOfWrappedPass,
516                                                 DebugifyStatsMap *StatsMap) {
517   return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
518 }
519 
520 FunctionPass *
521 llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass,
522                                       DebugifyStatsMap *StatsMap) {
523   return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
524 }
525 
526 PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
527                                               ModuleAnalysisManager &) {
528   checkDebugifyMetadata(M, M.functions(), "", "CheckModuleDebugify", false,
529                         nullptr);
530   return PreservedAnalyses::all();
531 }
532 
533 void DebugifyEachInstrumentation::registerCallbacks(
534     PassInstrumentationCallbacks &PIC) {
535   PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) {
536     if (any_isa<const Function *>(IR))
537       applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)));
538     else if (any_isa<const Module *>(IR))
539       applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)));
540   });
541   PIC.registerAfterPassCallback([this](StringRef P, Any IR,
542                                        const PreservedAnalyses &PassPA) {
543     if (any_isa<const Function *>(IR)) {
544       auto &F = *const_cast<Function *>(any_cast<const Function *>(IR));
545       Module &M = *F.getParent();
546       auto It = F.getIterator();
547       checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
548                             "CheckFunctionDebugify", /*Strip=*/true, &StatsMap);
549     } else if (any_isa<const Module *>(IR)) {
550       auto &M = *const_cast<Module *>(any_cast<const Module *>(IR));
551       checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
552                             /*Strip=*/true, &StatsMap);
553     }
554   });
555 }
556 
557 char DebugifyModulePass::ID = 0;
558 static RegisterPass<DebugifyModulePass> DM("debugify",
559                                            "Attach debug info to everything");
560 
561 char CheckDebugifyModulePass::ID = 0;
562 static RegisterPass<CheckDebugifyModulePass>
563     CDM("check-debugify", "Check debug info from -debugify");
564 
565 char DebugifyFunctionPass::ID = 0;
566 static RegisterPass<DebugifyFunctionPass> DF("debugify-function",
567                                              "Attach debug info to a function");
568 
569 char CheckDebugifyFunctionPass::ID = 0;
570 static RegisterPass<CheckDebugifyFunctionPass>
571     CDF("check-debugify-function", "Check debug info from -debugify-function");
572