1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for writing Microsoft CodeView debug info.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeViewDebug.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/Line.h"
17 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCSymbol.h"
20 #include "llvm/Support/COFF.h"
21 
22 using namespace llvm::codeview;
23 
24 namespace llvm {
25 
26 StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
27   std::string &Filepath = FileToFilepathMap[File];
28   if (!Filepath.empty())
29     return Filepath;
30 
31   StringRef Dir = File->getDirectory(), Filename = File->getFilename();
32 
33   // Clang emits directory and relative filename info into the IR, but CodeView
34   // operates on full paths.  We could change Clang to emit full paths too, but
35   // that would increase the IR size and probably not needed for other users.
36   // For now, just concatenate and canonicalize the path here.
37   if (Filename.find(':') == 1)
38     Filepath = Filename;
39   else
40     Filepath = (Dir + "\\" + Filename).str();
41 
42   // Canonicalize the path.  We have to do it textually because we may no longer
43   // have access the file in the filesystem.
44   // First, replace all slashes with backslashes.
45   std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
46 
47   // Remove all "\.\" with "\".
48   size_t Cursor = 0;
49   while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
50     Filepath.erase(Cursor, 2);
51 
52   // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
53   // path should be well-formatted, e.g. start with a drive letter, etc.
54   Cursor = 0;
55   while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
56     // Something's wrong if the path starts with "\..\", abort.
57     if (Cursor == 0)
58       break;
59 
60     size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
61     if (PrevSlash == std::string::npos)
62       // Something's wrong, abort.
63       break;
64 
65     Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
66     // The next ".." might be following the one we've just erased.
67     Cursor = PrevSlash;
68   }
69 
70   // Remove all duplicate backslashes.
71   Cursor = 0;
72   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
73     Filepath.erase(Cursor, 1);
74 
75   return Filepath;
76 }
77 
78 unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
79   unsigned NextId = FileIdMap.size() + 1;
80   auto Insertion = FileIdMap.insert(std::make_pair(F, NextId));
81   if (Insertion.second) {
82     // We have to compute the full filepath and emit a .cv_file directive.
83     StringRef FullPath = getFullFilepath(F);
84     NextId = Asm->OutStreamer->EmitCVFileDirective(NextId, FullPath);
85     assert(NextId == FileIdMap.size() && ".cv_file directive failed");
86   }
87   return Insertion.first->second;
88 }
89 
90 void CodeViewDebug::maybeRecordLocation(DebugLoc DL,
91                                         const MachineFunction *MF) {
92   // Skip this instruction if it has the same location as the previous one.
93   if (DL == CurFn->LastLoc)
94     return;
95 
96   const DIScope *Scope = DL.get()->getScope();
97   if (!Scope)
98     return;
99 
100   // Skip this line if it is longer than the maximum we can record.
101   LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true);
102   if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() ||
103       LI.isNeverStepInto())
104     return;
105 
106   ColumnInfo CI(DL.getCol(), /*EndColumn=*/0);
107   if (CI.getStartColumn() != DL.getCol())
108     return;
109 
110   if (!CurFn->HaveLineInfo)
111     CurFn->HaveLineInfo = true;
112   unsigned FileId = 0;
113   if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
114     FileId = CurFn->LastFileId;
115   else
116     FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
117   CurFn->LastLoc = DL;
118   Asm->OutStreamer->EmitCVLocDirective(CurFn->FuncId, FileId, DL.getLine(),
119                                        DL.getCol(), /*PrologueEnd=*/false,
120                                        /*IsStmt=*/false, DL->getFilename());
121 }
122 
123 CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
124     : Asm(nullptr), CurFn(nullptr) {
125   MachineModuleInfo *MMI = AP->MMI;
126 
127   // If module doesn't have named metadata anchors or COFF debug section
128   // is not available, skip any debug info related stuff.
129   if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
130       !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
131     return;
132 
133   // Tell MMI that we have debug info.
134   MMI->setDebugInfoAvailability(true);
135   Asm = AP;
136 }
137 
138 void CodeViewDebug::endModule() {
139   if (FnDebugInfo.empty())
140     return;
141 
142   // FIXME: For functions that are comdat, we should emit separate .debug$S
143   // sections that are comdat associative with the main function instead of
144   // having one big .debug$S section.
145   assert(Asm != nullptr);
146   Asm->OutStreamer->SwitchSection(
147       Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
148   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
149 
150   // The COFF .debug$S section consists of several subsections, each starting
151   // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
152   // of the payload followed by the payload itself.  The subsections are 4-byte
153   // aligned.
154 
155   // Emit per-function debug information.
156   for (auto &P : FnDebugInfo)
157     emitDebugInfoForFunction(P.first, P.second);
158 
159   // This subsection holds a file index to offset in string table table.
160   Asm->OutStreamer->AddComment("File index to string table offset subsection");
161   Asm->OutStreamer->EmitCVFileChecksumsDirective();
162 
163   // This subsection holds the string table.
164   Asm->OutStreamer->AddComment("String table");
165   Asm->OutStreamer->EmitCVStringTableDirective();
166 
167   clear();
168 }
169 
170 static void EmitLabelDiff(MCStreamer &Streamer,
171                           const MCSymbol *From, const MCSymbol *To,
172                           unsigned int Size = 4) {
173   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
174   MCContext &Context = Streamer.getContext();
175   const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
176                *ToRef   = MCSymbolRefExpr::create(To, Variant, Context);
177   const MCExpr *AddrDelta =
178       MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
179   Streamer.EmitValue(AddrDelta, Size);
180 }
181 
182 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
183                                              FunctionInfo &FI) {
184   // For each function there is a separate subsection
185   // which holds the PC to file:line table.
186   const MCSymbol *Fn = Asm->getSymbol(GV);
187   assert(Fn);
188 
189   StringRef FuncName;
190   if (auto *SP = getDISubprogram(GV))
191     FuncName = SP->getDisplayName();
192 
193   // If our DISubprogram name is empty, use the mangled name.
194   if (FuncName.empty())
195     FuncName = GlobalValue::getRealLinkageName(GV->getName());
196 
197   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
198   MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
199            *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
200   Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
201   Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols));
202   EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
203   Asm->OutStreamer->EmitLabel(SymbolsBegin);
204   {
205     MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
206              *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
207     EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
208     Asm->OutStreamer->EmitLabel(ProcSegmentBegin);
209 
210     Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID));
211 
212     // Some bytes of this segment don't seem to be required for basic debugging,
213     // so just fill them with zeroes.
214     Asm->OutStreamer->EmitFill(12, 0);
215     // This is the important bit that tells the debugger where the function
216     // code is located and what's its size:
217     EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
218     Asm->OutStreamer->EmitFill(12, 0);
219     Asm->OutStreamer->EmitCOFFSecRel32(Fn);
220     Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
221     Asm->EmitInt8(0);
222     // Emit the function display name as a null-terminated string.
223     Asm->OutStreamer->EmitBytes(FuncName);
224     Asm->EmitInt8(0);
225     Asm->OutStreamer->EmitLabel(ProcSegmentEnd);
226 
227     // We're done with this function.
228     Asm->EmitInt16(0x0002);
229     Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END));
230   }
231   Asm->OutStreamer->EmitLabel(SymbolsEnd);
232   // Every subsection must be aligned to a 4-byte boundary.
233   Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);
234 
235   // We have an assembler directive that takes care of the whole line table.
236   Asm->OutStreamer->EmitCVLinetableDirective(FI.FuncId, Fn, FI.End);
237 }
238 
239 void CodeViewDebug::beginFunction(const MachineFunction *MF) {
240   assert(!CurFn && "Can't process two functions at once!");
241 
242   if (!Asm || !Asm->MMI->hasDebugInfo())
243     return;
244 
245   const Function *GV = MF->getFunction();
246   assert(FnDebugInfo.count(GV) == false);
247   CurFn = &FnDebugInfo[GV];
248   CurFn->FuncId = NextFuncId++;
249 
250   // Find the end of the function prolog.
251   // FIXME: is there a simpler a way to do this? Can we just search
252   // for the first instruction of the function, not the last of the prolog?
253   DebugLoc PrologEndLoc;
254   bool EmptyPrologue = true;
255   for (const auto &MBB : *MF) {
256     if (PrologEndLoc)
257       break;
258     for (const auto &MI : MBB) {
259       if (MI.isDebugValue())
260         continue;
261 
262       // First known non-DBG_VALUE and non-frame setup location marks
263       // the beginning of the function body.
264       // FIXME: do we need the first subcondition?
265       if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
266         PrologEndLoc = MI.getDebugLoc();
267         break;
268       }
269       EmptyPrologue = false;
270     }
271   }
272   // Record beginning of function if we have a non-empty prologue.
273   if (PrologEndLoc && !EmptyPrologue) {
274     DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
275     maybeRecordLocation(FnStartDL, MF);
276   }
277 }
278 
279 void CodeViewDebug::endFunction(const MachineFunction *MF) {
280   if (!Asm || !CurFn)  // We haven't created any debug info for this function.
281     return;
282 
283   const Function *GV = MF->getFunction();
284   assert(FnDebugInfo.count(GV));
285   assert(CurFn == &FnDebugInfo[GV]);
286 
287   // Don't emit anything if we don't have any line tables.
288   if (!CurFn->HaveLineInfo) {
289     FnDebugInfo.erase(GV);
290   } else {
291     CurFn->End = Asm->getFunctionEnd();
292   }
293   CurFn = nullptr;
294 }
295 
296 void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
297   // Ignore DBG_VALUE locations and function prologue.
298   if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
299     return;
300   DebugLoc DL = MI->getDebugLoc();
301   if (DL == PrevInstLoc || !DL)
302     return;
303   maybeRecordLocation(DL, Asm->MF);
304 }
305 }
306