1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for writing Microsoft CodeView debug info.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeViewDebug.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCSymbol.h"
19 #include "llvm/Support/COFF.h"
20 
21 using namespace llvm::codeview;
22 
23 namespace llvm {
24 
25 StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
26   std::string &Filepath = FileToFilepathMap[File];
27   if (!Filepath.empty())
28     return Filepath;
29 
30   StringRef Dir = File->getDirectory(), Filename = File->getFilename();
31 
32   // Clang emits directory and relative filename info into the IR, but CodeView
33   // operates on full paths.  We could change Clang to emit full paths too, but
34   // that would increase the IR size and probably not needed for other users.
35   // For now, just concatenate and canonicalize the path here.
36   if (Filename.find(':') == 1)
37     Filepath = Filename;
38   else
39     Filepath = (Dir + "\\" + Filename).str();
40 
41   // Canonicalize the path.  We have to do it textually because we may no longer
42   // have access the file in the filesystem.
43   // First, replace all slashes with backslashes.
44   std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
45 
46   // Remove all "\.\" with "\".
47   size_t Cursor = 0;
48   while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
49     Filepath.erase(Cursor, 2);
50 
51   // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
52   // path should be well-formatted, e.g. start with a drive letter, etc.
53   Cursor = 0;
54   while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
55     // Something's wrong if the path starts with "\..\", abort.
56     if (Cursor == 0)
57       break;
58 
59     size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
60     if (PrevSlash == std::string::npos)
61       // Something's wrong, abort.
62       break;
63 
64     Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
65     // The next ".." might be following the one we've just erased.
66     Cursor = PrevSlash;
67   }
68 
69   // Remove all duplicate backslashes.
70   Cursor = 0;
71   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
72     Filepath.erase(Cursor, 1);
73 
74   return Filepath;
75 }
76 
77 void CodeViewDebug::maybeRecordLocation(DebugLoc DL,
78                                         const MachineFunction *MF) {
79   // Skip this instruction if it has the same location as the previous one.
80   if (DL == CurFn->LastLoc)
81     return;
82 
83   const DIScope *Scope = DL.get()->getScope();
84   if (!Scope)
85     return;
86 
87   // Skip this line if it is longer than the maximum we can record.
88   if (DL.getLine() > COFF::CVL_MaxLineNumber)
89     return;
90 
91   CurFn->LastLoc = DL;
92 
93   MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
94   Asm->OutStreamer->EmitLabel(MCL);
95   CurFn->Instrs.push_back(MCL);
96   LabelsAndLocs[MCL] = DL;
97 }
98 
99 CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
100     : Asm(nullptr), CurFn(nullptr) {
101   MachineModuleInfo *MMI = AP->MMI;
102 
103   // If module doesn't have named metadata anchors or COFF debug section
104   // is not available, skip any debug info related stuff.
105   if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
106       !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
107     return;
108 
109   // Tell MMI that we have debug info.
110   MMI->setDebugInfoAvailability(true);
111   Asm = AP;
112 }
113 
114 void CodeViewDebug::endModule() {
115   if (FnDebugInfo.empty())
116     return;
117 
118   // FIXME: For functions that are comdat, we should emit separate .debug$S
119   // sections that are comdat associative with the main function instead of
120   // having one big .debug$S section.
121   assert(Asm != nullptr);
122   Asm->OutStreamer->SwitchSection(
123       Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
124   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
125 
126   // The COFF .debug$S section consists of several subsections, each starting
127   // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
128   // of the payload followed by the payload itself.  The subsections are 4-byte
129   // aligned.
130 
131   // Emit per-function debug information.  This code is extracted into a
132   // separate function for readability.
133   for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
134     emitDebugInfoForFunction(VisitedFunctions[I]);
135 
136   // This subsection holds a file index to offset in string table table.
137   Asm->OutStreamer->AddComment("File index to string table offset subsection");
138   Asm->EmitInt32(unsigned(ModuleSubstreamKind::FileChecksums));
139   size_t NumFilenames = FileNameRegistry.Infos.size();
140   Asm->EmitInt32(8 * NumFilenames);
141   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
142     StringRef Filename = FileNameRegistry.Filenames[I];
143     // For each unique filename, just write its offset in the string table.
144     Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
145     // The function name offset is not followed by any additional data.
146     Asm->EmitInt32(0);
147   }
148 
149   // This subsection holds the string table.
150   Asm->OutStreamer->AddComment("String table");
151   Asm->EmitInt32(unsigned(ModuleSubstreamKind::StringTable));
152   Asm->EmitInt32(FileNameRegistry.LastOffset);
153   // The payload starts with a null character.
154   Asm->EmitInt8(0);
155 
156   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
157     // Just emit unique filenames one by one, separated by a null character.
158     Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]);
159     Asm->EmitInt8(0);
160   }
161 
162   // No more subsections. Fill with zeros to align the end of the section by 4.
163   Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
164 
165   clear();
166 }
167 
168 static void EmitLabelDiff(MCStreamer &Streamer,
169                           const MCSymbol *From, const MCSymbol *To,
170                           unsigned int Size = 4) {
171   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
172   MCContext &Context = Streamer.getContext();
173   const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
174                *ToRef   = MCSymbolRefExpr::create(To, Variant, Context);
175   const MCExpr *AddrDelta =
176       MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
177   Streamer.EmitValue(AddrDelta, Size);
178 }
179 
180 static const DIFile *getFileFromLoc(DebugLoc DL) {
181   return DL.get()->getScope()->getFile();
182 }
183 
/// Emit the debug info subsections for the function \p GV into the current
/// section: first a symbols subsection holding the procedure record, then a
/// line table subsection that maps code offsets to file, line and column.
///
/// Nothing is emitted if no instruction locations were recorded for \p GV.
/// The order of the emitted fields and labels below defines the layout of the
/// output, so statements here must not be reordered casually.
void CodeViewDebug::emitDebugInfoForFunction(const Function *GV) {
  // For each function there is a separate subsection
  // which holds the PC to file:line table.
  const MCSymbol *Fn = Asm->getSymbol(GV);
  assert(Fn);

  const FunctionInfo &FI = FnDebugInfo[GV];
  if (FI.Instrs.empty())
    return;
  assert(FI.End && "Don't know where the function ends?");

  // Prefer the display name from the function's DISubprogram, if there is one.
  StringRef FuncName;
  if (auto *SP = getDISubprogram(GV))
    FuncName = SP->getDisplayName();

  // If our DISubprogram name is empty, use the mangled name.
  if (FuncName.empty())
    FuncName = GlobalValue::getRealLinkageName(GV->getName());

  // Emit a symbol subsection, required by VS2012+ to find function boundaries.
  // The subsection header is the subsection kind followed by the 4-byte
  // payload length (SymbolsEnd - SymbolsBegin).
  MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
           *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
  Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
  Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols));
  EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
  Asm->OutStreamer->EmitLabel(SymbolsBegin);
  {
    // The procedure record starts with its own 2-byte length, measured from
    // just after the length field up to ProcSegmentEnd.
    MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
             *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
    EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
    Asm->OutStreamer->EmitLabel(ProcSegmentBegin);

    Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID));

    // Some bytes of this segment don't seem to be required for basic debugging,
    // so just fill them with zeroes.
    Asm->OutStreamer->EmitFill(12, 0);
    // This is the important bit that tells the debugger where the function
    // code is located and what its size is:
    EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
    Asm->OutStreamer->EmitFill(12, 0);
    Asm->OutStreamer->EmitCOFFSecRel32(Fn);
    Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
    Asm->EmitInt8(0);
    // Emit the function display name as a null-terminated string.
    Asm->OutStreamer->EmitBytes(FuncName);
    Asm->EmitInt8(0);
    Asm->OutStreamer->EmitLabel(ProcSegmentEnd);

    // We're done with this function.  The end record follows the same
    // length-then-kind pattern as above; 0x0002 presumably is its length,
    // i.e. just the 2-byte record kind.
    Asm->EmitInt16(0x0002);
    Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END));
  }
  Asm->OutStreamer->EmitLabel(SymbolsEnd);
  // Every subsection must be aligned to a 4-byte boundary.  Everything emitted
  // for this subsection apart from the name adds up to a multiple of 4 bytes
  // (8 bytes of header plus 44 bytes of payload), so the amount of padding
  // depends only on the length of the name.
  Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);

  // PCs/Instructions are grouped into segments sharing the same filename.
  // Pre-calculate the lengths (in instructions) of these segments and store
  // them in a map for convenience.  Each index in the map is the sequential
  // number of the respective instruction that starts a new segment.
  DenseMap<size_t, size_t> FilenameSegmentLengths;
  size_t LastSegmentEnd = 0;
  const DIFile *PrevFile = getFileFromLoc(LabelsAndLocs[FI.Instrs[0]]);
  for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) {
    const DIFile *CurFile = getFileFromLoc(LabelsAndLocs[FI.Instrs[J]]);
    if (PrevFile == CurFile)
      continue;
    // The file changed at instruction J: close the segment that started at
    // LastSegmentEnd and begin a new one here.
    FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd;
    LastSegmentEnd = J;
    PrevFile = CurFile;
  }
  // The final segment runs to the end of the instruction list.
  FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;

  // Emit a line table subsection, required to do PC-to-file:line lookup.
  // As above, the header is the subsection kind followed by the 4-byte payload
  // length (LineTableEnd - LineTableBegin).
  Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName));
  Asm->EmitInt32(unsigned(ModuleSubstreamKind::Lines));
  MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(),
           *LineTableEnd = Asm->MMI->getContext().createTempSymbol();
  EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd);
  Asm->OutStreamer->EmitLabel(LineTableBegin);

  // Identify the function this subsection is for.
  Asm->OutStreamer->EmitCOFFSecRel32(Fn);
  Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
  // Insert flags after a 16-bit section index.
  Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS);

  // Length of the function's code, in bytes.
  EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);

  // PC-to-linenumber lookup table:
  // End label of the filename segment currently being emitted; null until the
  // first segment has been started.
  MCSymbol *FileSegmentEnd = nullptr;

  // The start of the last segment:
  size_t LastSegmentStart = 0;

  // Finishes the segment that started at LastSegmentStart: emits one
  // (start column, end column) pair for each of its instructions and then the
  // segment's end label.  Does nothing if no segment has been started yet.
  auto FinishPreviousChunk = [&] {
    if (!FileSegmentEnd)
      return;
    for (size_t ColSegI = LastSegmentStart,
                ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
         ColSegI != ColSegEnd; ++ColSegI) {
      unsigned ColumnNumber = LabelsAndLocs[FI.Instrs[ColSegI]].getCol();
      // A column number larger than the maximum we can record is not
      // truncated; it is replaced by 0.
      if (ColumnNumber > COFF::CVL_MaxColumnNumber)
        ColumnNumber = 0;
      Asm->EmitInt16(ColumnNumber); // Start column
      Asm->EmitInt16(0);            // End column
    }
    Asm->OutStreamer->EmitLabel(FileSegmentEnd);
  };

  for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
    MCSymbol *Instr = FI.Instrs[J];
    assert(LabelsAndLocs.count(Instr));

    if (FilenameSegmentLengths.count(J)) {
      // We came to a beginning of a new filename segment.
      FinishPreviousChunk();
      const DIFile *File = getFileFromLoc(LabelsAndLocs[FI.Instrs[J]]);
      StringRef CurFilename = getFullFilepath(File);
      size_t IndexInFileTable = FileNameRegistry.add(CurFilename);
      // Each segment starts with 8 * the index of the file in the file name
      // registry.  endModule() emits 8 bytes per file in the file index
      // subsection, so this is the offset of the file's entry there rather
      // than an offset in the string table.
      Asm->OutStreamer->AddComment(
          "Segment for file '" + Twine(CurFilename) + "' begins");
      MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol();
      Asm->OutStreamer->EmitLabel(FileSegmentBegin);
      Asm->EmitInt32(8 * IndexInFileTable);

      // Number of PC records in the lookup table.
      size_t SegmentLength = FilenameSegmentLengths[J];
      Asm->EmitInt32(SegmentLength);

      // Full size of the segment for this filename, including the prev two
      // records.  FileSegmentEnd is emitted later by FinishPreviousChunk,
      // after the segment's column records.
      FileSegmentEnd = Asm->MMI->getContext().createTempSymbol();
      EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
      LastSegmentStart = J;
    }

    // The first PC with the given linenumber and the linenumber itself.
    // The PC is emitted as an offset from the start of the function.
    EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
    uint32_t LineNumber = LabelsAndLocs[Instr].getLine();
    assert(LineNumber <= COFF::CVL_MaxLineNumber);
    uint32_t LineData = LineNumber | COFF::CVL_IsStatement;
    Asm->EmitInt32(LineData);
  }

  // Close the last filename segment, then the line table subsection itself.
  FinishPreviousChunk();
  Asm->OutStreamer->EmitLabel(LineTableEnd);
}
338 
339 void CodeViewDebug::beginFunction(const MachineFunction *MF) {
340   assert(!CurFn && "Can't process two functions at once!");
341 
342   if (!Asm || !Asm->MMI->hasDebugInfo())
343     return;
344 
345   const Function *GV = MF->getFunction();
346   assert(FnDebugInfo.count(GV) == false);
347   VisitedFunctions.push_back(GV);
348   CurFn = &FnDebugInfo[GV];
349 
350   // Find the end of the function prolog.
351   // FIXME: is there a simpler a way to do this? Can we just search
352   // for the first instruction of the function, not the last of the prolog?
353   DebugLoc PrologEndLoc;
354   bool EmptyPrologue = true;
355   for (const auto &MBB : *MF) {
356     if (PrologEndLoc)
357       break;
358     for (const auto &MI : MBB) {
359       if (MI.isDebugValue())
360         continue;
361 
362       // First known non-DBG_VALUE and non-frame setup location marks
363       // the beginning of the function body.
364       // FIXME: do we need the first subcondition?
365       if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
366         PrologEndLoc = MI.getDebugLoc();
367         break;
368       }
369       EmptyPrologue = false;
370     }
371   }
372   // Record beginning of function if we have a non-empty prologue.
373   if (PrologEndLoc && !EmptyPrologue) {
374     DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
375     maybeRecordLocation(FnStartDL, MF);
376   }
377 }
378 
379 void CodeViewDebug::endFunction(const MachineFunction *MF) {
380   if (!Asm || !CurFn)  // We haven't created any debug info for this function.
381     return;
382 
383   const Function *GV = MF->getFunction();
384   assert(FnDebugInfo.count(GV));
385   assert(CurFn == &FnDebugInfo[GV]);
386 
387   if (CurFn->Instrs.empty()) {
388     FnDebugInfo.erase(GV);
389     VisitedFunctions.pop_back();
390   } else {
391     CurFn->End = Asm->getFunctionEnd();
392   }
393   CurFn = nullptr;
394 }
395 
396 void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
397   // Ignore DBG_VALUE locations and function prologue.
398   if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
399     return;
400   DebugLoc DL = MI->getDebugLoc();
401   if (DL == PrevInstLoc || !DL)
402     return;
403   maybeRecordLocation(DL, Asm->MF);
404 }
405 }
406