1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for writing Microsoft CodeView debug info.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeViewDebug.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCSymbol.h"
19 #include "llvm/Support/COFF.h"
20 
21 using namespace llvm::codeview;
22 
23 namespace llvm {
24 
25 StringRef CodeViewDebug::getFullFilepath(const MDNode *S) {
26   assert(S);
27   assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) ||
28           isa<DILexicalBlockBase>(S)) &&
29          "Unexpected scope info");
30 
31   auto *Scope = cast<DIScope>(S);
32   StringRef Dir = Scope->getDirectory(),
33             Filename = Scope->getFilename();
34   std::string &Filepath =
35       DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
36   if (!Filepath.empty())
37     return Filepath;
38 
39   // Clang emits directory and relative filename info into the IR, but CodeView
40   // operates on full paths.  We could change Clang to emit full paths too, but
41   // that would increase the IR size and probably not needed for other users.
42   // For now, just concatenate and canonicalize the path here.
43   if (Filename.find(':') == 1)
44     Filepath = Filename;
45   else
46     Filepath = (Dir + "\\" + Filename).str();
47 
48   // Canonicalize the path.  We have to do it textually because we may no longer
49   // have access the file in the filesystem.
50   // First, replace all slashes with backslashes.
51   std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
52 
53   // Remove all "\.\" with "\".
54   size_t Cursor = 0;
55   while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
56     Filepath.erase(Cursor, 2);
57 
58   // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
59   // path should be well-formatted, e.g. start with a drive letter, etc.
60   Cursor = 0;
61   while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
62     // Something's wrong if the path starts with "\..\", abort.
63     if (Cursor == 0)
64       break;
65 
66     size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
67     if (PrevSlash == std::string::npos)
68       // Something's wrong, abort.
69       break;
70 
71     Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
72     // The next ".." might be following the one we've just erased.
73     Cursor = PrevSlash;
74   }
75 
76   // Remove all duplicate backslashes.
77   Cursor = 0;
78   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
79     Filepath.erase(Cursor, 1);
80 
81   return Filepath;
82 }
83 
84 void CodeViewDebug::maybeRecordLocation(DebugLoc DL,
85                                                 const MachineFunction *MF) {
86   const MDNode *Scope = DL.getScope();
87   if (!Scope)
88     return;
89   unsigned LineNumber = DL.getLine();
90   // Skip this line if it is longer than the maximum we can record.
91   if (LineNumber > COFF::CVL_MaxLineNumber)
92     return;
93 
94   unsigned ColumnNumber = DL.getCol();
95   // Truncate the column number if it is longer than the maximum we can record.
96   if (ColumnNumber > COFF::CVL_MaxColumnNumber)
97     ColumnNumber = 0;
98 
99   StringRef Filename = getFullFilepath(Scope);
100 
101   // Skip this instruction if it has the same file:line as the previous one.
102   assert(CurFn);
103   if (!CurFn->Instrs.empty()) {
104     const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()];
105     if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber &&
106         LastInstr.ColumnNumber == ColumnNumber)
107       return;
108   }
109   FileNameRegistry.add(Filename);
110 
111   MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
112   Asm->OutStreamer->EmitLabel(MCL);
113   CurFn->Instrs.push_back(MCL);
114   InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber);
115 }
116 
117 CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
118     : Asm(nullptr), CurFn(nullptr) {
119   MachineModuleInfo *MMI = AP->MMI;
120 
121   // If module doesn't have named metadata anchors or COFF debug section
122   // is not available, skip any debug info related stuff.
123   if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
124       !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
125     return;
126 
127   // Tell MMI that we have debug info.
128   MMI->setDebugInfoAvailability(true);
129   Asm = AP;
130 }
131 
132 void CodeViewDebug::endModule() {
133   if (FnDebugInfo.empty())
134     return;
135 
136   // FIXME: For functions that are comdat, we should emit separate .debug$S
137   // sections that are comdat associative with the main function instead of
138   // having one big .debug$S section.
139   assert(Asm != nullptr);
140   Asm->OutStreamer->SwitchSection(
141       Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
142   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
143 
144   // The COFF .debug$S section consists of several subsections, each starting
145   // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
146   // of the payload followed by the payload itself.  The subsections are 4-byte
147   // aligned.
148 
149   // Emit per-function debug information.  This code is extracted into a
150   // separate function for readability.
151   for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
152     emitDebugInfoForFunction(VisitedFunctions[I]);
153 
154   // This subsection holds a file index to offset in string table table.
155   Asm->OutStreamer->AddComment("File index to string table offset subsection");
156   Asm->EmitInt32(unsigned(ModuleSubstreamKind::FileChecksums));
157   size_t NumFilenames = FileNameRegistry.Infos.size();
158   Asm->EmitInt32(8 * NumFilenames);
159   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
160     StringRef Filename = FileNameRegistry.Filenames[I];
161     // For each unique filename, just write its offset in the string table.
162     Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
163     // The function name offset is not followed by any additional data.
164     Asm->EmitInt32(0);
165   }
166 
167   // This subsection holds the string table.
168   Asm->OutStreamer->AddComment("String table");
169   Asm->EmitInt32(unsigned(ModuleSubstreamKind::StringTable));
170   Asm->EmitInt32(FileNameRegistry.LastOffset);
171   // The payload starts with a null character.
172   Asm->EmitInt8(0);
173 
174   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
175     // Just emit unique filenames one by one, separated by a null character.
176     Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]);
177     Asm->EmitInt8(0);
178   }
179 
180   // No more subsections. Fill with zeros to align the end of the section by 4.
181   Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
182 
183   clear();
184 }
185 
186 static void EmitLabelDiff(MCStreamer &Streamer,
187                           const MCSymbol *From, const MCSymbol *To,
188                           unsigned int Size = 4) {
189   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
190   MCContext &Context = Streamer.getContext();
191   const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
192                *ToRef   = MCSymbolRefExpr::create(To, Variant, Context);
193   const MCExpr *AddrDelta =
194       MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
195   Streamer.EmitValue(AddrDelta, Size);
196 }
197 
198 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV) {
199   // For each function there is a separate subsection
200   // which holds the PC to file:line table.
201   const MCSymbol *Fn = Asm->getSymbol(GV);
202   assert(Fn);
203 
204   const FunctionInfo &FI = FnDebugInfo[GV];
205   if (FI.Instrs.empty())
206     return;
207   assert(FI.End && "Don't know where the function ends?");
208 
209   StringRef FuncName;
210   if (auto *SP = getDISubprogram(GV))
211     FuncName = SP->getDisplayName();
212 
213   // If our DISubprogram name is empty, use the mangled name.
214   if (FuncName.empty())
215     FuncName = GlobalValue::getRealLinkageName(GV->getName());
216 
217   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
218   MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
219            *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
220   Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
221   Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols));
222   EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
223   Asm->OutStreamer->EmitLabel(SymbolsBegin);
224   {
225     MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
226              *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
227     EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
228     Asm->OutStreamer->EmitLabel(ProcSegmentBegin);
229 
230     Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID));
231 
232     // Some bytes of this segment don't seem to be required for basic debugging,
233     // so just fill them with zeroes.
234     Asm->OutStreamer->EmitFill(12, 0);
235     // This is the important bit that tells the debugger where the function
236     // code is located and what's its size:
237     EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
238     Asm->OutStreamer->EmitFill(12, 0);
239     Asm->OutStreamer->EmitCOFFSecRel32(Fn);
240     Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
241     Asm->EmitInt8(0);
242     // Emit the function display name as a null-terminated string.
243     Asm->OutStreamer->EmitBytes(FuncName);
244     Asm->EmitInt8(0);
245     Asm->OutStreamer->EmitLabel(ProcSegmentEnd);
246 
247     // We're done with this function.
248     Asm->EmitInt16(0x0002);
249     Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END));
250   }
251   Asm->OutStreamer->EmitLabel(SymbolsEnd);
252   // Every subsection must be aligned to a 4-byte boundary.
253   Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);
254 
255   // PCs/Instructions are grouped into segments sharing the same filename.
256   // Pre-calculate the lengths (in instructions) of these segments and store
257   // them in a map for convenience.  Each index in the map is the sequential
258   // number of the respective instruction that starts a new segment.
259   DenseMap<size_t, size_t> FilenameSegmentLengths;
260   size_t LastSegmentEnd = 0;
261   StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename;
262   for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) {
263     if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename)
264       continue;
265     FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd;
266     LastSegmentEnd = J;
267     PrevFilename = InstrInfo[FI.Instrs[J]].Filename;
268   }
269   FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
270 
271   // Emit a line table subsection, required to do PC-to-file:line lookup.
272   Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName));
273   Asm->EmitInt32(unsigned(ModuleSubstreamKind::Lines));
274   MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(),
275            *LineTableEnd = Asm->MMI->getContext().createTempSymbol();
276   EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd);
277   Asm->OutStreamer->EmitLabel(LineTableBegin);
278 
279   // Identify the function this subsection is for.
280   Asm->OutStreamer->EmitCOFFSecRel32(Fn);
281   Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
282   // Insert flags after a 16-bit section index.
283   Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS);
284 
285   // Length of the function's code, in bytes.
286   EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
287 
288   // PC-to-linenumber lookup table:
289   MCSymbol *FileSegmentEnd = nullptr;
290 
291   // The start of the last segment:
292   size_t LastSegmentStart = 0;
293 
294   auto FinishPreviousChunk = [&] {
295     if (!FileSegmentEnd)
296       return;
297     for (size_t ColSegI = LastSegmentStart,
298                 ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
299          ColSegI != ColSegEnd; ++ColSegI) {
300       unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber;
301       assert(ColumnNumber <= COFF::CVL_MaxColumnNumber);
302       Asm->EmitInt16(ColumnNumber); // Start column
303       Asm->EmitInt16(0);            // End column
304     }
305     Asm->OutStreamer->EmitLabel(FileSegmentEnd);
306   };
307 
308   for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
309     MCSymbol *Instr = FI.Instrs[J];
310     assert(InstrInfo.count(Instr));
311 
312     if (FilenameSegmentLengths.count(J)) {
313       // We came to a beginning of a new filename segment.
314       FinishPreviousChunk();
315       StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
316       assert(FileNameRegistry.Infos.count(CurFilename));
317       size_t IndexInStringTable =
318           FileNameRegistry.Infos[CurFilename].FilenameID;
319       // Each segment starts with the offset of the filename
320       // in the string table.
321       Asm->OutStreamer->AddComment(
322           "Segment for file '" + Twine(CurFilename) + "' begins");
323       MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol();
324       Asm->OutStreamer->EmitLabel(FileSegmentBegin);
325       Asm->EmitInt32(8 * IndexInStringTable);
326 
327       // Number of PC records in the lookup table.
328       size_t SegmentLength = FilenameSegmentLengths[J];
329       Asm->EmitInt32(SegmentLength);
330 
331       // Full size of the segment for this filename, including the prev two
332       // records.
333       FileSegmentEnd = Asm->MMI->getContext().createTempSymbol();
334       EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
335       LastSegmentStart = J;
336     }
337 
338     // The first PC with the given linenumber and the linenumber itself.
339     EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
340     uint32_t LineNumber = InstrInfo[Instr].LineNumber;
341     assert(LineNumber <= COFF::CVL_MaxLineNumber);
342     uint32_t LineData = LineNumber | COFF::CVL_IsStatement;
343     Asm->EmitInt32(LineData);
344   }
345 
346   FinishPreviousChunk();
347   Asm->OutStreamer->EmitLabel(LineTableEnd);
348 }
349 
350 void CodeViewDebug::beginFunction(const MachineFunction *MF) {
351   assert(!CurFn && "Can't process two functions at once!");
352 
353   if (!Asm || !Asm->MMI->hasDebugInfo())
354     return;
355 
356   const Function *GV = MF->getFunction();
357   assert(FnDebugInfo.count(GV) == false);
358   VisitedFunctions.push_back(GV);
359   CurFn = &FnDebugInfo[GV];
360 
361   // Find the end of the function prolog.
362   // FIXME: is there a simpler a way to do this? Can we just search
363   // for the first instruction of the function, not the last of the prolog?
364   DebugLoc PrologEndLoc;
365   bool EmptyPrologue = true;
366   for (const auto &MBB : *MF) {
367     if (PrologEndLoc)
368       break;
369     for (const auto &MI : MBB) {
370       if (MI.isDebugValue())
371         continue;
372 
373       // First known non-DBG_VALUE and non-frame setup location marks
374       // the beginning of the function body.
375       // FIXME: do we need the first subcondition?
376       if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
377         PrologEndLoc = MI.getDebugLoc();
378         break;
379       }
380       EmptyPrologue = false;
381     }
382   }
383   // Record beginning of function if we have a non-empty prologue.
384   if (PrologEndLoc && !EmptyPrologue) {
385     DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
386     maybeRecordLocation(FnStartDL, MF);
387   }
388 }
389 
390 void CodeViewDebug::endFunction(const MachineFunction *MF) {
391   if (!Asm || !CurFn)  // We haven't created any debug info for this function.
392     return;
393 
394   const Function *GV = MF->getFunction();
395   assert(FnDebugInfo.count(GV));
396   assert(CurFn == &FnDebugInfo[GV]);
397 
398   if (CurFn->Instrs.empty()) {
399     FnDebugInfo.erase(GV);
400     VisitedFunctions.pop_back();
401   } else {
402     CurFn->End = Asm->getFunctionEnd();
403   }
404   CurFn = nullptr;
405 }
406 
407 void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
408   // Ignore DBG_VALUE locations and function prologue.
409   if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
410     return;
411   DebugLoc DL = MI->getDebugLoc();
412   if (DL == PrevInstLoc || !DL)
413     return;
414   maybeRecordLocation(DL, Asm->MF);
415 }
416 }
417