1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for writing Microsoft CodeView debug info.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeViewDebug.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/Line.h"
17 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCSymbol.h"
22 #include "llvm/Support/COFF.h"
23 
24 using namespace llvm::codeview;
25 
26 namespace llvm {
27 
28 StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
29   std::string &Filepath = FileToFilepathMap[File];
30   if (!Filepath.empty())
31     return Filepath;
32 
33   StringRef Dir = File->getDirectory(), Filename = File->getFilename();
34 
35   // Clang emits directory and relative filename info into the IR, but CodeView
36   // operates on full paths.  We could change Clang to emit full paths too, but
37   // that would increase the IR size and probably not needed for other users.
38   // For now, just concatenate and canonicalize the path here.
39   if (Filename.find(':') == 1)
40     Filepath = Filename;
41   else
42     Filepath = (Dir + "\\" + Filename).str();
43 
44   // Canonicalize the path.  We have to do it textually because we may no longer
45   // have access the file in the filesystem.
46   // First, replace all slashes with backslashes.
47   std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
48 
49   // Remove all "\.\" with "\".
50   size_t Cursor = 0;
51   while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
52     Filepath.erase(Cursor, 2);
53 
54   // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
55   // path should be well-formatted, e.g. start with a drive letter, etc.
56   Cursor = 0;
57   while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
58     // Something's wrong if the path starts with "\..\", abort.
59     if (Cursor == 0)
60       break;
61 
62     size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
63     if (PrevSlash == std::string::npos)
64       // Something's wrong, abort.
65       break;
66 
67     Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
68     // The next ".." might be following the one we've just erased.
69     Cursor = PrevSlash;
70   }
71 
72   // Remove all duplicate backslashes.
73   Cursor = 0;
74   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
75     Filepath.erase(Cursor, 1);
76 
77   return Filepath;
78 }
79 
80 unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
81   unsigned NextId = FileIdMap.size() + 1;
82   auto Insertion = FileIdMap.insert(std::make_pair(F, NextId));
83   if (Insertion.second) {
84     // We have to compute the full filepath and emit a .cv_file directive.
85     StringRef FullPath = getFullFilepath(F);
86     NextId = OS.EmitCVFileDirective(NextId, FullPath);
87     assert(NextId == FileIdMap.size() && ".cv_file directive failed");
88   }
89   return Insertion.first->second;
90 }
91 
92 CodeViewDebug::InlineSite &CodeViewDebug::getInlineSite(const DILocation *Loc) {
93   const DILocation *InlinedAt = Loc->getInlinedAt();
94   auto Insertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()});
95   if (Insertion.second) {
96     InlineSite &Site = Insertion.first->second;
97     Site.SiteFuncId = NextFuncId++;
98     Site.Inlinee = Loc->getScope()->getSubprogram();
99     InlinedSubprograms.insert(Loc->getScope()->getSubprogram());
100   }
101   return Insertion.first->second;
102 }
103 
104 void CodeViewDebug::maybeRecordLocation(DebugLoc DL,
105                                         const MachineFunction *MF) {
106   // Skip this instruction if it has the same location as the previous one.
107   if (DL == CurFn->LastLoc)
108     return;
109 
110   const DIScope *Scope = DL.get()->getScope();
111   if (!Scope)
112     return;
113 
114   // Skip this line if it is longer than the maximum we can record.
115   LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true);
116   if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() ||
117       LI.isNeverStepInto())
118     return;
119 
120   ColumnInfo CI(DL.getCol(), /*EndColumn=*/0);
121   if (CI.getStartColumn() != DL.getCol())
122     return;
123 
124   if (!CurFn->HaveLineInfo)
125     CurFn->HaveLineInfo = true;
126   unsigned FileId = 0;
127   if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
128     FileId = CurFn->LastFileId;
129   else
130     FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
131   CurFn->LastLoc = DL;
132 
133   unsigned FuncId = CurFn->FuncId;
134   if (const DILocation *Loc = DL->getInlinedAt()) {
135     // If this location was actually inlined from somewhere else, give it the ID
136     // of the inline call site.
137     FuncId = getInlineSite(DL.get()).SiteFuncId;
138     // Ensure we have links in the tree of inline call sites.
139     const DILocation *ChildLoc = nullptr;
140     while (Loc->getInlinedAt()) {
141       InlineSite &Site = getInlineSite(Loc);
142       if (ChildLoc) {
143         // Record the child inline site if not already present.
144         auto B = Site.ChildSites.begin(), E = Site.ChildSites.end();
145         if (std::find(B, E, Loc) != E)
146           break;
147         Site.ChildSites.push_back(Loc);
148       }
149       ChildLoc = Loc;
150     }
151   }
152 
153   OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
154                         /*PrologueEnd=*/false,
155                         /*IsStmt=*/false, DL->getFilename());
156 }
157 
158 CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
159     : Asm(AP), OS(*Asm->OutStreamer), CurFn(nullptr) {
160   MachineModuleInfo *MMI = AP->MMI;
161 
162   // If module doesn't have named metadata anchors or COFF debug section
163   // is not available, skip any debug info related stuff.
164   if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
165       !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
166     Asm = nullptr;
167     return;
168   }
169 
170   // Tell MMI that we have debug info.
171   MMI->setDebugInfoAvailability(true);
172 }
173 
174 void CodeViewDebug::endModule() {
175   if (FnDebugInfo.empty())
176     return;
177 
178   emitTypeInformation();
179 
180   // FIXME: For functions that are comdat, we should emit separate .debug$S
181   // sections that are comdat associative with the main function instead of
182   // having one big .debug$S section.
183   assert(Asm != nullptr);
184   OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
185   OS.AddComment("Debug section magic");
186   OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4);
187 
188   // The COFF .debug$S section consists of several subsections, each starting
189   // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
190   // of the payload followed by the payload itself.  The subsections are 4-byte
191   // aligned.
192 
193   // Make a subsection for all the inlined subprograms.
194   emitInlineeLinesSubsection();
195 
196   // Emit per-function debug information.
197   for (auto &P : FnDebugInfo)
198     emitDebugInfoForFunction(P.first, P.second);
199 
200   // This subsection holds a file index to offset in string table table.
201   OS.AddComment("File index to string table offset subsection");
202   OS.EmitCVFileChecksumsDirective();
203 
204   // This subsection holds the string table.
205   OS.AddComment("String table");
206   OS.EmitCVStringTableDirective();
207 
208   clear();
209 }
210 
211 void CodeViewDebug::emitTypeInformation() {
212   // Start the .debug$T section with 0x4.
213   OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
214   OS.AddComment("Debug section magic");
215   OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4);
216 
217   NamedMDNode *CU_Nodes =
218       Asm->MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
219   if (!CU_Nodes)
220     return;
221 
222   // This type info currently only holds function ids for use with inline call
223   // frame info. All functions are assigned a simple 'void ()' type. Emit that
224   // type here.
225   TypeIndex ArgListIdx = getNextTypeIndex();
226   OS.AddComment("Type record length");
227   OS.EmitIntValue(2 + sizeof(ArgList), 2);
228   OS.AddComment("Leaf type: LF_ARGLIST");
229   OS.EmitIntValue(LF_ARGLIST, 2);
230   OS.AddComment("Number of arguments");
231   OS.EmitIntValue(0, 4);
232 
233   TypeIndex VoidProcIdx = getNextTypeIndex();
234   OS.AddComment("Type record length");
235   OS.EmitIntValue(2 + sizeof(ProcedureType), 2);
236   OS.AddComment("Leaf type: LF_PROCEDURE");
237   OS.EmitIntValue(LF_PROCEDURE, 2);
238   OS.AddComment("Return type index");
239   OS.EmitIntValue(TypeIndex::Void().getIndex(), 4);
240   OS.AddComment("Calling convention");
241   OS.EmitIntValue(char(CallingConvention::NearC), 1);
242   OS.AddComment("Function options");
243   OS.EmitIntValue(char(FunctionOptions::None), 1);
244   OS.AddComment("# of parameters");
245   OS.EmitIntValue(0, 2);
246   OS.AddComment("Argument list type index");
247   OS.EmitIntValue(ArgListIdx.getIndex(), 4);
248 
249   for (MDNode *N : CU_Nodes->operands()) {
250     auto *CUNode = cast<DICompileUnit>(N);
251     for (auto *SP : CUNode->getSubprograms()) {
252       StringRef DisplayName = SP->getDisplayName();
253       OS.AddComment("Type record length");
254       OS.EmitIntValue(2 + sizeof(FuncId) + DisplayName.size() + 1, 2);
255       OS.AddComment("Leaf type: LF_FUNC_ID");
256       OS.EmitIntValue(LF_FUNC_ID, 2);
257 
258       OS.AddComment("Scope type index");
259       OS.EmitIntValue(TypeIndex().getIndex(), 4);
260       OS.AddComment("Function type");
261       OS.EmitIntValue(VoidProcIdx.getIndex(), 4);
262       {
263         SmallString<32> NullTerminatedString(DisplayName);
264         if (NullTerminatedString.empty() || NullTerminatedString.back() != '\0')
265           NullTerminatedString.push_back('\0');
266         OS.AddComment("Function name");
267         OS.EmitBytes(NullTerminatedString);
268       }
269 
270       TypeIndex FuncIdIdx = getNextTypeIndex();
271       SubprogramToFuncId.insert(std::make_pair(SP, FuncIdIdx));
272     }
273   }
274 }
275 
276 void CodeViewDebug::emitInlineeLinesSubsection() {
277   if (InlinedSubprograms.empty())
278     return;
279 
280   MCSymbol *InlineBegin = Asm->MMI->getContext().createTempSymbol(),
281            *InlineEnd = Asm->MMI->getContext().createTempSymbol();
282 
283   OS.AddComment("Inlinee lines subsection");
284   OS.EmitIntValue(unsigned(ModuleSubstreamKind::InlineeLines), 4);
285   OS.AddComment("Subsection size");
286   OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 4);
287   OS.EmitLabel(InlineBegin);
288 
289   // We don't provide any extra file info.
290   // FIXME: Find out if debuggers use this info.
291   OS.AddComment("Inlinee lines signature");
292   OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4);
293 
294   for (const DISubprogram *SP : InlinedSubprograms) {
295     OS.AddBlankLine();
296     TypeIndex TypeId = SubprogramToFuncId[SP];
297     unsigned FileId = maybeRecordFile(SP->getFile());
298     OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " +
299                   SP->getFilename() + Twine(':') + Twine(SP->getLine()));
300     OS.AddBlankLine();
301     // The filechecksum table uses 8 byte entries for now, and file ids start at
302     // 1.
303     unsigned FileOffset = (FileId - 1) * 8;
304     OS.AddComment("Type index of inlined function");
305     OS.EmitIntValue(TypeId.getIndex(), 4);
306     OS.AddComment("Offset into filechecksum table");
307     OS.EmitIntValue(FileOffset, 4);
308     OS.AddComment("Starting line number");
309     OS.EmitIntValue(SP->getLine(), 4);
310   }
311 
312   OS.EmitLabel(InlineEnd);
313 }
314 
315 static void EmitLabelDiff(MCStreamer &Streamer,
316                           const MCSymbol *From, const MCSymbol *To,
317                           unsigned int Size = 4) {
318   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
319   MCContext &Context = Streamer.getContext();
320   const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
321                *ToRef   = MCSymbolRefExpr::create(To, Variant, Context);
322   const MCExpr *AddrDelta =
323       MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
324   Streamer.EmitValue(AddrDelta, Size);
325 }
326 
327 void CodeViewDebug::collectInlineSiteChildren(
328     SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI,
329     const InlineSite &Site) {
330   for (const DILocation *ChildSiteLoc : Site.ChildSites) {
331     auto I = FI.InlineSites.find(ChildSiteLoc);
332     assert(I != FI.InlineSites.end());
333     const InlineSite &ChildSite = I->second;
334     Children.push_back(ChildSite.SiteFuncId);
335     collectInlineSiteChildren(Children, FI, ChildSite);
336   }
337 }
338 
339 void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
340                                         const DILocation *InlinedAt,
341                                         const InlineSite &Site) {
342   MCStreamer &OS = *Asm->OutStreamer;
343 
344   MCSymbol *InlineBegin = Asm->MMI->getContext().createTempSymbol(),
345            *InlineEnd = Asm->MMI->getContext().createTempSymbol();
346 
347   assert(SubprogramToFuncId.count(Site.Inlinee));
348   TypeIndex InlineeIdx = SubprogramToFuncId[Site.Inlinee];
349 
350   // SymbolRecord
351   OS.AddComment("Record length");
352   EmitLabelDiff(OS, InlineBegin, InlineEnd, 2);   // RecordLength
353   OS.EmitLabel(InlineBegin);
354   OS.AddComment("Record kind: S_INLINESITE");
355   OS.EmitIntValue(SymbolRecordKind::S_INLINESITE, 2); // RecordKind
356 
357   OS.AddComment("PtrParent");
358   OS.EmitIntValue(0, 4);
359   OS.AddComment("PtrEnd");
360   OS.EmitIntValue(0, 4);
361   OS.AddComment("Inlinee type index");
362   OS.EmitIntValue(InlineeIdx.getIndex(), 4);
363 
364   unsigned FileId = maybeRecordFile(Site.Inlinee->getFile());
365   unsigned StartLineNum = Site.Inlinee->getLine();
366   SmallVector<unsigned, 3> SecondaryFuncIds;
367   collectInlineSiteChildren(SecondaryFuncIds, FI, Site);
368 
369   OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
370                                     FI.Begin, FI.End, SecondaryFuncIds);
371 
372   OS.EmitLabel(InlineEnd);
373 
374   // Recurse on child inlined call sites before closing the scope.
375   for (const DILocation *ChildSite : Site.ChildSites) {
376     auto I = FI.InlineSites.find(ChildSite);
377     assert(I != FI.InlineSites.end() &&
378            "child site not in function inline site map");
379     emitInlinedCallSite(FI, ChildSite, I->second);
380   }
381 
382   // Close the scope.
383   OS.AddComment("Record length");
384   OS.EmitIntValue(2, 2);                                  // RecordLength
385   OS.AddComment("Record kind: S_INLINESITE_END");
386   OS.EmitIntValue(SymbolRecordKind::S_INLINESITE_END, 2); // RecordKind
387 }
388 
389 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
390                                              FunctionInfo &FI) {
391   // For each function there is a separate subsection
392   // which holds the PC to file:line table.
393   const MCSymbol *Fn = Asm->getSymbol(GV);
394   assert(Fn);
395 
396   StringRef FuncName;
397   if (auto *SP = getDISubprogram(GV))
398     FuncName = SP->getDisplayName();
399 
400   // If our DISubprogram name is empty, use the mangled name.
401   if (FuncName.empty())
402     FuncName = GlobalValue::getRealLinkageName(GV->getName());
403 
404   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
405   MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
406            *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
407   OS.AddComment("Symbol subsection for " + Twine(FuncName));
408   OS.EmitIntValue(unsigned(ModuleSubstreamKind::Symbols), 4);
409   OS.AddComment("Subsection size");
410   EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
411   OS.EmitLabel(SymbolsBegin);
412   {
413     MCSymbol *ProcRecordBegin = Asm->MMI->getContext().createTempSymbol(),
414              *ProcRecordEnd = Asm->MMI->getContext().createTempSymbol();
415     OS.AddComment("Record length");
416     EmitLabelDiff(*Asm->OutStreamer, ProcRecordBegin, ProcRecordEnd, 2);
417     OS.EmitLabel(ProcRecordBegin);
418 
419     OS.AddComment("Record kind: S_GPROC32_ID");
420     OS.EmitIntValue(unsigned(SymbolRecordKind::S_GPROC32_ID), 2);
421 
422     // These fields are filled in by tools like CVPACK which run after the fact.
423     OS.AddComment("PtrParent");
424     OS.EmitIntValue(0, 4);
425     OS.AddComment("PtrEnd");
426     OS.EmitIntValue(0, 4);
427     OS.AddComment("PtrNext");
428     OS.EmitIntValue(0, 4);
429     // This is the important bit that tells the debugger where the function
430     // code is located and what's its size:
431     OS.AddComment("Code size");
432     EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
433     OS.AddComment("Offset after prologue");
434     OS.EmitIntValue(0, 4);
435     OS.AddComment("Offset before epilogue");
436     OS.EmitIntValue(0, 4);
437     OS.AddComment("Function type index");
438     OS.EmitIntValue(0, 4);
439     OS.AddComment("Function section relative address");
440     OS.EmitCOFFSecRel32(Fn);
441     OS.AddComment("Function section index");
442     OS.EmitCOFFSectionIndex(Fn);
443     OS.AddComment("Flags");
444     OS.EmitIntValue(0, 1);
445     // Emit the function display name as a null-terminated string.
446     OS.AddComment("Function name");
447     {
448       SmallString<32> NullTerminatedString(FuncName);
449       if (NullTerminatedString.empty() || NullTerminatedString.back() != '\0')
450         NullTerminatedString.push_back('\0');
451       OS.EmitBytes(NullTerminatedString);
452     }
453     OS.EmitLabel(ProcRecordEnd);
454 
455     // Emit inlined call site information. Only emit functions inlined directly
456     // into the parent function. We'll emit the other sites recursively as part
457     // of their parent inline site.
458     for (auto &KV : FI.InlineSites) {
459       const DILocation *InlinedAt = KV.first;
460       if (!InlinedAt->getInlinedAt())
461         emitInlinedCallSite(FI, InlinedAt, KV.second);
462     }
463 
464     // We're done with this function.
465     OS.AddComment("Record length");
466     OS.EmitIntValue(0x0002, 2);
467     OS.AddComment("Record kind: S_PROC_ID_END");
468     OS.EmitIntValue(unsigned(SymbolRecordKind::S_PROC_ID_END), 2);
469   }
470   OS.EmitLabel(SymbolsEnd);
471   // Every subsection must be aligned to a 4-byte boundary.
472   OS.EmitValueToAlignment(4);
473 
474   // We have an assembler directive that takes care of the whole line table.
475   OS.EmitCVLinetableDirective(FI.FuncId, Fn, FI.End);
476 }
477 
478 void CodeViewDebug::beginFunction(const MachineFunction *MF) {
479   assert(!CurFn && "Can't process two functions at once!");
480 
481   if (!Asm || !Asm->MMI->hasDebugInfo())
482     return;
483 
484   const Function *GV = MF->getFunction();
485   assert(FnDebugInfo.count(GV) == false);
486   CurFn = &FnDebugInfo[GV];
487   CurFn->FuncId = NextFuncId++;
488   CurFn->Begin = Asm->getFunctionBegin();
489 
490   // Find the end of the function prolog.
491   // FIXME: is there a simpler a way to do this? Can we just search
492   // for the first instruction of the function, not the last of the prolog?
493   DebugLoc PrologEndLoc;
494   bool EmptyPrologue = true;
495   for (const auto &MBB : *MF) {
496     if (PrologEndLoc)
497       break;
498     for (const auto &MI : MBB) {
499       if (MI.isDebugValue())
500         continue;
501 
502       // First known non-DBG_VALUE and non-frame setup location marks
503       // the beginning of the function body.
504       // FIXME: do we need the first subcondition?
505       if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
506         PrologEndLoc = MI.getDebugLoc();
507         break;
508       }
509       EmptyPrologue = false;
510     }
511   }
512   // Record beginning of function if we have a non-empty prologue.
513   if (PrologEndLoc && !EmptyPrologue) {
514     DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
515     maybeRecordLocation(FnStartDL, MF);
516   }
517 }
518 
519 void CodeViewDebug::endFunction(const MachineFunction *MF) {
520   if (!Asm || !CurFn)  // We haven't created any debug info for this function.
521     return;
522 
523   const Function *GV = MF->getFunction();
524   assert(FnDebugInfo.count(GV));
525   assert(CurFn == &FnDebugInfo[GV]);
526 
527   // Don't emit anything if we don't have any line tables.
528   if (!CurFn->HaveLineInfo) {
529     FnDebugInfo.erase(GV);
530   } else {
531     CurFn->End = Asm->getFunctionEnd();
532   }
533   CurFn = nullptr;
534 }
535 
536 void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
537   // Ignore DBG_VALUE locations and function prologue.
538   if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
539     return;
540   DebugLoc DL = MI->getDebugLoc();
541   if (DL == PrevInstLoc || !DL)
542     return;
543   maybeRecordLocation(DL, Asm->MF);
544 }
545 }
546