1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for writing Microsoft CodeView debug info.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeViewDebug.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/Line.h"
17 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCSymbol.h"
22 #include "llvm/Support/COFF.h"
23 
24 using namespace llvm::codeview;
25 
26 namespace llvm {
27 
28 StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
29   std::string &Filepath = FileToFilepathMap[File];
30   if (!Filepath.empty())
31     return Filepath;
32 
33   StringRef Dir = File->getDirectory(), Filename = File->getFilename();
34 
35   // Clang emits directory and relative filename info into the IR, but CodeView
36   // operates on full paths.  We could change Clang to emit full paths too, but
37   // that would increase the IR size and probably not needed for other users.
38   // For now, just concatenate and canonicalize the path here.
39   if (Filename.find(':') == 1)
40     Filepath = Filename;
41   else
42     Filepath = (Dir + "\\" + Filename).str();
43 
44   // Canonicalize the path.  We have to do it textually because we may no longer
45   // have access the file in the filesystem.
46   // First, replace all slashes with backslashes.
47   std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
48 
49   // Remove all "\.\" with "\".
50   size_t Cursor = 0;
51   while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
52     Filepath.erase(Cursor, 2);
53 
54   // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
55   // path should be well-formatted, e.g. start with a drive letter, etc.
56   Cursor = 0;
57   while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
58     // Something's wrong if the path starts with "\..\", abort.
59     if (Cursor == 0)
60       break;
61 
62     size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
63     if (PrevSlash == std::string::npos)
64       // Something's wrong, abort.
65       break;
66 
67     Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
68     // The next ".." might be following the one we've just erased.
69     Cursor = PrevSlash;
70   }
71 
72   // Remove all duplicate backslashes.
73   Cursor = 0;
74   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
75     Filepath.erase(Cursor, 1);
76 
77   return Filepath;
78 }
79 
80 unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
81   unsigned NextId = FileIdMap.size() + 1;
82   auto Insertion = FileIdMap.insert(std::make_pair(F, NextId));
83   if (Insertion.second) {
84     // We have to compute the full filepath and emit a .cv_file directive.
85     StringRef FullPath = getFullFilepath(F);
86     NextId = Asm->OutStreamer->EmitCVFileDirective(NextId, FullPath);
87     assert(NextId == FileIdMap.size() && ".cv_file directive failed");
88   }
89   return Insertion.first->second;
90 }
91 
92 CodeViewDebug::InlineSite &CodeViewDebug::getInlineSite(const DILocation *Loc) {
93   const DILocation *InlinedAt = Loc->getInlinedAt();
94   auto Insertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()});
95   if (Insertion.second) {
96     InlineSite &Site = Insertion.first->second;
97     Site.SiteFuncId = NextFuncId++;
98     Site.Inlinee = Loc->getScope()->getSubprogram();
99   }
100   return Insertion.first->second;
101 }
102 
103 void CodeViewDebug::maybeRecordLocation(DebugLoc DL,
104                                         const MachineFunction *MF) {
105   // Skip this instruction if it has the same location as the previous one.
106   if (DL == CurFn->LastLoc)
107     return;
108 
109   const DIScope *Scope = DL.get()->getScope();
110   if (!Scope)
111     return;
112 
113   // Skip this line if it is longer than the maximum we can record.
114   LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true);
115   if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() ||
116       LI.isNeverStepInto())
117     return;
118 
119   ColumnInfo CI(DL.getCol(), /*EndColumn=*/0);
120   if (CI.getStartColumn() != DL.getCol())
121     return;
122 
123   if (!CurFn->HaveLineInfo)
124     CurFn->HaveLineInfo = true;
125   unsigned FileId = 0;
126   if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
127     FileId = CurFn->LastFileId;
128   else
129     FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
130   CurFn->LastLoc = DL;
131 
132   unsigned FuncId = CurFn->FuncId;
133   if (const DILocation *Loc = DL->getInlinedAt()) {
134     // If this location was actually inlined from somewhere else, give it the ID
135     // of the inline call site.
136     FuncId = getInlineSite(DL.get()).SiteFuncId;
137     // Ensure we have links in the tree of inline call sites.
138     const DILocation *ChildLoc = nullptr;
139     while (Loc->getInlinedAt()) {
140       InlineSite &Site = getInlineSite(Loc);
141       if (ChildLoc) {
142         // Record the child inline site if not already present.
143         auto B = Site.ChildSites.begin(), E = Site.ChildSites.end();
144         if (std::find(B, E, Loc) != E)
145           break;
146         Site.ChildSites.push_back(Loc);
147       }
148       ChildLoc = Loc;
149     }
150   }
151 
152   Asm->OutStreamer->EmitCVLocDirective(FuncId, FileId, DL.getLine(),
153                                        DL.getCol(), /*PrologueEnd=*/false,
154                                        /*IsStmt=*/false, DL->getFilename());
155 }
156 
157 CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
158     : Asm(nullptr), CurFn(nullptr) {
159   MachineModuleInfo *MMI = AP->MMI;
160 
161   // If module doesn't have named metadata anchors or COFF debug section
162   // is not available, skip any debug info related stuff.
163   if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
164       !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
165     return;
166 
167   // Tell MMI that we have debug info.
168   MMI->setDebugInfoAvailability(true);
169   Asm = AP;
170 }
171 
172 void CodeViewDebug::endModule() {
173   if (FnDebugInfo.empty())
174     return;
175 
176   emitTypeInformation();
177 
178   // FIXME: For functions that are comdat, we should emit separate .debug$S
179   // sections that are comdat associative with the main function instead of
180   // having one big .debug$S section.
181   assert(Asm != nullptr);
182   Asm->OutStreamer->SwitchSection(
183       Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
184   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
185 
186   // The COFF .debug$S section consists of several subsections, each starting
187   // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
188   // of the payload followed by the payload itself.  The subsections are 4-byte
189   // aligned.
190 
191   // Emit per-function debug information.
192   for (auto &P : FnDebugInfo)
193     emitDebugInfoForFunction(P.first, P.second);
194 
195   // This subsection holds a file index to offset in string table table.
196   Asm->OutStreamer->AddComment("File index to string table offset subsection");
197   Asm->OutStreamer->EmitCVFileChecksumsDirective();
198 
199   // This subsection holds the string table.
200   Asm->OutStreamer->AddComment("String table");
201   Asm->OutStreamer->EmitCVStringTableDirective();
202 
203   clear();
204 }
205 
206 template <typename T> static void emitRecord(MCStreamer &OS, const T &Rec) {
207   OS.EmitBytes(StringRef(reinterpret_cast<const char *>(&Rec), sizeof(Rec)));
208 }
209 
210 void CodeViewDebug::emitTypeInformation() {
211   // Start the .debug$T section with 0x4.
212   Asm->OutStreamer->SwitchSection(
213       Asm->getObjFileLowering().getCOFFDebugTypesSection());
214   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
215 
216   NamedMDNode *CU_Nodes =
217       Asm->MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
218   if (!CU_Nodes)
219     return;
220 
221   // This type info currently only holds function ids for use with inline call
222   // frame info. All functions are assigned a simple 'void ()' type. Emit that
223   // type here.
224   TypeIndex ArgListIdx = getNextTypeIndex();
225   Asm->EmitInt16(2 + sizeof(ArgList));
226   Asm->EmitInt16(LF_ARGLIST);
227   Asm->EmitInt32(0);
228 
229   TypeIndex VoidProcIdx = getNextTypeIndex();
230   Asm->EmitInt16(2 + sizeof(ProcedureType));
231   Asm->EmitInt16(LF_PROCEDURE);
232   ProcedureType Proc{}; // Zero initialize.
233   Proc.ReturnType = TypeIndex::Void();
234   Proc.CallConv = CallingConvention::NearC;
235   Proc.Options = FunctionOptions::None;
236   Proc.NumParameters = 0;
237   Proc.ArgListType = ArgListIdx;
238   emitRecord(*Asm->OutStreamer, Proc);
239 
240   for (MDNode *N : CU_Nodes->operands()) {
241     auto *CUNode = cast<DICompileUnit>(N);
242     for (auto *SP : CUNode->getSubprograms()) {
243       StringRef DisplayName = SP->getDisplayName();
244       Asm->EmitInt16(2 + sizeof(FuncId) + DisplayName.size() + 1);
245       Asm->EmitInt16(LF_FUNC_ID);
246 
247       FuncId Func{}; // Zero initialize.
248       Func.ParentScope = TypeIndex();
249       Func.FunctionType = VoidProcIdx;
250       emitRecord(*Asm->OutStreamer, Func);
251       Asm->OutStreamer->EmitBytes(DisplayName);
252       Asm->EmitInt8(0);
253 
254       TypeIndex FuncIdIdx = getNextTypeIndex();
255       SubprogramToFuncId.insert(std::make_pair(SP, FuncIdIdx));
256     }
257   }
258 }
259 
260 static void EmitLabelDiff(MCStreamer &Streamer,
261                           const MCSymbol *From, const MCSymbol *To,
262                           unsigned int Size = 4) {
263   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
264   MCContext &Context = Streamer.getContext();
265   const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
266                *ToRef   = MCSymbolRefExpr::create(To, Variant, Context);
267   const MCExpr *AddrDelta =
268       MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
269   Streamer.EmitValue(AddrDelta, Size);
270 }
271 
272 void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
273                                         const DILocation *InlinedAt,
274                                         const InlineSite &Site) {
275   MCStreamer &OS = *Asm->OutStreamer;
276 
277   MCSymbol *InlineBegin = Asm->MMI->getContext().createTempSymbol(),
278            *InlineEnd = Asm->MMI->getContext().createTempSymbol();
279 
280   assert(SubprogramToFuncId.count(Site.Inlinee));
281   TypeIndex InlineeIdx = SubprogramToFuncId[Site.Inlinee];
282 
283   // SymbolRecord
284   EmitLabelDiff(OS, InlineBegin, InlineEnd, 2);   // RecordLength
285   OS.EmitLabel(InlineBegin);
286   Asm->EmitInt16(SymbolRecordKind::S_INLINESITE); // RecordKind
287 
288   InlineSiteSym SiteBytes{};
289   SiteBytes.Inlinee = InlineeIdx;
290   Asm->OutStreamer->EmitBytes(
291       StringRef(reinterpret_cast<const char *>(&SiteBytes), sizeof(SiteBytes)));
292 
293   // FIXME: annotations
294 
295   OS.EmitLabel(InlineEnd);
296 
297   // Recurse on child inlined call sites before closing the scope.
298   for (const DILocation *ChildSite : Site.ChildSites) {
299     auto I = FI.InlineSites.find(ChildSite);
300     assert(I != FI.InlineSites.end() &&
301            "child site not in function inline site map");
302     emitInlinedCallSite(FI, ChildSite, I->second);
303   }
304 
305   // Close the scope.
306   Asm->EmitInt16(2);                                  // RecordLength
307   Asm->EmitInt16(SymbolRecordKind::S_INLINESITE_END); // RecordKind
308 }
309 
310 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
311                                              FunctionInfo &FI) {
312   // For each function there is a separate subsection
313   // which holds the PC to file:line table.
314   const MCSymbol *Fn = Asm->getSymbol(GV);
315   assert(Fn);
316 
317   StringRef FuncName;
318   if (auto *SP = getDISubprogram(GV))
319     FuncName = SP->getDisplayName();
320 
321   // If our DISubprogram name is empty, use the mangled name.
322   if (FuncName.empty())
323     FuncName = GlobalValue::getRealLinkageName(GV->getName());
324 
325   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
326   MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
327            *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
328   Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
329   Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols));
330   EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
331   Asm->OutStreamer->EmitLabel(SymbolsBegin);
332   {
333     MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
334              *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
335     EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
336     Asm->OutStreamer->EmitLabel(ProcSegmentBegin);
337 
338     Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID));
339 
340     // Some bytes of this segment don't seem to be required for basic debugging,
341     // so just fill them with zeroes.
342     Asm->OutStreamer->EmitFill(12, 0);
343     // This is the important bit that tells the debugger where the function
344     // code is located and what's its size:
345     EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
346     Asm->OutStreamer->EmitFill(12, 0);
347     Asm->OutStreamer->EmitCOFFSecRel32(Fn);
348     Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
349     Asm->EmitInt8(0);
350     // Emit the function display name as a null-terminated string.
351     Asm->OutStreamer->EmitBytes(FuncName);
352     Asm->EmitInt8(0);
353     Asm->OutStreamer->EmitLabel(ProcSegmentEnd);
354 
355     // Emit inlined call site information. Only emit functions inlined directly
356     // into the parent function. We'll emit the other sites recursively as part
357     // of their parent inline site.
358     for (auto &KV : FI.InlineSites) {
359       const DILocation *InlinedAt = KV.first;
360       if (!InlinedAt->getInlinedAt())
361         emitInlinedCallSite(FI, InlinedAt, KV.second);
362     }
363 
364     // We're done with this function.
365     Asm->EmitInt16(0x0002);
366     Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END));
367   }
368   Asm->OutStreamer->EmitLabel(SymbolsEnd);
369   // Every subsection must be aligned to a 4-byte boundary.
370   Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);
371 
372   // We have an assembler directive that takes care of the whole line table.
373   Asm->OutStreamer->EmitCVLinetableDirective(FI.FuncId, Fn, FI.End);
374 }
375 
376 void CodeViewDebug::beginFunction(const MachineFunction *MF) {
377   assert(!CurFn && "Can't process two functions at once!");
378 
379   if (!Asm || !Asm->MMI->hasDebugInfo())
380     return;
381 
382   const Function *GV = MF->getFunction();
383   assert(FnDebugInfo.count(GV) == false);
384   CurFn = &FnDebugInfo[GV];
385   CurFn->FuncId = NextFuncId++;
386 
387   // Find the end of the function prolog.
388   // FIXME: is there a simpler a way to do this? Can we just search
389   // for the first instruction of the function, not the last of the prolog?
390   DebugLoc PrologEndLoc;
391   bool EmptyPrologue = true;
392   for (const auto &MBB : *MF) {
393     if (PrologEndLoc)
394       break;
395     for (const auto &MI : MBB) {
396       if (MI.isDebugValue())
397         continue;
398 
399       // First known non-DBG_VALUE and non-frame setup location marks
400       // the beginning of the function body.
401       // FIXME: do we need the first subcondition?
402       if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
403         PrologEndLoc = MI.getDebugLoc();
404         break;
405       }
406       EmptyPrologue = false;
407     }
408   }
409   // Record beginning of function if we have a non-empty prologue.
410   if (PrologEndLoc && !EmptyPrologue) {
411     DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
412     maybeRecordLocation(FnStartDL, MF);
413   }
414 }
415 
416 void CodeViewDebug::endFunction(const MachineFunction *MF) {
417   if (!Asm || !CurFn)  // We haven't created any debug info for this function.
418     return;
419 
420   const Function *GV = MF->getFunction();
421   assert(FnDebugInfo.count(GV));
422   assert(CurFn == &FnDebugInfo[GV]);
423 
424   // Don't emit anything if we don't have any line tables.
425   if (!CurFn->HaveLineInfo) {
426     FnDebugInfo.erase(GV);
427   } else {
428     CurFn->End = Asm->getFunctionEnd();
429   }
430   CurFn = nullptr;
431 }
432 
433 void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
434   // Ignore DBG_VALUE locations and function prologue.
435   if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
436     return;
437   DebugLoc DL = MI->getDebugLoc();
438   if (DL == PrevInstLoc || !DL)
439     return;
440   maybeRecordLocation(DL, Asm->MF);
441 }
442 }
443