1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains support for writing Microsoft CodeView debug info. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeViewDebug.h" 15 #include "llvm/DebugInfo/CodeView/CodeView.h" 16 #include "llvm/DebugInfo/CodeView/Line.h" 17 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 18 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 19 #include "llvm/DebugInfo/CodeView/TypeRecord.h" 20 #include "llvm/MC/MCExpr.h" 21 #include "llvm/MC/MCSymbol.h" 22 #include "llvm/Support/COFF.h" 23 24 using namespace llvm::codeview; 25 26 namespace llvm { 27 28 StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { 29 std::string &Filepath = FileToFilepathMap[File]; 30 if (!Filepath.empty()) 31 return Filepath; 32 33 StringRef Dir = File->getDirectory(), Filename = File->getFilename(); 34 35 // Clang emits directory and relative filename info into the IR, but CodeView 36 // operates on full paths. We could change Clang to emit full paths too, but 37 // that would increase the IR size and probably not needed for other users. 38 // For now, just concatenate and canonicalize the path here. 39 if (Filename.find(':') == 1) 40 Filepath = Filename; 41 else 42 Filepath = (Dir + "\\" + Filename).str(); 43 44 // Canonicalize the path. We have to do it textually because we may no longer 45 // have access the file in the filesystem. 46 // First, replace all slashes with backslashes. 47 std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); 48 49 // Remove all "\.\" with "\". 50 size_t Cursor = 0; 51 while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) 52 Filepath.erase(Cursor, 2); 53 54 // Replace all "\XXX\..\" with "\". Don't try too hard though as the original 55 // path should be well-formatted, e.g. start with a drive letter, etc. 56 Cursor = 0; 57 while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { 58 // Something's wrong if the path starts with "\..\", abort. 59 if (Cursor == 0) 60 break; 61 62 size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); 63 if (PrevSlash == std::string::npos) 64 // Something's wrong, abort. 65 break; 66 67 Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); 68 // The next ".." might be following the one we've just erased. 69 Cursor = PrevSlash; 70 } 71 72 // Remove all duplicate backslashes. 73 Cursor = 0; 74 while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) 75 Filepath.erase(Cursor, 1); 76 77 return Filepath; 78 } 79 80 unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { 81 unsigned NextId = FileIdMap.size() + 1; 82 auto Insertion = FileIdMap.insert(std::make_pair(F, NextId)); 83 if (Insertion.second) { 84 // We have to compute the full filepath and emit a .cv_file directive. 85 StringRef FullPath = getFullFilepath(F); 86 NextId = Asm->OutStreamer->EmitCVFileDirective(NextId, FullPath); 87 assert(NextId == FileIdMap.size() && ".cv_file directive failed"); 88 } 89 return Insertion.first->second; 90 } 91 92 CodeViewDebug::InlineSite &CodeViewDebug::getInlineSite(const DILocation *Loc) { 93 const DILocation *InlinedAt = Loc->getInlinedAt(); 94 auto Insertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()}); 95 if (Insertion.second) { 96 InlineSite &Site = Insertion.first->second; 97 Site.SiteFuncId = NextFuncId++; 98 Site.Inlinee = Loc->getScope()->getSubprogram(); 99 InlinedSubprograms.insert(Loc->getScope()->getSubprogram()); 100 } 101 return Insertion.first->second; 102 } 103 104 void CodeViewDebug::maybeRecordLocation(DebugLoc DL, 105 const MachineFunction *MF) { 106 // Skip this instruction if it has the same location as the previous one. 107 if (DL == CurFn->LastLoc) 108 return; 109 110 const DIScope *Scope = DL.get()->getScope(); 111 if (!Scope) 112 return; 113 114 // Skip this line if it is longer than the maximum we can record. 115 LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true); 116 if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() || 117 LI.isNeverStepInto()) 118 return; 119 120 ColumnInfo CI(DL.getCol(), /*EndColumn=*/0); 121 if (CI.getStartColumn() != DL.getCol()) 122 return; 123 124 if (!CurFn->HaveLineInfo) 125 CurFn->HaveLineInfo = true; 126 unsigned FileId = 0; 127 if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile()) 128 FileId = CurFn->LastFileId; 129 else 130 FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile()); 131 CurFn->LastLoc = DL; 132 133 unsigned FuncId = CurFn->FuncId; 134 if (const DILocation *Loc = DL->getInlinedAt()) { 135 // If this location was actually inlined from somewhere else, give it the ID 136 // of the inline call site. 137 FuncId = getInlineSite(DL.get()).SiteFuncId; 138 // Ensure we have links in the tree of inline call sites. 139 const DILocation *ChildLoc = nullptr; 140 while (Loc->getInlinedAt()) { 141 InlineSite &Site = getInlineSite(Loc); 142 if (ChildLoc) { 143 // Record the child inline site if not already present. 144 auto B = Site.ChildSites.begin(), E = Site.ChildSites.end(); 145 if (std::find(B, E, Loc) != E) 146 break; 147 Site.ChildSites.push_back(Loc); 148 } 149 ChildLoc = Loc; 150 } 151 } 152 153 Asm->OutStreamer->EmitCVLocDirective(FuncId, FileId, DL.getLine(), 154 DL.getCol(), /*PrologueEnd=*/false, 155 /*IsStmt=*/false, DL->getFilename()); 156 } 157 158 CodeViewDebug::CodeViewDebug(AsmPrinter *AP) 159 : Asm(nullptr), CurFn(nullptr) { 160 MachineModuleInfo *MMI = AP->MMI; 161 162 // If module doesn't have named metadata anchors or COFF debug section 163 // is not available, skip any debug info related stuff. 164 if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || 165 !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) 166 return; 167 168 // Tell MMI that we have debug info. 169 MMI->setDebugInfoAvailability(true); 170 Asm = AP; 171 } 172 173 void CodeViewDebug::endModule() { 174 if (FnDebugInfo.empty()) 175 return; 176 177 emitTypeInformation(); 178 179 // FIXME: For functions that are comdat, we should emit separate .debug$S 180 // sections that are comdat associative with the main function instead of 181 // having one big .debug$S section. 182 assert(Asm != nullptr); 183 Asm->OutStreamer->SwitchSection( 184 Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); 185 Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); 186 187 // The COFF .debug$S section consists of several subsections, each starting 188 // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length 189 // of the payload followed by the payload itself. The subsections are 4-byte 190 // aligned. 191 192 // Make a subsection for all the inlined subprograms. 193 emitInlineeLinesSubsection(); 194 195 // Emit per-function debug information. 196 for (auto &P : FnDebugInfo) 197 emitDebugInfoForFunction(P.first, P.second); 198 199 // This subsection holds a file index to offset in string table table. 200 Asm->OutStreamer->AddComment("File index to string table offset subsection"); 201 Asm->OutStreamer->EmitCVFileChecksumsDirective(); 202 203 // This subsection holds the string table. 204 Asm->OutStreamer->AddComment("String table"); 205 Asm->OutStreamer->EmitCVStringTableDirective(); 206 207 clear(); 208 } 209 210 template <typename T> static void emitRecord(MCStreamer &OS, const T &Rec) { 211 OS.EmitBytes(StringRef(reinterpret_cast<const char *>(&Rec), sizeof(Rec))); 212 } 213 214 void CodeViewDebug::emitTypeInformation() { 215 // Start the .debug$T section with 0x4. 216 Asm->OutStreamer->SwitchSection( 217 Asm->getObjFileLowering().getCOFFDebugTypesSection()); 218 Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); 219 220 NamedMDNode *CU_Nodes = 221 Asm->MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); 222 if (!CU_Nodes) 223 return; 224 225 // This type info currently only holds function ids for use with inline call 226 // frame info. All functions are assigned a simple 'void ()' type. Emit that 227 // type here. 228 TypeIndex ArgListIdx = getNextTypeIndex(); 229 Asm->EmitInt16(2 + sizeof(ArgList)); 230 Asm->EmitInt16(LF_ARGLIST); 231 Asm->EmitInt32(0); 232 233 TypeIndex VoidProcIdx = getNextTypeIndex(); 234 Asm->EmitInt16(2 + sizeof(ProcedureType)); 235 Asm->EmitInt16(LF_PROCEDURE); 236 ProcedureType Proc{}; // Zero initialize. 237 Proc.ReturnType = TypeIndex::Void(); 238 Proc.CallConv = CallingConvention::NearC; 239 Proc.Options = FunctionOptions::None; 240 Proc.NumParameters = 0; 241 Proc.ArgListType = ArgListIdx; 242 emitRecord(*Asm->OutStreamer, Proc); 243 244 for (MDNode *N : CU_Nodes->operands()) { 245 auto *CUNode = cast<DICompileUnit>(N); 246 for (auto *SP : CUNode->getSubprograms()) { 247 StringRef DisplayName = SP->getDisplayName(); 248 Asm->EmitInt16(2 + sizeof(FuncId) + DisplayName.size() + 1); 249 Asm->EmitInt16(LF_FUNC_ID); 250 251 FuncId Func{}; // Zero initialize. 252 Func.ParentScope = TypeIndex(); 253 Func.FunctionType = VoidProcIdx; 254 emitRecord(*Asm->OutStreamer, Func); 255 Asm->OutStreamer->EmitBytes(DisplayName); 256 Asm->EmitInt8(0); 257 258 TypeIndex FuncIdIdx = getNextTypeIndex(); 259 SubprogramToFuncId.insert(std::make_pair(SP, FuncIdIdx)); 260 } 261 } 262 } 263 264 void CodeViewDebug::emitInlineeLinesSubsection() { 265 if (InlinedSubprograms.empty()) 266 return; 267 268 MCStreamer &OS = *Asm->OutStreamer; 269 MCSymbol *InlineBegin = Asm->MMI->getContext().createTempSymbol(), 270 *InlineEnd = Asm->MMI->getContext().createTempSymbol(); 271 272 OS.AddComment("Inlinee lines subsection"); 273 OS.EmitIntValue(unsigned(ModuleSubstreamKind::InlineeLines), 4); 274 OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 4); 275 OS.EmitLabel(InlineBegin); 276 277 // We don't provide any extra file info. 278 // FIXME: Find out if debuggers use this info. 279 OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4); 280 281 for (const DISubprogram *SP : InlinedSubprograms) { 282 TypeIndex TypeId = SubprogramToFuncId[SP]; 283 unsigned FileId = maybeRecordFile(SP->getFile()); 284 OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " + 285 SP->getFilename() + Twine(':') + Twine(SP->getLine())); 286 // The filechecksum table uses 8 byte entries for now, and file ids start at 287 // 1. 288 unsigned FileOffset = (FileId - 1) * 8; 289 OS.EmitIntValue(TypeId.getIndex(), 4); 290 OS.EmitIntValue(FileOffset, 4); 291 OS.EmitIntValue(SP->getLine(), 4); 292 } 293 294 OS.EmitLabel(InlineEnd); 295 } 296 297 static void EmitLabelDiff(MCStreamer &Streamer, 298 const MCSymbol *From, const MCSymbol *To, 299 unsigned int Size = 4) { 300 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 301 MCContext &Context = Streamer.getContext(); 302 const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context), 303 *ToRef = MCSymbolRefExpr::create(To, Variant, Context); 304 const MCExpr *AddrDelta = 305 MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context); 306 Streamer.EmitValue(AddrDelta, Size); 307 } 308 309 void CodeViewDebug::collectInlineSiteChildren( 310 SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI, 311 const InlineSite &Site) { 312 for (const DILocation *ChildSiteLoc : Site.ChildSites) { 313 auto I = FI.InlineSites.find(ChildSiteLoc); 314 assert(I != FI.InlineSites.end()); 315 const InlineSite &ChildSite = I->second; 316 Children.push_back(ChildSite.SiteFuncId); 317 collectInlineSiteChildren(Children, FI, ChildSite); 318 } 319 } 320 321 void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, 322 const DILocation *InlinedAt, 323 const InlineSite &Site) { 324 MCStreamer &OS = *Asm->OutStreamer; 325 326 MCSymbol *InlineBegin = Asm->MMI->getContext().createTempSymbol(), 327 *InlineEnd = Asm->MMI->getContext().createTempSymbol(); 328 329 assert(SubprogramToFuncId.count(Site.Inlinee)); 330 TypeIndex InlineeIdx = SubprogramToFuncId[Site.Inlinee]; 331 332 // SymbolRecord 333 EmitLabelDiff(OS, InlineBegin, InlineEnd, 2); // RecordLength 334 OS.EmitLabel(InlineBegin); 335 Asm->EmitInt16(SymbolRecordKind::S_INLINESITE); // RecordKind 336 337 InlineSiteSym SiteBytes{}; 338 SiteBytes.Inlinee = InlineeIdx; 339 Asm->OutStreamer->EmitBytes( 340 StringRef(reinterpret_cast<const char *>(&SiteBytes), sizeof(SiteBytes))); 341 342 unsigned FileId = maybeRecordFile(Site.Inlinee->getFile()); 343 unsigned StartLineNum = Site.Inlinee->getLine(); 344 SmallVector<unsigned, 3> SecondaryFuncIds; 345 collectInlineSiteChildren(SecondaryFuncIds, FI, Site); 346 347 OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, 348 FI.Begin, FI.End, SecondaryFuncIds); 349 350 OS.EmitLabel(InlineEnd); 351 352 // Recurse on child inlined call sites before closing the scope. 353 for (const DILocation *ChildSite : Site.ChildSites) { 354 auto I = FI.InlineSites.find(ChildSite); 355 assert(I != FI.InlineSites.end() && 356 "child site not in function inline site map"); 357 emitInlinedCallSite(FI, ChildSite, I->second); 358 } 359 360 // Close the scope. 361 Asm->EmitInt16(2); // RecordLength 362 Asm->EmitInt16(SymbolRecordKind::S_INLINESITE_END); // RecordKind 363 } 364 365 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, 366 FunctionInfo &FI) { 367 // For each function there is a separate subsection 368 // which holds the PC to file:line table. 369 const MCSymbol *Fn = Asm->getSymbol(GV); 370 assert(Fn); 371 372 StringRef FuncName; 373 if (auto *SP = getDISubprogram(GV)) 374 FuncName = SP->getDisplayName(); 375 376 // If our DISubprogram name is empty, use the mangled name. 377 if (FuncName.empty()) 378 FuncName = GlobalValue::getRealLinkageName(GV->getName()); 379 380 // Emit a symbol subsection, required by VS2012+ to find function boundaries. 381 MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(), 382 *SymbolsEnd = Asm->MMI->getContext().createTempSymbol(); 383 Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName)); 384 Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols)); 385 EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd); 386 Asm->OutStreamer->EmitLabel(SymbolsBegin); 387 { 388 MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(), 389 *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol(); 390 EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); 391 Asm->OutStreamer->EmitLabel(ProcSegmentBegin); 392 393 Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID)); 394 395 // Some bytes of this segment don't seem to be required for basic debugging, 396 // so just fill them with zeroes. 397 Asm->OutStreamer->EmitFill(12, 0); 398 // This is the important bit that tells the debugger where the function 399 // code is located and what's its size: 400 EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); 401 Asm->OutStreamer->EmitFill(12, 0); 402 Asm->OutStreamer->EmitCOFFSecRel32(Fn); 403 Asm->OutStreamer->EmitCOFFSectionIndex(Fn); 404 Asm->EmitInt8(0); 405 // Emit the function display name as a null-terminated string. 406 Asm->OutStreamer->EmitBytes(FuncName); 407 Asm->EmitInt8(0); 408 Asm->OutStreamer->EmitLabel(ProcSegmentEnd); 409 410 // Emit inlined call site information. Only emit functions inlined directly 411 // into the parent function. We'll emit the other sites recursively as part 412 // of their parent inline site. 413 for (auto &KV : FI.InlineSites) { 414 const DILocation *InlinedAt = KV.first; 415 if (!InlinedAt->getInlinedAt()) 416 emitInlinedCallSite(FI, InlinedAt, KV.second); 417 } 418 419 // We're done with this function. 420 Asm->EmitInt16(0x0002); 421 Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END)); 422 } 423 Asm->OutStreamer->EmitLabel(SymbolsEnd); 424 // Every subsection must be aligned to a 4-byte boundary. 425 Asm->OutStreamer->EmitValueToAlignment(4); 426 427 // We have an assembler directive that takes care of the whole line table. 428 Asm->OutStreamer->EmitCVLinetableDirective(FI.FuncId, Fn, FI.End); 429 } 430 431 void CodeViewDebug::beginFunction(const MachineFunction *MF) { 432 assert(!CurFn && "Can't process two functions at once!"); 433 434 if (!Asm || !Asm->MMI->hasDebugInfo()) 435 return; 436 437 const Function *GV = MF->getFunction(); 438 assert(FnDebugInfo.count(GV) == false); 439 CurFn = &FnDebugInfo[GV]; 440 CurFn->FuncId = NextFuncId++; 441 CurFn->Begin = Asm->getFunctionBegin(); 442 443 // Find the end of the function prolog. 444 // FIXME: is there a simpler a way to do this? Can we just search 445 // for the first instruction of the function, not the last of the prolog? 446 DebugLoc PrologEndLoc; 447 bool EmptyPrologue = true; 448 for (const auto &MBB : *MF) { 449 if (PrologEndLoc) 450 break; 451 for (const auto &MI : MBB) { 452 if (MI.isDebugValue()) 453 continue; 454 455 // First known non-DBG_VALUE and non-frame setup location marks 456 // the beginning of the function body. 457 // FIXME: do we need the first subcondition? 458 if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { 459 PrologEndLoc = MI.getDebugLoc(); 460 break; 461 } 462 EmptyPrologue = false; 463 } 464 } 465 // Record beginning of function if we have a non-empty prologue. 466 if (PrologEndLoc && !EmptyPrologue) { 467 DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); 468 maybeRecordLocation(FnStartDL, MF); 469 } 470 } 471 472 void CodeViewDebug::endFunction(const MachineFunction *MF) { 473 if (!Asm || !CurFn) // We haven't created any debug info for this function. 474 return; 475 476 const Function *GV = MF->getFunction(); 477 assert(FnDebugInfo.count(GV)); 478 assert(CurFn == &FnDebugInfo[GV]); 479 480 // Don't emit anything if we don't have any line tables. 481 if (!CurFn->HaveLineInfo) { 482 FnDebugInfo.erase(GV); 483 } else { 484 CurFn->End = Asm->getFunctionEnd(); 485 } 486 CurFn = nullptr; 487 } 488 489 void CodeViewDebug::beginInstruction(const MachineInstr *MI) { 490 // Ignore DBG_VALUE locations and function prologue. 491 if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) 492 return; 493 DebugLoc DL = MI->getDebugLoc(); 494 if (DL == PrevInstLoc || !DL) 495 return; 496 maybeRecordLocation(DL, Asm->MF); 497 } 498 } 499