1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains support for writing Microsoft CodeView debug info. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeViewDebug.h" 15 #include "llvm/DebugInfo/CodeView/CodeView.h" 16 #include "llvm/DebugInfo/CodeView/Line.h" 17 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 18 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 19 #include "llvm/DebugInfo/CodeView/TypeRecord.h" 20 #include "llvm/MC/MCExpr.h" 21 #include "llvm/MC/MCSymbol.h" 22 #include "llvm/Support/COFF.h" 23 24 using namespace llvm::codeview; 25 26 namespace llvm { 27 28 StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { 29 std::string &Filepath = FileToFilepathMap[File]; 30 if (!Filepath.empty()) 31 return Filepath; 32 33 StringRef Dir = File->getDirectory(), Filename = File->getFilename(); 34 35 // Clang emits directory and relative filename info into the IR, but CodeView 36 // operates on full paths. We could change Clang to emit full paths too, but 37 // that would increase the IR size and probably not needed for other users. 38 // For now, just concatenate and canonicalize the path here. 39 if (Filename.find(':') == 1) 40 Filepath = Filename; 41 else 42 Filepath = (Dir + "\\" + Filename).str(); 43 44 // Canonicalize the path. We have to do it textually because we may no longer 45 // have access the file in the filesystem. 46 // First, replace all slashes with backslashes. 47 std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); 48 49 // Remove all "\.\" with "\". 50 size_t Cursor = 0; 51 while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) 52 Filepath.erase(Cursor, 2); 53 54 // Replace all "\XXX\..\" with "\". Don't try too hard though as the original 55 // path should be well-formatted, e.g. start with a drive letter, etc. 56 Cursor = 0; 57 while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { 58 // Something's wrong if the path starts with "\..\", abort. 59 if (Cursor == 0) 60 break; 61 62 size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); 63 if (PrevSlash == std::string::npos) 64 // Something's wrong, abort. 65 break; 66 67 Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); 68 // The next ".." might be following the one we've just erased. 69 Cursor = PrevSlash; 70 } 71 72 // Remove all duplicate backslashes. 73 Cursor = 0; 74 while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) 75 Filepath.erase(Cursor, 1); 76 77 return Filepath; 78 } 79 80 unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { 81 unsigned NextId = FileIdMap.size() + 1; 82 auto Insertion = FileIdMap.insert(std::make_pair(F, NextId)); 83 if (Insertion.second) { 84 // We have to compute the full filepath and emit a .cv_file directive. 85 StringRef FullPath = getFullFilepath(F); 86 NextId = Asm->OutStreamer->EmitCVFileDirective(NextId, FullPath); 87 assert(NextId == FileIdMap.size() && ".cv_file directive failed"); 88 } 89 return Insertion.first->second; 90 } 91 92 CodeViewDebug::InlineSite &CodeViewDebug::getInlineSite(const DILocation *Loc) { 93 const DILocation *InlinedAt = Loc->getInlinedAt(); 94 auto Insertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()}); 95 if (Insertion.second) { 96 InlineSite &Site = Insertion.first->second; 97 Site.SiteFuncId = NextFuncId++; 98 Site.Inlinee = Loc->getScope()->getSubprogram(); 99 } 100 return Insertion.first->second; 101 } 102 103 void CodeViewDebug::maybeRecordLocation(DebugLoc DL, 104 const MachineFunction *MF) { 105 // Skip this instruction if it has the same location as the previous one. 106 if (DL == CurFn->LastLoc) 107 return; 108 109 const DIScope *Scope = DL.get()->getScope(); 110 if (!Scope) 111 return; 112 113 // Skip this line if it is longer than the maximum we can record. 114 LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true); 115 if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() || 116 LI.isNeverStepInto()) 117 return; 118 119 ColumnInfo CI(DL.getCol(), /*EndColumn=*/0); 120 if (CI.getStartColumn() != DL.getCol()) 121 return; 122 123 if (!CurFn->HaveLineInfo) 124 CurFn->HaveLineInfo = true; 125 unsigned FileId = 0; 126 if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile()) 127 FileId = CurFn->LastFileId; 128 else 129 FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile()); 130 CurFn->LastLoc = DL; 131 132 unsigned FuncId = CurFn->FuncId; 133 if (const DILocation *Loc = DL->getInlinedAt()) { 134 // If this location was actually inlined from somewhere else, give it the ID 135 // of the inline call site. 136 FuncId = getInlineSite(DL.get()).SiteFuncId; 137 // Ensure we have links in the tree of inline call sites. 138 const DILocation *ChildLoc = nullptr; 139 while (Loc->getInlinedAt()) { 140 InlineSite &Site = getInlineSite(Loc); 141 if (ChildLoc) { 142 // Record the child inline site if not already present. 143 auto B = Site.ChildSites.begin(), E = Site.ChildSites.end(); 144 if (std::find(B, E, Loc) != E) 145 break; 146 Site.ChildSites.push_back(Loc); 147 } 148 ChildLoc = Loc; 149 } 150 } 151 152 Asm->OutStreamer->EmitCVLocDirective(FuncId, FileId, DL.getLine(), 153 DL.getCol(), /*PrologueEnd=*/false, 154 /*IsStmt=*/false, DL->getFilename()); 155 } 156 157 CodeViewDebug::CodeViewDebug(AsmPrinter *AP) 158 : Asm(nullptr), CurFn(nullptr) { 159 MachineModuleInfo *MMI = AP->MMI; 160 161 // If module doesn't have named metadata anchors or COFF debug section 162 // is not available, skip any debug info related stuff. 163 if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || 164 !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) 165 return; 166 167 // Tell MMI that we have debug info. 168 MMI->setDebugInfoAvailability(true); 169 Asm = AP; 170 } 171 172 void CodeViewDebug::endModule() { 173 if (FnDebugInfo.empty()) 174 return; 175 176 emitTypeInformation(); 177 178 // FIXME: For functions that are comdat, we should emit separate .debug$S 179 // sections that are comdat associative with the main function instead of 180 // having one big .debug$S section. 181 assert(Asm != nullptr); 182 Asm->OutStreamer->SwitchSection( 183 Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); 184 Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); 185 186 // The COFF .debug$S section consists of several subsections, each starting 187 // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length 188 // of the payload followed by the payload itself. The subsections are 4-byte 189 // aligned. 190 191 // Emit per-function debug information. 192 for (auto &P : FnDebugInfo) 193 emitDebugInfoForFunction(P.first, P.second); 194 195 // This subsection holds a file index to offset in string table table. 196 Asm->OutStreamer->AddComment("File index to string table offset subsection"); 197 Asm->OutStreamer->EmitCVFileChecksumsDirective(); 198 199 // This subsection holds the string table. 200 Asm->OutStreamer->AddComment("String table"); 201 Asm->OutStreamer->EmitCVStringTableDirective(); 202 203 clear(); 204 } 205 206 template <typename T> static void emitRecord(MCStreamer &OS, const T &Rec) { 207 OS.EmitBytes(StringRef(reinterpret_cast<const char *>(&Rec), sizeof(Rec))); 208 } 209 210 void CodeViewDebug::emitTypeInformation() { 211 // Start the .debug$T section with 0x4. 212 Asm->OutStreamer->SwitchSection( 213 Asm->getObjFileLowering().getCOFFDebugTypesSection()); 214 Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); 215 216 NamedMDNode *CU_Nodes = 217 Asm->MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); 218 if (!CU_Nodes) 219 return; 220 221 // This type info currently only holds function ids for use with inline call 222 // frame info. All functions are assigned a simple 'void ()' type. Emit that 223 // type here. 224 TypeIndex ArgListIdx = getNextTypeIndex(); 225 Asm->EmitInt16(2 + sizeof(ArgList)); 226 Asm->EmitInt16(LF_ARGLIST); 227 Asm->EmitInt32(0); 228 229 TypeIndex VoidProcIdx = getNextTypeIndex(); 230 Asm->EmitInt16(2 + sizeof(ProcedureType)); 231 Asm->EmitInt16(LF_PROCEDURE); 232 ProcedureType Proc{}; // Zero initialize. 233 Proc.ReturnType = TypeIndex::Void(); 234 Proc.CallConv = CallingConvention::NearC; 235 Proc.Options = FunctionOptions::None; 236 Proc.NumParameters = 0; 237 Proc.ArgListType = ArgListIdx; 238 emitRecord(*Asm->OutStreamer, Proc); 239 240 for (MDNode *N : CU_Nodes->operands()) { 241 auto *CUNode = cast<DICompileUnit>(N); 242 for (auto *SP : CUNode->getSubprograms()) { 243 StringRef DisplayName = SP->getDisplayName(); 244 Asm->EmitInt16(2 + sizeof(FuncId) + DisplayName.size() + 1); 245 Asm->EmitInt16(LF_FUNC_ID); 246 247 FuncId Func{}; // Zero initialize. 248 Func.ParentScope = TypeIndex(); 249 Func.FunctionType = VoidProcIdx; 250 emitRecord(*Asm->OutStreamer, Func); 251 Asm->OutStreamer->EmitBytes(DisplayName); 252 Asm->EmitInt8(0); 253 254 TypeIndex FuncIdIdx = getNextTypeIndex(); 255 SubprogramToFuncId.insert(std::make_pair(SP, FuncIdIdx)); 256 } 257 } 258 } 259 260 static void EmitLabelDiff(MCStreamer &Streamer, 261 const MCSymbol *From, const MCSymbol *To, 262 unsigned int Size = 4) { 263 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 264 MCContext &Context = Streamer.getContext(); 265 const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context), 266 *ToRef = MCSymbolRefExpr::create(To, Variant, Context); 267 const MCExpr *AddrDelta = 268 MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context); 269 Streamer.EmitValue(AddrDelta, Size); 270 } 271 272 void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, 273 const DILocation *InlinedAt, 274 const InlineSite &Site) { 275 MCStreamer &OS = *Asm->OutStreamer; 276 277 MCSymbol *InlineBegin = Asm->MMI->getContext().createTempSymbol(), 278 *InlineEnd = Asm->MMI->getContext().createTempSymbol(); 279 280 assert(SubprogramToFuncId.count(Site.Inlinee)); 281 TypeIndex InlineeIdx = SubprogramToFuncId[Site.Inlinee]; 282 283 // SymbolRecord 284 EmitLabelDiff(OS, InlineBegin, InlineEnd, 2); // RecordLength 285 OS.EmitLabel(InlineBegin); 286 Asm->EmitInt16(SymbolRecordKind::S_INLINESITE); // RecordKind 287 288 InlineSiteSym SiteBytes{}; 289 SiteBytes.Inlinee = InlineeIdx; 290 Asm->OutStreamer->EmitBytes( 291 StringRef(reinterpret_cast<const char *>(&SiteBytes), sizeof(SiteBytes))); 292 293 // FIXME: annotations 294 295 OS.EmitLabel(InlineEnd); 296 297 // Recurse on child inlined call sites before closing the scope. 298 for (const DILocation *ChildSite : Site.ChildSites) { 299 auto I = FI.InlineSites.find(ChildSite); 300 assert(I != FI.InlineSites.end() && 301 "child site not in function inline site map"); 302 emitInlinedCallSite(FI, ChildSite, I->second); 303 } 304 305 // Close the scope. 306 Asm->EmitInt16(2); // RecordLength 307 Asm->EmitInt16(SymbolRecordKind::S_INLINESITE_END); // RecordKind 308 } 309 310 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, 311 FunctionInfo &FI) { 312 // For each function there is a separate subsection 313 // which holds the PC to file:line table. 314 const MCSymbol *Fn = Asm->getSymbol(GV); 315 assert(Fn); 316 317 StringRef FuncName; 318 if (auto *SP = getDISubprogram(GV)) 319 FuncName = SP->getDisplayName(); 320 321 // If our DISubprogram name is empty, use the mangled name. 322 if (FuncName.empty()) 323 FuncName = GlobalValue::getRealLinkageName(GV->getName()); 324 325 // Emit a symbol subsection, required by VS2012+ to find function boundaries. 326 MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(), 327 *SymbolsEnd = Asm->MMI->getContext().createTempSymbol(); 328 Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName)); 329 Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols)); 330 EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd); 331 Asm->OutStreamer->EmitLabel(SymbolsBegin); 332 { 333 MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(), 334 *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol(); 335 EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); 336 Asm->OutStreamer->EmitLabel(ProcSegmentBegin); 337 338 Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID)); 339 340 // Some bytes of this segment don't seem to be required for basic debugging, 341 // so just fill them with zeroes. 342 Asm->OutStreamer->EmitFill(12, 0); 343 // This is the important bit that tells the debugger where the function 344 // code is located and what's its size: 345 EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); 346 Asm->OutStreamer->EmitFill(12, 0); 347 Asm->OutStreamer->EmitCOFFSecRel32(Fn); 348 Asm->OutStreamer->EmitCOFFSectionIndex(Fn); 349 Asm->EmitInt8(0); 350 // Emit the function display name as a null-terminated string. 351 Asm->OutStreamer->EmitBytes(FuncName); 352 Asm->EmitInt8(0); 353 Asm->OutStreamer->EmitLabel(ProcSegmentEnd); 354 355 // Emit inlined call site information. Only emit functions inlined directly 356 // into the parent function. We'll emit the other sites recursively as part 357 // of their parent inline site. 358 for (auto &KV : FI.InlineSites) { 359 const DILocation *InlinedAt = KV.first; 360 if (!InlinedAt->getInlinedAt()) 361 emitInlinedCallSite(FI, InlinedAt, KV.second); 362 } 363 364 // We're done with this function. 365 Asm->EmitInt16(0x0002); 366 Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END)); 367 } 368 Asm->OutStreamer->EmitLabel(SymbolsEnd); 369 // Every subsection must be aligned to a 4-byte boundary. 370 Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0); 371 372 // We have an assembler directive that takes care of the whole line table. 373 Asm->OutStreamer->EmitCVLinetableDirective(FI.FuncId, Fn, FI.End); 374 } 375 376 void CodeViewDebug::beginFunction(const MachineFunction *MF) { 377 assert(!CurFn && "Can't process two functions at once!"); 378 379 if (!Asm || !Asm->MMI->hasDebugInfo()) 380 return; 381 382 const Function *GV = MF->getFunction(); 383 assert(FnDebugInfo.count(GV) == false); 384 CurFn = &FnDebugInfo[GV]; 385 CurFn->FuncId = NextFuncId++; 386 387 // Find the end of the function prolog. 388 // FIXME: is there a simpler a way to do this? Can we just search 389 // for the first instruction of the function, not the last of the prolog? 390 DebugLoc PrologEndLoc; 391 bool EmptyPrologue = true; 392 for (const auto &MBB : *MF) { 393 if (PrologEndLoc) 394 break; 395 for (const auto &MI : MBB) { 396 if (MI.isDebugValue()) 397 continue; 398 399 // First known non-DBG_VALUE and non-frame setup location marks 400 // the beginning of the function body. 401 // FIXME: do we need the first subcondition? 402 if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { 403 PrologEndLoc = MI.getDebugLoc(); 404 break; 405 } 406 EmptyPrologue = false; 407 } 408 } 409 // Record beginning of function if we have a non-empty prologue. 410 if (PrologEndLoc && !EmptyPrologue) { 411 DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); 412 maybeRecordLocation(FnStartDL, MF); 413 } 414 } 415 416 void CodeViewDebug::endFunction(const MachineFunction *MF) { 417 if (!Asm || !CurFn) // We haven't created any debug info for this function. 418 return; 419 420 const Function *GV = MF->getFunction(); 421 assert(FnDebugInfo.count(GV)); 422 assert(CurFn == &FnDebugInfo[GV]); 423 424 // Don't emit anything if we don't have any line tables. 425 if (!CurFn->HaveLineInfo) { 426 FnDebugInfo.erase(GV); 427 } else { 428 CurFn->End = Asm->getFunctionEnd(); 429 } 430 CurFn = nullptr; 431 } 432 433 void CodeViewDebug::beginInstruction(const MachineInstr *MI) { 434 // Ignore DBG_VALUE locations and function prologue. 435 if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) 436 return; 437 DebugLoc DL = MI->getDebugLoc(); 438 if (DL == PrevInstLoc || !DL) 439 return; 440 maybeRecordLocation(DL, Asm->MF); 441 } 442 } 443