1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains support for writing Microsoft CodeView debug info. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeViewDebug.h" 15 #include "llvm/DebugInfo/CodeView/CodeView.h" 16 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 17 #include "llvm/MC/MCExpr.h" 18 #include "llvm/MC/MCSymbol.h" 19 #include "llvm/Support/COFF.h" 20 21 using namespace llvm::codeview; 22 23 namespace llvm { 24 25 StringRef CodeViewDebug::getFullFilepath(const MDNode *S) { 26 assert(S); 27 assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) || 28 isa<DILexicalBlockBase>(S)) && 29 "Unexpected scope info"); 30 31 auto *Scope = cast<DIScope>(S); 32 StringRef Dir = Scope->getDirectory(), 33 Filename = Scope->getFilename(); 34 std::string &Filepath = 35 DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; 36 if (!Filepath.empty()) 37 return Filepath; 38 39 // Clang emits directory and relative filename info into the IR, but CodeView 40 // operates on full paths. We could change Clang to emit full paths too, but 41 // that would increase the IR size and probably not needed for other users. 42 // For now, just concatenate and canonicalize the path here. 43 if (Filename.find(':') == 1) 44 Filepath = Filename; 45 else 46 Filepath = (Dir + "\\" + Filename).str(); 47 48 // Canonicalize the path. We have to do it textually because we may no longer 49 // have access the file in the filesystem. 50 // First, replace all slashes with backslashes. 51 std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); 52 53 // Remove all "\.\" with "\". 54 size_t Cursor = 0; 55 while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) 56 Filepath.erase(Cursor, 2); 57 58 // Replace all "\XXX\..\" with "\". Don't try too hard though as the original 59 // path should be well-formatted, e.g. start with a drive letter, etc. 60 Cursor = 0; 61 while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { 62 // Something's wrong if the path starts with "\..\", abort. 63 if (Cursor == 0) 64 break; 65 66 size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); 67 if (PrevSlash == std::string::npos) 68 // Something's wrong, abort. 69 break; 70 71 Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); 72 // The next ".." might be following the one we've just erased. 73 Cursor = PrevSlash; 74 } 75 76 // Remove all duplicate backslashes. 77 Cursor = 0; 78 while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) 79 Filepath.erase(Cursor, 1); 80 81 return Filepath; 82 } 83 84 void CodeViewDebug::maybeRecordLocation(DebugLoc DL, 85 const MachineFunction *MF) { 86 const MDNode *Scope = DL.getScope(); 87 if (!Scope) 88 return; 89 unsigned LineNumber = DL.getLine(); 90 // Skip this line if it is longer than the maximum we can record. 91 if (LineNumber > COFF::CVL_MaxLineNumber) 92 return; 93 94 unsigned ColumnNumber = DL.getCol(); 95 // Truncate the column number if it is longer than the maximum we can record. 96 if (ColumnNumber > COFF::CVL_MaxColumnNumber) 97 ColumnNumber = 0; 98 99 StringRef Filename = getFullFilepath(Scope); 100 101 // Skip this instruction if it has the same file:line as the previous one. 102 assert(CurFn); 103 if (!CurFn->Instrs.empty()) { 104 const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()]; 105 if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber && 106 LastInstr.ColumnNumber == ColumnNumber) 107 return; 108 } 109 FileNameRegistry.add(Filename); 110 111 MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); 112 Asm->OutStreamer->EmitLabel(MCL); 113 CurFn->Instrs.push_back(MCL); 114 InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber); 115 } 116 117 CodeViewDebug::CodeViewDebug(AsmPrinter *AP) 118 : Asm(nullptr), CurFn(nullptr) { 119 MachineModuleInfo *MMI = AP->MMI; 120 121 // If module doesn't have named metadata anchors or COFF debug section 122 // is not available, skip any debug info related stuff. 123 if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || 124 !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) 125 return; 126 127 // Tell MMI that we have debug info. 128 MMI->setDebugInfoAvailability(true); 129 Asm = AP; 130 } 131 132 void CodeViewDebug::endModule() { 133 if (FnDebugInfo.empty()) 134 return; 135 136 // FIXME: For functions that are comdat, we should emit separate .debug$S 137 // sections that are comdat associative with the main function instead of 138 // having one big .debug$S section. 139 assert(Asm != nullptr); 140 Asm->OutStreamer->SwitchSection( 141 Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); 142 Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); 143 144 // The COFF .debug$S section consists of several subsections, each starting 145 // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length 146 // of the payload followed by the payload itself. The subsections are 4-byte 147 // aligned. 148 149 // Emit per-function debug information. This code is extracted into a 150 // separate function for readability. 151 for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) 152 emitDebugInfoForFunction(VisitedFunctions[I]); 153 154 // This subsection holds a file index to offset in string table table. 155 Asm->OutStreamer->AddComment("File index to string table offset subsection"); 156 Asm->EmitInt32(unsigned(ModuleSubstreamKind::FileChecksums)); 157 size_t NumFilenames = FileNameRegistry.Infos.size(); 158 Asm->EmitInt32(8 * NumFilenames); 159 for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { 160 StringRef Filename = FileNameRegistry.Filenames[I]; 161 // For each unique filename, just write its offset in the string table. 162 Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); 163 // The function name offset is not followed by any additional data. 164 Asm->EmitInt32(0); 165 } 166 167 // This subsection holds the string table. 168 Asm->OutStreamer->AddComment("String table"); 169 Asm->EmitInt32(unsigned(ModuleSubstreamKind::StringTable)); 170 Asm->EmitInt32(FileNameRegistry.LastOffset); 171 // The payload starts with a null character. 172 Asm->EmitInt8(0); 173 174 for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { 175 // Just emit unique filenames one by one, separated by a null character. 176 Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]); 177 Asm->EmitInt8(0); 178 } 179 180 // No more subsections. Fill with zeros to align the end of the section by 4. 181 Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0); 182 183 clear(); 184 } 185 186 static void EmitLabelDiff(MCStreamer &Streamer, 187 const MCSymbol *From, const MCSymbol *To, 188 unsigned int Size = 4) { 189 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 190 MCContext &Context = Streamer.getContext(); 191 const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context), 192 *ToRef = MCSymbolRefExpr::create(To, Variant, Context); 193 const MCExpr *AddrDelta = 194 MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context); 195 Streamer.EmitValue(AddrDelta, Size); 196 } 197 198 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV) { 199 // For each function there is a separate subsection 200 // which holds the PC to file:line table. 201 const MCSymbol *Fn = Asm->getSymbol(GV); 202 assert(Fn); 203 204 const FunctionInfo &FI = FnDebugInfo[GV]; 205 if (FI.Instrs.empty()) 206 return; 207 assert(FI.End && "Don't know where the function ends?"); 208 209 StringRef FuncName; 210 if (auto *SP = getDISubprogram(GV)) 211 FuncName = SP->getDisplayName(); 212 213 // If our DISubprogram name is empty, use the mangled name. 214 if (FuncName.empty()) 215 FuncName = GlobalValue::getRealLinkageName(GV->getName()); 216 217 // Emit a symbol subsection, required by VS2012+ to find function boundaries. 218 MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(), 219 *SymbolsEnd = Asm->MMI->getContext().createTempSymbol(); 220 Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName)); 221 Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols)); 222 EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd); 223 Asm->OutStreamer->EmitLabel(SymbolsBegin); 224 { 225 MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(), 226 *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol(); 227 EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); 228 Asm->OutStreamer->EmitLabel(ProcSegmentBegin); 229 230 Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID)); 231 232 // Some bytes of this segment don't seem to be required for basic debugging, 233 // so just fill them with zeroes. 234 Asm->OutStreamer->EmitFill(12, 0); 235 // This is the important bit that tells the debugger where the function 236 // code is located and what's its size: 237 EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); 238 Asm->OutStreamer->EmitFill(12, 0); 239 Asm->OutStreamer->EmitCOFFSecRel32(Fn); 240 Asm->OutStreamer->EmitCOFFSectionIndex(Fn); 241 Asm->EmitInt8(0); 242 // Emit the function display name as a null-terminated string. 243 Asm->OutStreamer->EmitBytes(FuncName); 244 Asm->EmitInt8(0); 245 Asm->OutStreamer->EmitLabel(ProcSegmentEnd); 246 247 // We're done with this function. 248 Asm->EmitInt16(0x0002); 249 Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END)); 250 } 251 Asm->OutStreamer->EmitLabel(SymbolsEnd); 252 // Every subsection must be aligned to a 4-byte boundary. 253 Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0); 254 255 // PCs/Instructions are grouped into segments sharing the same filename. 256 // Pre-calculate the lengths (in instructions) of these segments and store 257 // them in a map for convenience. Each index in the map is the sequential 258 // number of the respective instruction that starts a new segment. 259 DenseMap<size_t, size_t> FilenameSegmentLengths; 260 size_t LastSegmentEnd = 0; 261 StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename; 262 for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) { 263 if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename) 264 continue; 265 FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd; 266 LastSegmentEnd = J; 267 PrevFilename = InstrInfo[FI.Instrs[J]].Filename; 268 } 269 FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; 270 271 // Emit a line table subsection, required to do PC-to-file:line lookup. 272 Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); 273 Asm->EmitInt32(unsigned(ModuleSubstreamKind::Lines)); 274 MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), 275 *LineTableEnd = Asm->MMI->getContext().createTempSymbol(); 276 EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd); 277 Asm->OutStreamer->EmitLabel(LineTableBegin); 278 279 // Identify the function this subsection is for. 280 Asm->OutStreamer->EmitCOFFSecRel32(Fn); 281 Asm->OutStreamer->EmitCOFFSectionIndex(Fn); 282 // Insert flags after a 16-bit section index. 283 Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS); 284 285 // Length of the function's code, in bytes. 286 EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); 287 288 // PC-to-linenumber lookup table: 289 MCSymbol *FileSegmentEnd = nullptr; 290 291 // The start of the last segment: 292 size_t LastSegmentStart = 0; 293 294 auto FinishPreviousChunk = [&] { 295 if (!FileSegmentEnd) 296 return; 297 for (size_t ColSegI = LastSegmentStart, 298 ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart]; 299 ColSegI != ColSegEnd; ++ColSegI) { 300 unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber; 301 assert(ColumnNumber <= COFF::CVL_MaxColumnNumber); 302 Asm->EmitInt16(ColumnNumber); // Start column 303 Asm->EmitInt16(0); // End column 304 } 305 Asm->OutStreamer->EmitLabel(FileSegmentEnd); 306 }; 307 308 for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { 309 MCSymbol *Instr = FI.Instrs[J]; 310 assert(InstrInfo.count(Instr)); 311 312 if (FilenameSegmentLengths.count(J)) { 313 // We came to a beginning of a new filename segment. 314 FinishPreviousChunk(); 315 StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; 316 assert(FileNameRegistry.Infos.count(CurFilename)); 317 size_t IndexInStringTable = 318 FileNameRegistry.Infos[CurFilename].FilenameID; 319 // Each segment starts with the offset of the filename 320 // in the string table. 321 Asm->OutStreamer->AddComment( 322 "Segment for file '" + Twine(CurFilename) + "' begins"); 323 MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol(); 324 Asm->OutStreamer->EmitLabel(FileSegmentBegin); 325 Asm->EmitInt32(8 * IndexInStringTable); 326 327 // Number of PC records in the lookup table. 328 size_t SegmentLength = FilenameSegmentLengths[J]; 329 Asm->EmitInt32(SegmentLength); 330 331 // Full size of the segment for this filename, including the prev two 332 // records. 333 FileSegmentEnd = Asm->MMI->getContext().createTempSymbol(); 334 EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); 335 LastSegmentStart = J; 336 } 337 338 // The first PC with the given linenumber and the linenumber itself. 339 EmitLabelDiff(*Asm->OutStreamer, Fn, Instr); 340 uint32_t LineNumber = InstrInfo[Instr].LineNumber; 341 assert(LineNumber <= COFF::CVL_MaxLineNumber); 342 uint32_t LineData = LineNumber | COFF::CVL_IsStatement; 343 Asm->EmitInt32(LineData); 344 } 345 346 FinishPreviousChunk(); 347 Asm->OutStreamer->EmitLabel(LineTableEnd); 348 } 349 350 void CodeViewDebug::beginFunction(const MachineFunction *MF) { 351 assert(!CurFn && "Can't process two functions at once!"); 352 353 if (!Asm || !Asm->MMI->hasDebugInfo()) 354 return; 355 356 const Function *GV = MF->getFunction(); 357 assert(FnDebugInfo.count(GV) == false); 358 VisitedFunctions.push_back(GV); 359 CurFn = &FnDebugInfo[GV]; 360 361 // Find the end of the function prolog. 362 // FIXME: is there a simpler a way to do this? Can we just search 363 // for the first instruction of the function, not the last of the prolog? 364 DebugLoc PrologEndLoc; 365 bool EmptyPrologue = true; 366 for (const auto &MBB : *MF) { 367 if (PrologEndLoc) 368 break; 369 for (const auto &MI : MBB) { 370 if (MI.isDebugValue()) 371 continue; 372 373 // First known non-DBG_VALUE and non-frame setup location marks 374 // the beginning of the function body. 375 // FIXME: do we need the first subcondition? 376 if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { 377 PrologEndLoc = MI.getDebugLoc(); 378 break; 379 } 380 EmptyPrologue = false; 381 } 382 } 383 // Record beginning of function if we have a non-empty prologue. 384 if (PrologEndLoc && !EmptyPrologue) { 385 DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); 386 maybeRecordLocation(FnStartDL, MF); 387 } 388 } 389 390 void CodeViewDebug::endFunction(const MachineFunction *MF) { 391 if (!Asm || !CurFn) // We haven't created any debug info for this function. 392 return; 393 394 const Function *GV = MF->getFunction(); 395 assert(FnDebugInfo.count(GV)); 396 assert(CurFn == &FnDebugInfo[GV]); 397 398 if (CurFn->Instrs.empty()) { 399 FnDebugInfo.erase(GV); 400 VisitedFunctions.pop_back(); 401 } else { 402 CurFn->End = Asm->getFunctionEnd(); 403 } 404 CurFn = nullptr; 405 } 406 407 void CodeViewDebug::beginInstruction(const MachineInstr *MI) { 408 // Ignore DBG_VALUE locations and function prologue. 409 if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) 410 return; 411 DebugLoc DL = MI->getDebugLoc(); 412 if (DL == PrevInstLoc || !DL) 413 return; 414 maybeRecordLocation(DL, Asm->MF); 415 } 416 } 417