1 //===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains support for writing Microsoft CodeView debug info. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeViewDebug.h" 15 #include "llvm/DebugInfo/CodeView/CodeView.h" 16 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 17 #include "llvm/MC/MCExpr.h" 18 #include "llvm/MC/MCSymbol.h" 19 #include "llvm/Support/COFF.h" 20 21 using namespace llvm::codeview; 22 23 namespace llvm { 24 25 StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { 26 std::string &Filepath = FileToFilepathMap[File]; 27 if (!Filepath.empty()) 28 return Filepath; 29 30 StringRef Dir = File->getDirectory(), Filename = File->getFilename(); 31 32 // Clang emits directory and relative filename info into the IR, but CodeView 33 // operates on full paths. We could change Clang to emit full paths too, but 34 // that would increase the IR size and probably not needed for other users. 35 // For now, just concatenate and canonicalize the path here. 36 if (Filename.find(':') == 1) 37 Filepath = Filename; 38 else 39 Filepath = (Dir + "\\" + Filename).str(); 40 41 // Canonicalize the path. We have to do it textually because we may no longer 42 // have access the file in the filesystem. 43 // First, replace all slashes with backslashes. 44 std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); 45 46 // Remove all "\.\" with "\". 47 size_t Cursor = 0; 48 while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) 49 Filepath.erase(Cursor, 2); 50 51 // Replace all "\XXX\..\" with "\". Don't try too hard though as the original 52 // path should be well-formatted, e.g. start with a drive letter, etc. 53 Cursor = 0; 54 while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { 55 // Something's wrong if the path starts with "\..\", abort. 56 if (Cursor == 0) 57 break; 58 59 size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); 60 if (PrevSlash == std::string::npos) 61 // Something's wrong, abort. 62 break; 63 64 Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); 65 // The next ".." might be following the one we've just erased. 66 Cursor = PrevSlash; 67 } 68 69 // Remove all duplicate backslashes. 70 Cursor = 0; 71 while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) 72 Filepath.erase(Cursor, 1); 73 74 return Filepath; 75 } 76 77 void CodeViewDebug::maybeRecordLocation(DebugLoc DL, 78 const MachineFunction *MF) { 79 // Skip this instruction if it has the same location as the previous one. 80 if (DL == CurFn->LastLoc) 81 return; 82 83 const DIScope *Scope = DL.get()->getScope(); 84 if (!Scope) 85 return; 86 87 // Skip this line if it is longer than the maximum we can record. 88 if (DL.getLine() > COFF::CVL_MaxLineNumber) 89 return; 90 91 CurFn->LastLoc = DL; 92 93 MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); 94 Asm->OutStreamer->EmitLabel(MCL); 95 CurFn->Instrs.push_back(MCL); 96 LabelsAndLocs[MCL] = DL; 97 } 98 99 CodeViewDebug::CodeViewDebug(AsmPrinter *AP) 100 : Asm(nullptr), CurFn(nullptr) { 101 MachineModuleInfo *MMI = AP->MMI; 102 103 // If module doesn't have named metadata anchors or COFF debug section 104 // is not available, skip any debug info related stuff. 105 if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || 106 !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) 107 return; 108 109 // Tell MMI that we have debug info. 110 MMI->setDebugInfoAvailability(true); 111 Asm = AP; 112 } 113 114 void CodeViewDebug::endModule() { 115 if (FnDebugInfo.empty()) 116 return; 117 118 // FIXME: For functions that are comdat, we should emit separate .debug$S 119 // sections that are comdat associative with the main function instead of 120 // having one big .debug$S section. 121 assert(Asm != nullptr); 122 Asm->OutStreamer->SwitchSection( 123 Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); 124 Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); 125 126 // The COFF .debug$S section consists of several subsections, each starting 127 // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length 128 // of the payload followed by the payload itself. The subsections are 4-byte 129 // aligned. 130 131 // Emit per-function debug information. This code is extracted into a 132 // separate function for readability. 133 for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) 134 emitDebugInfoForFunction(VisitedFunctions[I]); 135 136 // This subsection holds a file index to offset in string table table. 137 Asm->OutStreamer->AddComment("File index to string table offset subsection"); 138 Asm->EmitInt32(unsigned(ModuleSubstreamKind::FileChecksums)); 139 size_t NumFilenames = FileNameRegistry.Infos.size(); 140 Asm->EmitInt32(8 * NumFilenames); 141 for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { 142 StringRef Filename = FileNameRegistry.Filenames[I]; 143 // For each unique filename, just write its offset in the string table. 144 Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); 145 // The function name offset is not followed by any additional data. 146 Asm->EmitInt32(0); 147 } 148 149 // This subsection holds the string table. 150 Asm->OutStreamer->AddComment("String table"); 151 Asm->EmitInt32(unsigned(ModuleSubstreamKind::StringTable)); 152 Asm->EmitInt32(FileNameRegistry.LastOffset); 153 // The payload starts with a null character. 154 Asm->EmitInt8(0); 155 156 for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { 157 // Just emit unique filenames one by one, separated by a null character. 158 Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]); 159 Asm->EmitInt8(0); 160 } 161 162 // No more subsections. Fill with zeros to align the end of the section by 4. 163 Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0); 164 165 clear(); 166 } 167 168 static void EmitLabelDiff(MCStreamer &Streamer, 169 const MCSymbol *From, const MCSymbol *To, 170 unsigned int Size = 4) { 171 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 172 MCContext &Context = Streamer.getContext(); 173 const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context), 174 *ToRef = MCSymbolRefExpr::create(To, Variant, Context); 175 const MCExpr *AddrDelta = 176 MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context); 177 Streamer.EmitValue(AddrDelta, Size); 178 } 179 180 static const DIFile *getFileFromLoc(DebugLoc DL) { 181 return DL.get()->getScope()->getFile(); 182 } 183 184 void CodeViewDebug::emitDebugInfoForFunction(const Function *GV) { 185 // For each function there is a separate subsection 186 // which holds the PC to file:line table. 187 const MCSymbol *Fn = Asm->getSymbol(GV); 188 assert(Fn); 189 190 const FunctionInfo &FI = FnDebugInfo[GV]; 191 if (FI.Instrs.empty()) 192 return; 193 assert(FI.End && "Don't know where the function ends?"); 194 195 StringRef FuncName; 196 if (auto *SP = getDISubprogram(GV)) 197 FuncName = SP->getDisplayName(); 198 199 // If our DISubprogram name is empty, use the mangled name. 200 if (FuncName.empty()) 201 FuncName = GlobalValue::getRealLinkageName(GV->getName()); 202 203 // Emit a symbol subsection, required by VS2012+ to find function boundaries. 204 MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(), 205 *SymbolsEnd = Asm->MMI->getContext().createTempSymbol(); 206 Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName)); 207 Asm->EmitInt32(unsigned(ModuleSubstreamKind::Symbols)); 208 EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd); 209 Asm->OutStreamer->EmitLabel(SymbolsBegin); 210 { 211 MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(), 212 *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol(); 213 EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); 214 Asm->OutStreamer->EmitLabel(ProcSegmentBegin); 215 216 Asm->EmitInt16(unsigned(SymbolRecordKind::S_GPROC32_ID)); 217 218 // Some bytes of this segment don't seem to be required for basic debugging, 219 // so just fill them with zeroes. 220 Asm->OutStreamer->EmitFill(12, 0); 221 // This is the important bit that tells the debugger where the function 222 // code is located and what's its size: 223 EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); 224 Asm->OutStreamer->EmitFill(12, 0); 225 Asm->OutStreamer->EmitCOFFSecRel32(Fn); 226 Asm->OutStreamer->EmitCOFFSectionIndex(Fn); 227 Asm->EmitInt8(0); 228 // Emit the function display name as a null-terminated string. 229 Asm->OutStreamer->EmitBytes(FuncName); 230 Asm->EmitInt8(0); 231 Asm->OutStreamer->EmitLabel(ProcSegmentEnd); 232 233 // We're done with this function. 234 Asm->EmitInt16(0x0002); 235 Asm->EmitInt16(unsigned(SymbolRecordKind::S_PROC_ID_END)); 236 } 237 Asm->OutStreamer->EmitLabel(SymbolsEnd); 238 // Every subsection must be aligned to a 4-byte boundary. 239 Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0); 240 241 // PCs/Instructions are grouped into segments sharing the same filename. 242 // Pre-calculate the lengths (in instructions) of these segments and store 243 // them in a map for convenience. Each index in the map is the sequential 244 // number of the respective instruction that starts a new segment. 245 DenseMap<size_t, size_t> FilenameSegmentLengths; 246 size_t LastSegmentEnd = 0; 247 const DIFile *PrevFile = getFileFromLoc(LabelsAndLocs[FI.Instrs[0]]); 248 for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) { 249 const DIFile *CurFile = getFileFromLoc(LabelsAndLocs[FI.Instrs[J]]); 250 if (PrevFile == CurFile) 251 continue; 252 FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd; 253 LastSegmentEnd = J; 254 PrevFile = CurFile; 255 } 256 FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; 257 258 // Emit a line table subsection, required to do PC-to-file:line lookup. 259 Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); 260 Asm->EmitInt32(unsigned(ModuleSubstreamKind::Lines)); 261 MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), 262 *LineTableEnd = Asm->MMI->getContext().createTempSymbol(); 263 EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd); 264 Asm->OutStreamer->EmitLabel(LineTableBegin); 265 266 // Identify the function this subsection is for. 267 Asm->OutStreamer->EmitCOFFSecRel32(Fn); 268 Asm->OutStreamer->EmitCOFFSectionIndex(Fn); 269 // Insert flags after a 16-bit section index. 270 Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS); 271 272 // Length of the function's code, in bytes. 273 EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); 274 275 // PC-to-linenumber lookup table: 276 MCSymbol *FileSegmentEnd = nullptr; 277 278 // The start of the last segment: 279 size_t LastSegmentStart = 0; 280 281 auto FinishPreviousChunk = [&] { 282 if (!FileSegmentEnd) 283 return; 284 for (size_t ColSegI = LastSegmentStart, 285 ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart]; 286 ColSegI != ColSegEnd; ++ColSegI) { 287 unsigned ColumnNumber = LabelsAndLocs[FI.Instrs[ColSegI]].getCol(); 288 // Truncate the column number if it is longer than the maximum we can 289 // record. 290 if (ColumnNumber > COFF::CVL_MaxColumnNumber) 291 ColumnNumber = 0; 292 Asm->EmitInt16(ColumnNumber); // Start column 293 Asm->EmitInt16(0); // End column 294 } 295 Asm->OutStreamer->EmitLabel(FileSegmentEnd); 296 }; 297 298 for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { 299 MCSymbol *Instr = FI.Instrs[J]; 300 assert(LabelsAndLocs.count(Instr)); 301 302 if (FilenameSegmentLengths.count(J)) { 303 // We came to a beginning of a new filename segment. 304 FinishPreviousChunk(); 305 const DIFile *File = getFileFromLoc(LabelsAndLocs[FI.Instrs[J]]); 306 StringRef CurFilename = getFullFilepath(File); 307 size_t IndexInFileTable = FileNameRegistry.add(CurFilename); 308 // Each segment starts with the offset of the filename 309 // in the string table. 310 Asm->OutStreamer->AddComment( 311 "Segment for file '" + Twine(CurFilename) + "' begins"); 312 MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol(); 313 Asm->OutStreamer->EmitLabel(FileSegmentBegin); 314 Asm->EmitInt32(8 * IndexInFileTable); 315 316 // Number of PC records in the lookup table. 317 size_t SegmentLength = FilenameSegmentLengths[J]; 318 Asm->EmitInt32(SegmentLength); 319 320 // Full size of the segment for this filename, including the prev two 321 // records. 322 FileSegmentEnd = Asm->MMI->getContext().createTempSymbol(); 323 EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); 324 LastSegmentStart = J; 325 } 326 327 // The first PC with the given linenumber and the linenumber itself. 328 EmitLabelDiff(*Asm->OutStreamer, Fn, Instr); 329 uint32_t LineNumber = LabelsAndLocs[Instr].getLine(); 330 assert(LineNumber <= COFF::CVL_MaxLineNumber); 331 uint32_t LineData = LineNumber | COFF::CVL_IsStatement; 332 Asm->EmitInt32(LineData); 333 } 334 335 FinishPreviousChunk(); 336 Asm->OutStreamer->EmitLabel(LineTableEnd); 337 } 338 339 void CodeViewDebug::beginFunction(const MachineFunction *MF) { 340 assert(!CurFn && "Can't process two functions at once!"); 341 342 if (!Asm || !Asm->MMI->hasDebugInfo()) 343 return; 344 345 const Function *GV = MF->getFunction(); 346 assert(FnDebugInfo.count(GV) == false); 347 VisitedFunctions.push_back(GV); 348 CurFn = &FnDebugInfo[GV]; 349 350 // Find the end of the function prolog. 351 // FIXME: is there a simpler a way to do this? Can we just search 352 // for the first instruction of the function, not the last of the prolog? 353 DebugLoc PrologEndLoc; 354 bool EmptyPrologue = true; 355 for (const auto &MBB : *MF) { 356 if (PrologEndLoc) 357 break; 358 for (const auto &MI : MBB) { 359 if (MI.isDebugValue()) 360 continue; 361 362 // First known non-DBG_VALUE and non-frame setup location marks 363 // the beginning of the function body. 364 // FIXME: do we need the first subcondition? 365 if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { 366 PrologEndLoc = MI.getDebugLoc(); 367 break; 368 } 369 EmptyPrologue = false; 370 } 371 } 372 // Record beginning of function if we have a non-empty prologue. 373 if (PrologEndLoc && !EmptyPrologue) { 374 DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); 375 maybeRecordLocation(FnStartDL, MF); 376 } 377 } 378 379 void CodeViewDebug::endFunction(const MachineFunction *MF) { 380 if (!Asm || !CurFn) // We haven't created any debug info for this function. 381 return; 382 383 const Function *GV = MF->getFunction(); 384 assert(FnDebugInfo.count(GV)); 385 assert(CurFn == &FnDebugInfo[GV]); 386 387 if (CurFn->Instrs.empty()) { 388 FnDebugInfo.erase(GV); 389 VisitedFunctions.pop_back(); 390 } else { 391 CurFn->End = Asm->getFunctionEnd(); 392 } 393 CurFn = nullptr; 394 } 395 396 void CodeViewDebug::beginInstruction(const MachineInstr *MI) { 397 // Ignore DBG_VALUE locations and function prologue. 398 if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) 399 return; 400 DebugLoc DL = MI->getDebugLoc(); 401 if (DL == PrevInstLoc || !DL) 402 return; 403 maybeRecordLocation(DL, Asm->MF); 404 } 405 } 406