1 //===-- CompileUnitIndex.cpp ------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "CompileUnitIndex.h"
11 
12 #include "PdbIndex.h"
13 #include "PdbUtil.h"
14 
15 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
16 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
17 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
18 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
19 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
20 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
21 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
22 #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
23 #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25 #include "llvm/Support/Path.h"
26 
27 #include "lldb/Utility/LLDBAssert.h"
28 
29 using namespace lldb;
30 using namespace lldb_private;
31 using namespace lldb_private::npdb;
32 using namespace llvm::codeview;
33 using namespace llvm::pdb;
34 
35 static bool IsMainFile(llvm::StringRef main, llvm::StringRef other) {
36   if (main == other)
37     return true;
38 
39   // If the files refer to the local file system, we can just ask the file
40   // system if they're equivalent.  But if the source isn't present on disk
41   // then we still want to try.
42   if (llvm::sys::fs::equivalent(main, other))
43     return true;
44 
45   // FIXME: If we ever want to support PDB debug info for non-Windows systems
46   // the following check will be wrong, but we need a way to store the host
47   // information in the PDB.
48   llvm::SmallString<64> normalized(other);
49   llvm::sys::path::native(normalized, llvm::sys::path::Style::windows);
50   return main.equals_lower(normalized);
51 }
52 
53 static void ParseCompile3(const CVSymbol &sym, CompilandIndexItem &cci) {
54   cci.m_compile_opts.emplace();
55   llvm::cantFail(
56       SymbolDeserializer::deserializeAs<Compile3Sym>(sym, *cci.m_compile_opts));
57 }
58 
59 static void ParseObjname(const CVSymbol &sym, CompilandIndexItem &cci) {
60   cci.m_obj_name.emplace();
61   llvm::cantFail(
62       SymbolDeserializer::deserializeAs<ObjNameSym>(sym, *cci.m_obj_name));
63 }
64 
65 static void ParseBuildInfo(PdbIndex &index, const CVSymbol &sym,
66                            CompilandIndexItem &cci) {
67   BuildInfoSym bis(SymbolRecordKind::BuildInfoSym);
68   llvm::cantFail(SymbolDeserializer::deserializeAs<BuildInfoSym>(sym, bis));
69 
70   // S_BUILDINFO just points to an LF_BUILDINFO in the IPI stream.  Let's do
71   // a little extra work to pull out the LF_BUILDINFO.
72   LazyRandomTypeCollection &types = index.ipi().typeCollection();
73   llvm::Optional<CVType> cvt = types.tryGetType(bis.BuildId);
74 
75   if (!cvt || cvt->kind() != LF_BUILDINFO)
76     return;
77 
78   BuildInfoRecord bir;
79   llvm::cantFail(TypeDeserializer::deserializeAs<BuildInfoRecord>(*cvt, bir));
80   cci.m_build_info.assign(bir.ArgIndices.begin(), bir.ArgIndices.end());
81 }
82 
83 static void ParseExtendedInfo(PdbIndex &index, CompilandIndexItem &item) {
84   const CVSymbolArray &syms = item.m_debug_stream.getSymbolArray();
85 
86   // This is a private function, it shouldn't be called if the information
87   // has already been parsed.
88   lldbassert(!item.m_obj_name);
89   lldbassert(!item.m_compile_opts);
90   lldbassert(item.m_build_info.empty());
91 
92   // We're looking for 3 things.  S_COMPILE3, S_OBJNAME, and S_BUILDINFO.
93   int found = 0;
94   for (const CVSymbol &sym : syms) {
95     switch (sym.kind()) {
96     case S_COMPILE3:
97       ParseCompile3(sym, item);
98       break;
99     case S_OBJNAME:
100       ParseObjname(sym, item);
101       break;
102     case S_BUILDINFO:
103       ParseBuildInfo(index, sym, item);
104       break;
105     default:
106       continue;
107     }
108     if (++found >= 3)
109       break;
110   }
111 }
112 
113 CompilandIndexItem::CompilandIndexItem(
114     PdbSymUid uid, llvm::pdb::ModuleDebugStreamRef debug_stream,
115     llvm::pdb::DbiModuleDescriptor descriptor)
116     : m_uid(uid), m_debug_stream(std::move(debug_stream)),
117       m_module_descriptor(std::move(descriptor)) {}
118 
119 CompilandIndexItem &CompileUnitIndex::GetOrCreateCompiland(uint16_t modi) {
120   PdbSymUid uid = PdbSymUid::makeCompilandId(modi);
121   return GetOrCreateCompiland(uid);
122 }
123 
124 CompilandIndexItem &
125 CompileUnitIndex::GetOrCreateCompiland(PdbSymUid compiland_uid) {
126   auto result = m_comp_units.try_emplace(compiland_uid.toOpaqueId(), nullptr);
127   if (!result.second)
128     return *result.first->second;
129 
130   // Find the module list and load its debug information stream and cache it
131   // since we need to use it for almost all interesting operations.
132   const DbiModuleList &modules = m_index.dbi().modules();
133   uint16_t modi = compiland_uid.asCompiland().modi;
134   llvm::pdb::DbiModuleDescriptor descriptor = modules.getModuleDescriptor(modi);
135   uint16_t stream = descriptor.getModuleStreamIndex();
136   std::unique_ptr<llvm::msf::MappedBlockStream> stream_data =
137       m_index.pdb().createIndexedStream(stream);
138   llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor,
139                                                std::move(stream_data));
140   cantFail(debug_stream.reload());
141 
142   std::unique_ptr<CompilandIndexItem> &cci = result.first->second;
143 
144   cci = llvm::make_unique<CompilandIndexItem>(
145       compiland_uid, std::move(debug_stream), std::move(descriptor));
146   ParseExtendedInfo(m_index, *cci);
147 
148   cci->m_strings.initialize(debug_stream.getSubsectionsArray());
149   PDBStringTable &strings = cantFail(m_index.pdb().getStringTable());
150   cci->m_strings.setStrings(strings.getStringTable());
151 
152   // We want the main source file to always comes first.  Note that we can't
153   // just push_back the main file onto the front because `GetMainSourceFile`
154   // computes it in such a way that it doesn't own the resulting memory.  So we
155   // have to iterate the module file list comparing each one to the main file
156   // name until we find it, and we can cache that one since the memory is backed
157   // by a contiguous chunk inside the mapped PDB.
158   llvm::SmallString<64> main_file = GetMainSourceFile(*cci);
159   llvm::sys::path::native(main_file, llvm::sys::path::Style::windows);
160 
161   uint32_t file_count = modules.getSourceFileCount(modi);
162   cci->m_file_list.reserve(file_count);
163   bool found_main_file = false;
164   for (llvm::StringRef file : modules.source_files(modi)) {
165     if (!found_main_file && IsMainFile(main_file, file)) {
166       cci->m_file_list.insert(cci->m_file_list.begin(), file);
167       found_main_file = true;
168       continue;
169     }
170     cci->m_file_list.push_back(file);
171   }
172 
173   return *cci;
174 }
175 
176 const CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) const {
177   return GetCompiland(PdbSymUid::makeCompilandId(modi));
178 }
179 
180 const CompilandIndexItem *
181 CompileUnitIndex::GetCompiland(PdbSymUid compiland_uid) const {
182   auto iter = m_comp_units.find(compiland_uid.toOpaqueId());
183   if (iter == m_comp_units.end())
184     return nullptr;
185   return iter->second.get();
186 }
187 
188 CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) {
189   return GetCompiland(PdbSymUid::makeCompilandId(modi));
190 }
191 
192 CompilandIndexItem *CompileUnitIndex::GetCompiland(PdbSymUid compiland_uid) {
193   auto iter = m_comp_units.find(compiland_uid.toOpaqueId());
194   if (iter == m_comp_units.end())
195     return nullptr;
196   return iter->second.get();
197 }
198 
199 llvm::SmallString<64>
200 CompileUnitIndex::GetMainSourceFile(const CompilandIndexItem &item) const {
201   // LF_BUILDINFO contains a list of arg indices which point to LF_STRING_ID
202   // records in the IPI stream.  The order of the arg indices is as follows:
203   // [0] - working directory where compiler was invoked.
204   // [1] - absolute path to compiler binary
205   // [2] - source file name
206   // [3] - path to compiler generated PDB (the /Zi PDB, although this entry gets
207   //       added even when using /Z7)
208   // [4] - full command line invocation.
209   //
210   // We need to form the path [0]\[2] to generate the full path to the main
211   // file.source
212   if (item.m_build_info.size() < 3)
213     return {""};
214 
215   LazyRandomTypeCollection &types = m_index.ipi().typeCollection();
216 
217   StringIdRecord working_dir;
218   StringIdRecord file_name;
219   CVType dir_cvt = types.getType(item.m_build_info[0]);
220   CVType file_cvt = types.getType(item.m_build_info[2]);
221   llvm::cantFail(
222       TypeDeserializer::deserializeAs<StringIdRecord>(dir_cvt, working_dir));
223   llvm::cantFail(
224       TypeDeserializer::deserializeAs<StringIdRecord>(file_cvt, file_name));
225 
226   llvm::SmallString<64> absolute_path = working_dir.String;
227   llvm::sys::path::append(absolute_path, file_name.String);
228   return absolute_path;
229 }
230