1 //===- Module.h - Module description ----------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
//  This file defines the ModuleFile class, which describes a module that has
//  been loaded from an AST file.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_SERIALIZATION_MODULE_H
16 #define LLVM_CLANG_SERIALIZATION_MODULE_H
17 
18 #include "clang/Basic/Module.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Serialization/ASTBitCodes.h"
21 #include "clang/Serialization/ContinuousRangeMap.h"
22 #include "clang/Serialization/ModuleFileExtension.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/PointerIntPair.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Bitcode/BitstreamReader.h"
29 #include "llvm/Support/Endian.h"
30 #include <cassert>
31 #include <cstdint>
32 #include <memory>
33 #include <string>
34 #include <vector>
35 
36 namespace clang {
37 
38 class FileEntry;
39 
40 namespace serialization {
41 
/// Identifies in what capacity a loaded file is being used.
enum ModuleKind {
  /// A module that was loaded implicitly.
  MK_ImplicitModule = 0,

  /// A module that was loaded explicitly.
  MK_ExplicitModule = 1,

  /// A PCH file that is treated as a PCH file.
  MK_PCH = 2,

  /// A PCH file that is treated as the preamble.
  MK_Preamble = 3,

  /// A PCH file that is treated as the actual main file.
  MK_MainFile = 4,

  /// A file that comes from a prebuilt module path.
  MK_PrebuiltModule = 5
};
62 
63 /// The input file that has been loaded from this AST file, along with
64 /// bools indicating whether this was an overridden buffer or if it was
65 /// out-of-date or not-found.
66 class InputFile {
67   enum {
68     Overridden = 1,
69     OutOfDate = 2,
70     NotFound = 3
71   };
72   llvm::PointerIntPair<const FileEntry *, 2, unsigned> Val;
73 
74 public:
75   InputFile() = default;
76 
77   InputFile(const FileEntry *File,
78             bool isOverridden = false, bool isOutOfDate = false) {
79     assert(!(isOverridden && isOutOfDate) &&
80            "an overridden cannot be out-of-date");
81     unsigned intVal = 0;
82     if (isOverridden)
83       intVal = Overridden;
84     else if (isOutOfDate)
85       intVal = OutOfDate;
86     Val.setPointerAndInt(File, intVal);
87   }
88 
getNotFound()89   static InputFile getNotFound() {
90     InputFile File;
91     File.Val.setInt(NotFound);
92     return File;
93   }
94 
getFile()95   const FileEntry *getFile() const { return Val.getPointer(); }
isOverridden()96   bool isOverridden() const { return Val.getInt() == Overridden; }
isOutOfDate()97   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
isNotFound()98   bool isNotFound() const { return Val.getInt() == NotFound; }
99 };
100 
101 /// Information about a module that has been loaded by the ASTReader.
102 ///
103 /// Each instance of the Module class corresponds to a single AST file, which
104 /// may be a precompiled header, precompiled preamble, a module, or an AST file
105 /// of some sort loaded as the main file, all of which are specific formulations
106 /// of the general notion of a "module". A module may depend on any number of
107 /// other modules.
108 class ModuleFile {
109 public:
ModuleFile(ModuleKind Kind,unsigned Generation)110   ModuleFile(ModuleKind Kind, unsigned Generation)
111       : Kind(Kind), Generation(Generation) {}
112   ~ModuleFile();
113 
114   // === General information ===
115 
116   /// The index of this module in the list of modules.
117   unsigned Index = 0;
118 
119   /// The type of this module.
120   ModuleKind Kind;
121 
122   /// The file name of the module file.
123   std::string FileName;
124 
125   /// The name of the module.
126   std::string ModuleName;
127 
128   /// The base directory of the module.
129   std::string BaseDirectory;
130 
getTimestampFilename()131   std::string getTimestampFilename() const {
132     return FileName + ".timestamp";
133   }
134 
135   /// The original source file name that was used to build the
136   /// primary AST file, which may have been modified for
137   /// relocatable-pch support.
138   std::string OriginalSourceFileName;
139 
140   /// The actual original source file name that was used to
141   /// build this AST file.
142   std::string ActualOriginalSourceFileName;
143 
144   /// The file ID for the original source file that was used to
145   /// build this AST file.
146   FileID OriginalSourceFileID;
147 
148   /// The directory that the PCH was originally created in. Used to
149   /// allow resolving headers even after headers+PCH was moved to a new path.
150   std::string OriginalDir;
151 
152   std::string ModuleMapPath;
153 
154   /// Whether this precompiled header is a relocatable PCH file.
155   bool RelocatablePCH = false;
156 
157   /// Whether timestamps are included in this module file.
158   bool HasTimestamps = false;
159 
160   /// Whether the PCH has a corresponding object file.
161   bool PCHHasObjectFile = false;
162 
163   /// The file entry for the module file.
164   const FileEntry *File = nullptr;
165 
166   /// The signature of the module file, which may be used instead of the size
167   /// and modification time to identify this particular file.
168   ASTFileSignature Signature;
169 
170   /// Whether this module has been directly imported by the
171   /// user.
172   bool DirectlyImported = false;
173 
174   /// The generation of which this module file is a part.
175   unsigned Generation;
176 
177   /// The memory buffer that stores the data associated with
178   /// this AST file, owned by the PCMCache in the ModuleManager.
179   llvm::MemoryBuffer *Buffer;
180 
181   /// The size of this file, in bits.
182   uint64_t SizeInBits = 0;
183 
184   /// The global bit offset (or base) of this module
185   uint64_t GlobalBitOffset = 0;
186 
187   /// The serialized bitstream data for this file.
188   StringRef Data;
189 
190   /// The main bitstream cursor for the main block.
191   llvm::BitstreamCursor Stream;
192 
193   /// The source location where the module was explicitly or implicitly
194   /// imported in the local translation unit.
195   ///
196   /// If module A depends on and imports module B, both modules will have the
197   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
198   /// source location inside module A).
199   ///
200   /// WARNING: This is largely useless. It doesn't tell you when a module was
201   /// made visible, just when the first submodule of that module was imported.
202   SourceLocation DirectImportLoc;
203 
204   /// The source location where this module was first imported.
205   SourceLocation ImportLoc;
206 
207   /// The first source location in this module.
208   SourceLocation FirstLoc;
209 
210   /// The list of extension readers that are attached to this module
211   /// file.
212   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
213 
214   /// The module offset map data for this file. If non-empty, the various
215   /// ContinuousRangeMaps described below have not yet been populated.
216   StringRef ModuleOffsetMap;
217 
218   // === Input Files ===
219 
220   /// The cursor to the start of the input-files block.
221   llvm::BitstreamCursor InputFilesCursor;
222 
223   /// Offsets for all of the input file entries in the AST file.
224   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
225 
226   /// The input files that have been loaded from this AST file.
227   std::vector<InputFile> InputFilesLoaded;
228 
229   // All user input files reside at the index range [0, NumUserInputFiles), and
230   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
231   unsigned NumUserInputFiles = 0;
232 
233   /// If non-zero, specifies the time when we last validated input
234   /// files.  Zero means we never validated them.
235   ///
236   /// The time is specified in seconds since the start of the Epoch.
237   uint64_t InputFilesValidationTimestamp = 0;
238 
239   // === Source Locations ===
240 
241   /// Cursor used to read source location entries.
242   llvm::BitstreamCursor SLocEntryCursor;
243 
244   /// The number of source location entries in this AST file.
245   unsigned LocalNumSLocEntries = 0;
246 
247   /// The base ID in the source manager's view of this module.
248   int SLocEntryBaseID = 0;
249 
250   /// The base offset in the source manager's view of this module.
251   unsigned SLocEntryBaseOffset = 0;
252 
253   /// Offsets for all of the source location entries in the
254   /// AST file.
255   const uint32_t *SLocEntryOffsets = nullptr;
256 
257   /// SLocEntries that we're going to preload.
258   SmallVector<uint64_t, 4> PreloadSLocEntries;
259 
260   /// Remapping table for source locations in this module.
261   ContinuousRangeMap<uint32_t, int, 2> SLocRemap;
262 
263   // === Identifiers ===
264 
265   /// The number of identifiers in this AST file.
266   unsigned LocalNumIdentifiers = 0;
267 
268   /// Offsets into the identifier table data.
269   ///
270   /// This array is indexed by the identifier ID (-1), and provides
271   /// the offset into IdentifierTableData where the string data is
272   /// stored.
273   const uint32_t *IdentifierOffsets = nullptr;
274 
275   /// Base identifier ID for identifiers local to this module.
276   serialization::IdentID BaseIdentifierID = 0;
277 
278   /// Remapping table for identifier IDs in this module.
279   ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
280 
281   /// Actual data for the on-disk hash table of identifiers.
282   ///
283   /// This pointer points into a memory buffer, where the on-disk hash
284   /// table for identifiers actually lives.
285   const char *IdentifierTableData = nullptr;
286 
287   /// A pointer to an on-disk hash table of opaque type
288   /// IdentifierHashTable.
289   void *IdentifierLookupTable = nullptr;
290 
291   /// Offsets of identifiers that we're going to preload within
292   /// IdentifierTableData.
293   std::vector<unsigned> PreloadIdentifierOffsets;
294 
295   // === Macros ===
296 
297   /// The cursor to the start of the preprocessor block, which stores
298   /// all of the macro definitions.
299   llvm::BitstreamCursor MacroCursor;
300 
301   /// The number of macros in this AST file.
302   unsigned LocalNumMacros = 0;
303 
304   /// Offsets of macros in the preprocessor block.
305   ///
306   /// This array is indexed by the macro ID (-1), and provides
307   /// the offset into the preprocessor block where macro definitions are
308   /// stored.
309   const uint32_t *MacroOffsets = nullptr;
310 
311   /// Base macro ID for macros local to this module.
312   serialization::MacroID BaseMacroID = 0;
313 
314   /// Remapping table for macro IDs in this module.
315   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
316 
317   /// The offset of the start of the set of defined macros.
318   uint64_t MacroStartOffset = 0;
319 
320   // === Detailed PreprocessingRecord ===
321 
322   /// The cursor to the start of the (optional) detailed preprocessing
323   /// record block.
324   llvm::BitstreamCursor PreprocessorDetailCursor;
325 
326   /// The offset of the start of the preprocessor detail cursor.
327   uint64_t PreprocessorDetailStartOffset = 0;
328 
329   /// Base preprocessed entity ID for preprocessed entities local to
330   /// this module.
331   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
332 
333   /// Remapping table for preprocessed entity IDs in this module.
334   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
335 
336   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
337   unsigned NumPreprocessedEntities = 0;
338 
339   /// Base ID for preprocessed skipped ranges local to this module.
340   unsigned BasePreprocessedSkippedRangeID = 0;
341 
342   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
343   unsigned NumPreprocessedSkippedRanges = 0;
344 
345   // === Header search information ===
346 
347   /// The number of local HeaderFileInfo structures.
348   unsigned LocalNumHeaderFileInfos = 0;
349 
350   /// Actual data for the on-disk hash table of header file
351   /// information.
352   ///
353   /// This pointer points into a memory buffer, where the on-disk hash
354   /// table for header file information actually lives.
355   const char *HeaderFileInfoTableData = nullptr;
356 
357   /// The on-disk hash table that contains information about each of
358   /// the header files.
359   void *HeaderFileInfoTable = nullptr;
360 
361   // === Submodule information ===
362 
363   /// The number of submodules in this module.
364   unsigned LocalNumSubmodules = 0;
365 
366   /// Base submodule ID for submodules local to this module.
367   serialization::SubmoduleID BaseSubmoduleID = 0;
368 
369   /// Remapping table for submodule IDs in this module.
370   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
371 
372   // === Selectors ===
373 
374   /// The number of selectors new to this file.
375   ///
376   /// This is the number of entries in SelectorOffsets.
377   unsigned LocalNumSelectors = 0;
378 
379   /// Offsets into the selector lookup table's data array
380   /// where each selector resides.
381   const uint32_t *SelectorOffsets = nullptr;
382 
383   /// Base selector ID for selectors local to this module.
384   serialization::SelectorID BaseSelectorID = 0;
385 
386   /// Remapping table for selector IDs in this module.
387   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
388 
389   /// A pointer to the character data that comprises the selector table
390   ///
391   /// The SelectorOffsets table refers into this memory.
392   const unsigned char *SelectorLookupTableData = nullptr;
393 
394   /// A pointer to an on-disk hash table of opaque type
395   /// ASTSelectorLookupTable.
396   ///
397   /// This hash table provides the IDs of all selectors, and the associated
398   /// instance and factory methods.
399   void *SelectorLookupTable = nullptr;
400 
401   // === Declarations ===
402 
403   /// DeclsCursor - This is a cursor to the start of the DECLS_BLOCK block. It
404   /// has read all the abbreviations at the start of the block and is ready to
405   /// jump around with these in context.
406   llvm::BitstreamCursor DeclsCursor;
407 
408   /// The number of declarations in this AST file.
409   unsigned LocalNumDecls = 0;
410 
411   /// Offset of each declaration within the bitstream, indexed
412   /// by the declaration ID (-1).
413   const DeclOffset *DeclOffsets = nullptr;
414 
415   /// Base declaration ID for declarations local to this module.
416   serialization::DeclID BaseDeclID = 0;
417 
418   /// Remapping table for declaration IDs in this module.
419   ContinuousRangeMap<uint32_t, int, 2> DeclRemap;
420 
421   /// Mapping from the module files that this module file depends on
422   /// to the base declaration ID for that module as it is understood within this
423   /// module.
424   ///
425   /// This is effectively a reverse global-to-local mapping for declaration
426   /// IDs, so that we can interpret a true global ID (for this translation unit)
427   /// as a local ID (for this module file).
428   llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs;
429 
430   /// Array of file-level DeclIDs sorted by file.
431   const serialization::DeclID *FileSortedDecls = nullptr;
432   unsigned NumFileSortedDecls = 0;
433 
434   /// Array of category list location information within this
435   /// module file, sorted by the definition ID.
436   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
437 
438   /// The number of redeclaration info entries in ObjCCategoriesMap.
439   unsigned LocalNumObjCCategoriesInMap = 0;
440 
441   /// The Objective-C category lists for categories known to this
442   /// module.
443   SmallVector<uint64_t, 1> ObjCCategories;
444 
445   // === Types ===
446 
447   /// The number of types in this AST file.
448   unsigned LocalNumTypes = 0;
449 
450   /// Offset of each type within the bitstream, indexed by the
451   /// type ID, or the representation of a Type*.
452   const uint32_t *TypeOffsets = nullptr;
453 
454   /// Base type ID for types local to this module as represented in
455   /// the global type ID space.
456   serialization::TypeID BaseTypeIndex = 0;
457 
458   /// Remapping table for type IDs in this module.
459   ContinuousRangeMap<uint32_t, int, 2> TypeRemap;
460 
461   // === Miscellaneous ===
462 
463   /// Diagnostic IDs and their mappings that the user changed.
464   SmallVector<uint64_t, 8> PragmaDiagMappings;
465 
466   /// List of modules which depend on this module
467   llvm::SetVector<ModuleFile *> ImportedBy;
468 
469   /// List of modules which this module depends on
470   llvm::SetVector<ModuleFile *> Imports;
471 
472   /// Determine whether this module was directly imported at
473   /// any point during translation.
isDirectlyImported()474   bool isDirectlyImported() const { return DirectlyImported; }
475 
476   /// Is this a module file for a module (rather than a PCH or similar).
isModule()477   bool isModule() const {
478     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
479            Kind == MK_PrebuiltModule;
480   }
481 
482   /// Dump debugging output for this module.
483   void dump();
484 };
485 
486 } // namespace serialization
487 
488 } // namespace clang
489 
490 #endif // LLVM_CLANG_SERIALIZATION_MODULE_H
491