1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
//  This file defines the ModuleFile class, which describes a module that has
//  been loaded from an AST file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H
15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H
16 
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/Module.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Serialization/ASTBitCodes.h"
21 #include "clang/Serialization/ContinuousRangeMap.h"
22 #include "clang/Serialization/ModuleFileExtension.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/PointerIntPair.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Bitstream/BitstreamReader.h"
29 #include "llvm/Support/Endian.h"
30 #include <cassert>
31 #include <cstdint>
32 #include <memory>
33 #include <string>
34 #include <vector>
35 
36 namespace clang {
37 
38 namespace serialization {
39 
/// Classifies a loaded AST file by how it entered the compilation.
enum ModuleKind {
  /// A module file that was loaded implicitly.
  MK_ImplicitModule,

  /// A module file that was loaded explicitly.
  MK_ExplicitModule,

  /// A PCH file that is being used as an ordinary PCH.
  MK_PCH,

  /// A PCH file that is being used as the preamble.
  MK_Preamble,

  /// A PCH file that is being used as the actual main file.
  MK_MainFile,

  /// A module file that came from a prebuilt module path.
  MK_PrebuiltModule,
};
60 
61 /// The input file that has been loaded from this AST file, along with
62 /// bools indicating whether this was an overridden buffer or if it was
63 /// out-of-date or not-found.
64 class InputFile {
65   enum {
66     Overridden = 1,
67     OutOfDate = 2,
68     NotFound = 3
69   };
70   llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val;
71 
72 public:
73   InputFile() = default;
74 
75   InputFile(FileEntryRef File, bool isOverridden = false,
76             bool isOutOfDate = false) {
77     assert(!(isOverridden && isOutOfDate) &&
78            "an overridden cannot be out-of-date");
79     unsigned intVal = 0;
80     if (isOverridden)
81       intVal = Overridden;
82     else if (isOutOfDate)
83       intVal = OutOfDate;
84     Val.setPointerAndInt(&File.getMapEntry(), intVal);
85   }
86 
getNotFound()87   static InputFile getNotFound() {
88     InputFile File;
89     File.Val.setInt(NotFound);
90     return File;
91   }
92 
getFile()93   OptionalFileEntryRefDegradesToFileEntryPtr getFile() const {
94     if (auto *P = Val.getPointer())
95       return FileEntryRef(*P);
96     return None;
97   }
isOverridden()98   bool isOverridden() const { return Val.getInt() == Overridden; }
isOutOfDate()99   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
isNotFound()100   bool isNotFound() const { return Val.getInt() == NotFound; }
101 };
102 
103 /// Information about a module that has been loaded by the ASTReader.
104 ///
105 /// Each instance of the Module class corresponds to a single AST file, which
106 /// may be a precompiled header, precompiled preamble, a module, or an AST file
107 /// of some sort loaded as the main file, all of which are specific formulations
108 /// of the general notion of a "module". A module may depend on any number of
109 /// other modules.
110 class ModuleFile {
111 public:
ModuleFile(ModuleKind Kind,unsigned Generation)112   ModuleFile(ModuleKind Kind, unsigned Generation)
113       : Kind(Kind), Generation(Generation) {}
114   ~ModuleFile();
115 
116   // === General information ===
117 
118   /// The index of this module in the list of modules.
119   unsigned Index = 0;
120 
121   /// The type of this module.
122   ModuleKind Kind;
123 
124   /// The file name of the module file.
125   std::string FileName;
126 
127   /// The name of the module.
128   std::string ModuleName;
129 
130   /// The base directory of the module.
131   std::string BaseDirectory;
132 
getTimestampFilename()133   std::string getTimestampFilename() const {
134     return FileName + ".timestamp";
135   }
136 
137   /// The original source file name that was used to build the
138   /// primary AST file, which may have been modified for
139   /// relocatable-pch support.
140   std::string OriginalSourceFileName;
141 
142   /// The actual original source file name that was used to
143   /// build this AST file.
144   std::string ActualOriginalSourceFileName;
145 
146   /// The file ID for the original source file that was used to
147   /// build this AST file.
148   FileID OriginalSourceFileID;
149 
150   /// The directory that the PCH was originally created in. Used to
151   /// allow resolving headers even after headers+PCH was moved to a new path.
152   std::string OriginalDir;
153 
154   std::string ModuleMapPath;
155 
156   /// Whether this precompiled header is a relocatable PCH file.
157   bool RelocatablePCH = false;
158 
159   /// Whether timestamps are included in this module file.
160   bool HasTimestamps = false;
161 
162   /// Whether the top-level module has been read from the AST file.
163   bool DidReadTopLevelSubmodule = false;
164 
165   /// The file entry for the module file.
166   OptionalFileEntryRefDegradesToFileEntryPtr File;
167 
168   /// The signature of the module file, which may be used instead of the size
169   /// and modification time to identify this particular file.
170   ASTFileSignature Signature;
171 
172   /// The signature of the AST block of the module file, this can be used to
173   /// unique module files based on AST contents.
174   ASTFileSignature ASTBlockHash;
175 
176   /// Whether this module has been directly imported by the
177   /// user.
178   bool DirectlyImported = false;
179 
180   /// The generation of which this module file is a part.
181   unsigned Generation;
182 
183   /// The memory buffer that stores the data associated with
184   /// this AST file, owned by the InMemoryModuleCache.
185   llvm::MemoryBuffer *Buffer;
186 
187   /// The size of this file, in bits.
188   uint64_t SizeInBits = 0;
189 
190   /// The global bit offset (or base) of this module
191   uint64_t GlobalBitOffset = 0;
192 
193   /// The bit offset of the AST block of this module.
194   uint64_t ASTBlockStartOffset = 0;
195 
196   /// The serialized bitstream data for this file.
197   StringRef Data;
198 
199   /// The main bitstream cursor for the main block.
200   llvm::BitstreamCursor Stream;
201 
202   /// The source location where the module was explicitly or implicitly
203   /// imported in the local translation unit.
204   ///
205   /// If module A depends on and imports module B, both modules will have the
206   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
207   /// source location inside module A).
208   ///
209   /// WARNING: This is largely useless. It doesn't tell you when a module was
210   /// made visible, just when the first submodule of that module was imported.
211   SourceLocation DirectImportLoc;
212 
213   /// The source location where this module was first imported.
214   SourceLocation ImportLoc;
215 
216   /// The first source location in this module.
217   SourceLocation FirstLoc;
218 
219   /// The list of extension readers that are attached to this module
220   /// file.
221   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
222 
223   /// The module offset map data for this file. If non-empty, the various
224   /// ContinuousRangeMaps described below have not yet been populated.
225   StringRef ModuleOffsetMap;
226 
227   // === Input Files ===
228 
229   /// The cursor to the start of the input-files block.
230   llvm::BitstreamCursor InputFilesCursor;
231 
232   /// Offsets for all of the input file entries in the AST file.
233   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
234 
235   /// The input files that have been loaded from this AST file.
236   std::vector<InputFile> InputFilesLoaded;
237 
238   // All user input files reside at the index range [0, NumUserInputFiles), and
239   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
240   unsigned NumUserInputFiles = 0;
241 
242   /// If non-zero, specifies the time when we last validated input
243   /// files.  Zero means we never validated them.
244   ///
245   /// The time is specified in seconds since the start of the Epoch.
246   uint64_t InputFilesValidationTimestamp = 0;
247 
248   // === Source Locations ===
249 
250   /// Cursor used to read source location entries.
251   llvm::BitstreamCursor SLocEntryCursor;
252 
253   /// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
254   uint64_t SourceManagerBlockStartOffset = 0;
255 
256   /// The number of source location entries in this AST file.
257   unsigned LocalNumSLocEntries = 0;
258 
259   /// The base ID in the source manager's view of this module.
260   int SLocEntryBaseID = 0;
261 
262   /// The base offset in the source manager's view of this module.
263   SourceLocation::UIntTy SLocEntryBaseOffset = 0;
264 
265   /// Base file offset for the offsets in SLocEntryOffsets. Real file offset
266   /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i].
267   uint64_t SLocEntryOffsetsBase = 0;
268 
269   /// Offsets for all of the source location entries in the
270   /// AST file.
271   const uint32_t *SLocEntryOffsets = nullptr;
272 
273   /// SLocEntries that we're going to preload.
274   SmallVector<uint64_t, 4> PreloadSLocEntries;
275 
276   /// Remapping table for source locations in this module.
277   ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
278       SLocRemap;
279 
280   // === Identifiers ===
281 
282   /// The number of identifiers in this AST file.
283   unsigned LocalNumIdentifiers = 0;
284 
285   /// Offsets into the identifier table data.
286   ///
287   /// This array is indexed by the identifier ID (-1), and provides
288   /// the offset into IdentifierTableData where the string data is
289   /// stored.
290   const uint32_t *IdentifierOffsets = nullptr;
291 
292   /// Base identifier ID for identifiers local to this module.
293   serialization::IdentID BaseIdentifierID = 0;
294 
295   /// Remapping table for identifier IDs in this module.
296   ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
297 
298   /// Actual data for the on-disk hash table of identifiers.
299   ///
300   /// This pointer points into a memory buffer, where the on-disk hash
301   /// table for identifiers actually lives.
302   const unsigned char *IdentifierTableData = nullptr;
303 
304   /// A pointer to an on-disk hash table of opaque type
305   /// IdentifierHashTable.
306   void *IdentifierLookupTable = nullptr;
307 
308   /// Offsets of identifiers that we're going to preload within
309   /// IdentifierTableData.
310   std::vector<unsigned> PreloadIdentifierOffsets;
311 
312   // === Macros ===
313 
314   /// The cursor to the start of the preprocessor block, which stores
315   /// all of the macro definitions.
316   llvm::BitstreamCursor MacroCursor;
317 
318   /// The number of macros in this AST file.
319   unsigned LocalNumMacros = 0;
320 
321   /// Base file offset for the offsets in MacroOffsets. Real file offset for
322   /// the entry is MacroOffsetsBase + MacroOffsets[i].
323   uint64_t MacroOffsetsBase = 0;
324 
325   /// Offsets of macros in the preprocessor block.
326   ///
327   /// This array is indexed by the macro ID (-1), and provides
328   /// the offset into the preprocessor block where macro definitions are
329   /// stored.
330   const uint32_t *MacroOffsets = nullptr;
331 
332   /// Base macro ID for macros local to this module.
333   serialization::MacroID BaseMacroID = 0;
334 
335   /// Remapping table for macro IDs in this module.
336   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
337 
338   /// The offset of the start of the set of defined macros.
339   uint64_t MacroStartOffset = 0;
340 
341   // === Detailed PreprocessingRecord ===
342 
343   /// The cursor to the start of the (optional) detailed preprocessing
344   /// record block.
345   llvm::BitstreamCursor PreprocessorDetailCursor;
346 
347   /// The offset of the start of the preprocessor detail cursor.
348   uint64_t PreprocessorDetailStartOffset = 0;
349 
350   /// Base preprocessed entity ID for preprocessed entities local to
351   /// this module.
352   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
353 
354   /// Remapping table for preprocessed entity IDs in this module.
355   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
356 
357   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
358   unsigned NumPreprocessedEntities = 0;
359 
360   /// Base ID for preprocessed skipped ranges local to this module.
361   unsigned BasePreprocessedSkippedRangeID = 0;
362 
363   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
364   unsigned NumPreprocessedSkippedRanges = 0;
365 
366   // === Header search information ===
367 
368   /// The number of local HeaderFileInfo structures.
369   unsigned LocalNumHeaderFileInfos = 0;
370 
371   /// Actual data for the on-disk hash table of header file
372   /// information.
373   ///
374   /// This pointer points into a memory buffer, where the on-disk hash
375   /// table for header file information actually lives.
376   const char *HeaderFileInfoTableData = nullptr;
377 
378   /// The on-disk hash table that contains information about each of
379   /// the header files.
380   void *HeaderFileInfoTable = nullptr;
381 
382   // === Submodule information ===
383 
384   /// The number of submodules in this module.
385   unsigned LocalNumSubmodules = 0;
386 
387   /// Base submodule ID for submodules local to this module.
388   serialization::SubmoduleID BaseSubmoduleID = 0;
389 
390   /// Remapping table for submodule IDs in this module.
391   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
392 
393   // === Selectors ===
394 
395   /// The number of selectors new to this file.
396   ///
397   /// This is the number of entries in SelectorOffsets.
398   unsigned LocalNumSelectors = 0;
399 
400   /// Offsets into the selector lookup table's data array
401   /// where each selector resides.
402   const uint32_t *SelectorOffsets = nullptr;
403 
404   /// Base selector ID for selectors local to this module.
405   serialization::SelectorID BaseSelectorID = 0;
406 
407   /// Remapping table for selector IDs in this module.
408   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
409 
410   /// A pointer to the character data that comprises the selector table
411   ///
412   /// The SelectorOffsets table refers into this memory.
413   const unsigned char *SelectorLookupTableData = nullptr;
414 
415   /// A pointer to an on-disk hash table of opaque type
416   /// ASTSelectorLookupTable.
417   ///
418   /// This hash table provides the IDs of all selectors, and the associated
419   /// instance and factory methods.
420   void *SelectorLookupTable = nullptr;
421 
422   // === Declarations ===
423 
424   /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
425   /// It has read all the abbreviations at the start of the block and is ready
426   /// to jump around with these in context.
427   llvm::BitstreamCursor DeclsCursor;
428 
429   /// The offset to the start of the DECLTYPES_BLOCK block.
430   uint64_t DeclsBlockStartOffset = 0;
431 
432   /// The number of declarations in this AST file.
433   unsigned LocalNumDecls = 0;
434 
435   /// Offset of each declaration within the bitstream, indexed
436   /// by the declaration ID (-1).
437   const DeclOffset *DeclOffsets = nullptr;
438 
439   /// Base declaration ID for declarations local to this module.
440   serialization::DeclID BaseDeclID = 0;
441 
442   /// Remapping table for declaration IDs in this module.
443   ContinuousRangeMap<uint32_t, int, 2> DeclRemap;
444 
445   /// Mapping from the module files that this module file depends on
446   /// to the base declaration ID for that module as it is understood within this
447   /// module.
448   ///
449   /// This is effectively a reverse global-to-local mapping for declaration
450   /// IDs, so that we can interpret a true global ID (for this translation unit)
451   /// as a local ID (for this module file).
452   llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs;
453 
454   /// Array of file-level DeclIDs sorted by file.
455   const serialization::DeclID *FileSortedDecls = nullptr;
456   unsigned NumFileSortedDecls = 0;
457 
458   /// Array of category list location information within this
459   /// module file, sorted by the definition ID.
460   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
461 
462   /// The number of redeclaration info entries in ObjCCategoriesMap.
463   unsigned LocalNumObjCCategoriesInMap = 0;
464 
465   /// The Objective-C category lists for categories known to this
466   /// module.
467   SmallVector<uint64_t, 1> ObjCCategories;
468 
469   // === Types ===
470 
471   /// The number of types in this AST file.
472   unsigned LocalNumTypes = 0;
473 
474   /// Offset of each type within the bitstream, indexed by the
475   /// type ID, or the representation of a Type*.
476   const UnderalignedInt64 *TypeOffsets = nullptr;
477 
478   /// Base type ID for types local to this module as represented in
479   /// the global type ID space.
480   serialization::TypeID BaseTypeIndex = 0;
481 
482   /// Remapping table for type IDs in this module.
483   ContinuousRangeMap<uint32_t, int, 2> TypeRemap;
484 
485   // === Miscellaneous ===
486 
487   /// Diagnostic IDs and their mappings that the user changed.
488   SmallVector<uint64_t, 8> PragmaDiagMappings;
489 
490   /// List of modules which depend on this module
491   llvm::SetVector<ModuleFile *> ImportedBy;
492 
493   /// List of modules which this module depends on
494   llvm::SetVector<ModuleFile *> Imports;
495 
496   /// Determine whether this module was directly imported at
497   /// any point during translation.
isDirectlyImported()498   bool isDirectlyImported() const { return DirectlyImported; }
499 
500   /// Is this a module file for a module (rather than a PCH or similar).
isModule()501   bool isModule() const {
502     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
503            Kind == MK_PrebuiltModule;
504   }
505 
506   /// Dump debugging output for this module.
507   void dump();
508 };
509 
510 } // namespace serialization
511 
512 } // namespace clang
513 
514 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H
515