1 //===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the HeaderMap interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Lex/HeaderMap.h"
15 #include "clang/Basic/FileManager.h"
16 #include "llvm/ADT/OwningPtr.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/System/DataTypes.h"
19 #include "llvm/Support/MathExtras.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include <cstdio>
22 using namespace clang;
23 
24 //===----------------------------------------------------------------------===//
25 // Data Structures and Manifest Constants
26 //===----------------------------------------------------------------------===//
27 
28 enum {
29   HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p',
30   HMAP_HeaderVersion = 1,
31 
32   HMAP_EmptyBucketKey = 0
33 };
34 
35 namespace clang {
36 struct HMapBucket {
37   uint32_t Key;          // Offset (into strings) of key.
38 
39   uint32_t Prefix;     // Offset (into strings) of value prefix.
40   uint32_t Suffix;     // Offset (into strings) of value suffix.
41 };
42 
43 struct HMapHeader {
44   uint32_t Magic;           // Magic word, also indicates byte order.
45   uint16_t Version;         // Version number -- currently 1.
46   uint16_t Reserved;        // Reserved for future use - zero for now.
47   uint32_t StringsOffset;   // Offset to start of string pool.
48   uint32_t NumEntries;      // Number of entries in the string table.
49   uint32_t NumBuckets;      // Number of buckets (always a power of 2).
50   uint32_t MaxValueLength;  // Length of longest result path (excluding nul).
51   // An array of 'NumBuckets' HMapBucket objects follows this header.
52   // Strings follow the buckets, at StringsOffset.
53 };
54 } // end namespace clang.
55 
56 /// HashHMapKey - This is the 'well known' hash function required by the file
57 /// format, used to look up keys in the hash table.  The hash table uses simple
58 /// linear probing based on this function.
59 static inline unsigned HashHMapKey(const char *S, const char *End) {
60   unsigned Result = 0;
61 
62   for (; S != End; S++)
63     Result += tolower(*S) * 13;
64   return Result;
65 }
66 
67 
68 
69 //===----------------------------------------------------------------------===//
70 // Verification and Construction
71 //===----------------------------------------------------------------------===//
72 
73 /// HeaderMap::Create - This attempts to load the specified file as a header
74 /// map.  If it doesn't look like a HeaderMap, it gives up and returns null.
75 /// If it looks like a HeaderMap but is obviously corrupted, it puts a reason
76 /// into the string error argument and returns null.
77 const HeaderMap *HeaderMap::Create(const FileEntry *FE) {
78   // If the file is too small to be a header map, ignore it.
79   unsigned FileSize = FE->getSize();
80   if (FileSize <= sizeof(HMapHeader)) return 0;
81 
82   llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer(
83     llvm::MemoryBuffer::getFile(FE->getName(), 0, FE->getSize()));
84   if (FileBuffer == 0) return 0;  // Unreadable file?
85   const char *FileStart = FileBuffer->getBufferStart();
86 
87   // We know the file is at least as big as the header, check it now.
88   const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
89 
90   // Sniff it to see if it's a headermap by checking the magic number and
91   // version.
92   bool NeedsByteSwap;
93   if (Header->Magic == HMAP_HeaderMagicNumber &&
94       Header->Version == HMAP_HeaderVersion)
95     NeedsByteSwap = false;
96   else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) &&
97            Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
98     NeedsByteSwap = true;  // Mixed endianness headermap.
99   else
100     return 0;  // Not a header map.
101 
102   if (Header->Reserved != 0) return 0;
103 
104   // Okay, everything looks good, create the header map.
105   return new HeaderMap(FileBuffer.take(), NeedsByteSwap);
106 }
107 
108 HeaderMap::~HeaderMap() {
109   delete FileBuffer;
110 }
111 
112 //===----------------------------------------------------------------------===//
113 //  Utility Methods
114 //===----------------------------------------------------------------------===//
115 
116 
117 /// getFileName - Return the filename of the headermap.
118 const char *HeaderMap::getFileName() const {
119   return FileBuffer->getBufferIdentifier();
120 }
121 
122 unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const {
123   if (!NeedsBSwap) return X;
124   return llvm::ByteSwap_32(X);
125 }
126 
127 /// getHeader - Return a reference to the file header, in unbyte-swapped form.
128 /// This method cannot fail.
129 const HMapHeader &HeaderMap::getHeader() const {
130   // We know the file is at least as big as the header.  Return it.
131   return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
132 }
133 
134 /// getBucket - Return the specified hash table bucket from the header map,
135 /// bswap'ing its fields as appropriate.  If the bucket number is not valid,
136 /// this return a bucket with an empty key (0).
137 HMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
138   HMapBucket Result;
139   Result.Key = HMAP_EmptyBucketKey;
140 
141   const HMapBucket *BucketArray =
142     reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
143                                         sizeof(HMapHeader));
144 
145   const HMapBucket *BucketPtr = BucketArray+BucketNo;
146   if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) {
147     Result.Prefix = 0;
148     Result.Suffix = 0;
149     return Result;  // Invalid buffer, corrupt hmap.
150   }
151 
152   // Otherwise, the bucket is valid.  Load the values, bswapping as needed.
153   Result.Key    = getEndianAdjustedWord(BucketPtr->Key);
154   Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
155   Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
156   return Result;
157 }
158 
159 /// getString - Look up the specified string in the string table.  If the string
160 /// index is not valid, it returns an empty string.
161 const char *HeaderMap::getString(unsigned StrTabIdx) const {
162   // Add the start of the string table to the idx.
163   StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
164 
165   // Check for invalid index.
166   if (StrTabIdx >= FileBuffer->getBufferSize())
167     return 0;
168 
169   // Otherwise, we have a valid pointer into the file.  Just return it.  We know
170   // that the "string" can not overrun the end of the file, because the buffer
171   // is nul terminated by virtue of being a MemoryBuffer.
172   return FileBuffer->getBufferStart()+StrTabIdx;
173 }
174 
175 /// StringsEqualWithoutCase - Compare the specified two strings for case-
176 /// insensitive equality, returning true if they are equal.  Both strings are
177 /// known to have the same length.
178 static bool StringsEqualWithoutCase(const char *S1, const char *S2,
179                                     unsigned Len) {
180   for (; Len; ++S1, ++S2, --Len)
181     if (tolower(*S1) != tolower(*S2))
182       return false;
183   return true;
184 }
185 
186 //===----------------------------------------------------------------------===//
187 // The Main Drivers
188 //===----------------------------------------------------------------------===//
189 
190 /// dump - Print the contents of this headermap to stderr.
191 void HeaderMap::dump() const {
192   const HMapHeader &Hdr = getHeader();
193   unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
194 
195   fprintf(stderr, "Header Map %s:\n  %d buckets, %d entries\n",
196           getFileName(), NumBuckets,
197           getEndianAdjustedWord(Hdr.NumEntries));
198 
199   for (unsigned i = 0; i != NumBuckets; ++i) {
200     HMapBucket B = getBucket(i);
201     if (B.Key == HMAP_EmptyBucketKey) continue;
202 
203     const char *Key    = getString(B.Key);
204     const char *Prefix = getString(B.Prefix);
205     const char *Suffix = getString(B.Suffix);
206     fprintf(stderr, "  %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix);
207   }
208 }
209 
210 /// LookupFile - Check to see if the specified relative filename is located in
211 /// this HeaderMap.  If so, open it and return its FileEntry.
212 const FileEntry *HeaderMap::LookupFile(const char *FilenameStart,
213                                        const char *FilenameEnd,
214                                        FileManager &FM) const {
215   const HMapHeader &Hdr = getHeader();
216   unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
217 
218   // If the number of buckets is not a power of two, the headermap is corrupt.
219   // Don't probe infinitely.
220   if (NumBuckets & (NumBuckets-1))
221     return 0;
222 
223   // Linearly probe the hash table.
224   for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) {
225     HMapBucket B = getBucket(Bucket & (NumBuckets-1));
226     if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss.
227 
228     // See if the key matches.  If not, probe on.
229     const char *Key = getString(B.Key);
230     unsigned BucketKeyLen = strlen(Key);
231     if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart))
232       continue;
233 
234     // See if the actual strings equal.
235     if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen))
236       continue;
237 
238     // If so, we have a match in the hash table.  Construct the destination
239     // path.
240     llvm::SmallString<1024> DestPath;
241     DestPath += getString(B.Prefix);
242     DestPath += getString(B.Suffix);
243     return FM.getFile(DestPath.begin(), DestPath.end());
244   }
245 }
246