1 /*
2  * extractExternal.cpp
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <fstream>
17 #include <iostream>
18 #include <map>
19 #include <set>
20 #include <stdlib.h>
21 #include <string>
22 #include <strstream>
23 
24 /* Given a set of n object files h ('external' object files) and a set of m
25    object files o ('internal' object files),
26    1. Determines r, the subset of h that o depends on, directly or indirectly
27    2. Removes the files in h - r from the file system
28    3. For each external symbol defined in some file in r, rename it in r U o
29       by prefixing it with "__kmp_external_"
30    Usage:
31    hide.exe <n> <filenames for h> <filenames for o>
32 
33    Thus, the prefixed symbols become hidden in the sense that they now have a
34    special prefix.
35 */
36 
37 using namespace std;
38 
39 void stop(char *errorMsg) {
40   printf("%s\n", errorMsg);
41   exit(1);
42 }
43 
44 // an entry in the symbol table of a .OBJ file
45 class Symbol {
46 public:
47   __int64 name;
48   unsigned value;
49   unsigned short sectionNum, type;
50   char storageClass, nAux;
51 };
52 
53 class _rstream : public istrstream {
54 private:
55   const char *buf;
56 
57 protected:
58   _rstream(pair<const char *, streamsize> p)
59       : istrstream(p.first, p.second), buf(p.first) {}
60   ~_rstream() { delete[] buf; }
61 };
62 
63 // A stream encapuslating the content of a file or the content of a string,
64 // overriding the >> operator to read various integer types in binary form,
65 // as well as a symbol table entry.
66 class rstream : public _rstream {
67 private:
68   template <class T> inline rstream &doRead(T &x) {
69     read((char *)&x, sizeof(T));
70     return *this;
71   }
72   static pair<const char *, streamsize> getBuf(const char *fileName) {
73     ifstream raw(fileName, ios::binary | ios::in);
74     if (!raw.is_open())
75       stop("rstream.getBuf: Error opening file");
76     raw.seekg(0, ios::end);
77     streampos fileSize = raw.tellg();
78     if (fileSize < 0)
79       stop("rstream.getBuf: Error reading file");
80     char *buf = new char[fileSize];
81     raw.seekg(0, ios::beg);
82     raw.read(buf, fileSize);
83     return pair<const char *, streamsize>(buf, fileSize);
84   }
85 
86 public:
87   // construct from a string
88   rstream(const char *buf, streamsize size)
89       : _rstream(pair<const char *, streamsize>(buf, size)) {}
90   // construct from a file whole content is fully read once to initialize the
91   // content of this stream
92   rstream(const char *fileName) : _rstream(getBuf(fileName)) {}
93   rstream &operator>>(int &x) { return doRead(x); }
94   rstream &operator>>(unsigned &x) { return doRead(x); }
95   rstream &operator>>(short &x) { return doRead(x); }
96   rstream &operator>>(unsigned short &x) { return doRead(x); }
97   rstream &operator>>(Symbol &e) {
98     read((char *)&e, 18);
99     return *this;
100   }
101 };
102 
103 // string table in a .OBJ file
104 class StringTable {
105 private:
106   map<string, unsigned> directory;
107   size_t length;
108   char *data;
109 
110   // make <directory> from <length> bytes in <data>
111   void makeDirectory(void) {
112     unsigned i = 4;
113     while (i < length) {
114       string s = string(data + i);
115       directory.insert(make_pair(s, i));
116       i += s.size() + 1;
117     }
118   }
119   // initialize <length> and <data> with contents specified by the arguments
120   void init(const char *_data) {
121     unsigned _length = *(unsigned *)_data;
122 
123     if (_length < sizeof(unsigned) || _length != *(unsigned *)_data)
124       stop("StringTable.init: Invalid symbol table");
125     if (_data[_length - 1]) {
126       // to prevent runaway strings, make sure the data ends with a zero
127       data = new char[length = _length + 1];
128       data[_length] = 0;
129     } else {
130       data = new char[length = _length];
131     }
132     *(unsigned *)data = length;
133     KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
134                length - sizeof(unsigned));
135     makeDirectory();
136   }
137 
138 public:
139   StringTable(rstream &f) {
140     // Construct string table by reading from f.
141     streampos s;
142     unsigned strSize;
143     char *strData;
144 
145     s = f.tellg();
146     f >> strSize;
147     if (strSize < sizeof(unsigned))
148       stop("StringTable: Invalid string table");
149     strData = new char[strSize];
150     *(unsigned *)strData = strSize;
151     // read the raw data into <strData>
152     f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
153     s = f.tellg() - s;
154     if (s < strSize)
155       stop("StringTable: Unexpected EOF");
156     init(strData);
157     delete[] strData;
158   }
159   StringTable(const set<string> &strings) {
160     // Construct string table from given strings.
161     char *p;
162     set<string>::const_iterator it;
163     size_t s;
164 
165     // count required size for data
166     for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
167          ++it) {
168       size_t l = (*it).size();
169 
170       if (l > (unsigned)0xFFFFFFFF)
171         stop("StringTable: String too long");
172       if (l > 8) {
173         length += l + 1;
174         if (length > (unsigned)0xFFFFFFFF)
175           stop("StringTable: Symbol table too long");
176       }
177     }
178     data = new char[length];
179     *(unsigned *)data = length;
180     // populate data and directory
181     for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end();
182          ++it) {
183       const string &str = *it;
184       size_t l = str.size();
185       if (l > 8) {
186         directory.insert(make_pair(str, p - data));
187         KMP_MEMCPY(p, str.c_str(), l);
188         p[l] = 0;
189         p += l + 1;
190       }
191     }
192   }
193   ~StringTable() { delete[] data; }
194   // Returns encoding for given string based on this string table. Error if
195   // string length is greater than 8 but string is not in the string table
196   // -- returns 0.
197   __int64 encode(const string &str) {
198     __int64 r;
199 
200     if (str.size() <= 8) {
201       // encoded directly
202       ((char *)&r)[7] = 0;
203       KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8);
204       return r;
205     } else {
206       // represented as index into table
207       map<string, unsigned>::const_iterator it = directory.find(str);
208       if (it == directory.end())
209         stop("StringTable::encode: String now found in string table");
210       ((unsigned *)&r)[0] = 0;
211       ((unsigned *)&r)[1] = (*it).second;
212       return r;
213     }
214   }
215   // Returns string represented by x based on this string table. Error if x
216   // references an invalid position in the table--returns the empty string.
217   string decode(__int64 x) const {
218     if (*(unsigned *)&x == 0) {
219       // represented as index into table
220       unsigned &p = ((unsigned *)&x)[1];
221       if (p >= length)
222         stop("StringTable::decode: Invalid string table lookup");
223       return string(data + p);
224     } else {
225       // encoded directly
226       char *p = (char *)&x;
227       int i;
228 
229       for (i = 0; i < 8 && p[i]; ++i)
230         ;
231       return string(p, i);
232     }
233   }
234   void write(ostream &os) { os.write(data, length); }
235 };
236 
237 // for the named object file, determines the set of defined symbols and the set
238 // of undefined external symbols and writes them to <defined> and <undefined>
239 // respectively
240 void computeExternalSymbols(const char *fileName, set<string> *defined,
241                             set<string> *undefined) {
242   streampos fileSize;
243   size_t strTabStart;
244   unsigned symTabStart, symNEntries;
245   rstream f(fileName);
246 
247   f.seekg(0, ios::end);
248   fileSize = f.tellg();
249 
250   f.seekg(8);
251   f >> symTabStart >> symNEntries;
252   // seek to the string table
253   f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
254   if (f.eof()) {
255     printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart "
256            "= %u, symNEntries = %u\n",
257            fileName, (unsigned long)fileSize, symTabStart, symNEntries);
258     stop("computeExternalSymbols: Unexpected EOF 1");
259   }
260   StringTable stringTable(f); // read the string table
261   if (f.tellg() != fileSize)
262     stop("computeExternalSymbols: Unexpected data after string table");
263 
264   f.clear();
265   f.seekg(symTabStart); // seek to the symbol table
266 
267   defined->clear();
268   undefined->clear();
269   for (int i = 0; i < symNEntries; ++i) {
270     // process each entry
271     Symbol e;
272 
273     if (f.eof())
274       stop("computeExternalSymbols: Unexpected EOF 2");
275     f >> e;
276     if (f.fail())
277       stop("computeExternalSymbols: File read error");
278     if (e.nAux) { // auxiliary entry: skip
279       f.seekg(e.nAux * 18, ios::cur);
280       i += e.nAux;
281     }
282     // if symbol is extern and defined in the current file, insert it
283     if (e.storageClass == 2)
284       if (e.sectionNum)
285         defined->insert(stringTable.decode(e.name));
286       else
287         undefined->insert(stringTable.decode(e.name));
288   }
289 }
290 
291 // For each occurrence of an external symbol in the object file named by
292 // by <fileName> that is a member of <hide>, renames it by prefixing
293 // with "__kmp_external_", writing back the file in-place
294 void hideSymbols(char *fileName, const set<string> &hide) {
295   static const string prefix("__kmp_external_");
296   set<string> strings; // set of all occurring symbols, appropriately prefixed
297   streampos fileSize;
298   size_t strTabStart;
299   unsigned symTabStart, symNEntries;
300   int i;
301   rstream in(fileName);
302 
303   in.seekg(0, ios::end);
304   fileSize = in.tellg();
305 
306   in.seekg(8);
307   in >> symTabStart >> symNEntries;
308   in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
309   if (in.eof())
310     stop("hideSymbols: Unexpected EOF");
311   StringTable stringTableOld(in); // read original string table
312 
313   if (in.tellg() != fileSize)
314     stop("hideSymbols: Unexpected data after string table");
315 
316   // compute set of occurring strings with prefix added
317   for (i = 0; i < symNEntries; ++i) {
318     Symbol e;
319 
320     in.seekg(symTabStart + i * 18);
321     if (in.eof())
322       stop("hideSymbols: Unexpected EOF");
323     in >> e;
324     if (in.fail())
325       stop("hideSymbols: File read error");
326     if (e.nAux)
327       i += e.nAux;
328     const string &s = stringTableOld.decode(e.name);
329     // if symbol is extern and found in <hide>, prefix and insert into strings,
330     // otherwise, just insert into strings without prefix
331     strings.insert(
332         (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
333   }
334 
335   ofstream out(fileName, ios::trunc | ios::out | ios::binary);
336   if (!out.is_open())
337     stop("hideSymbols: Error opening output file");
338 
339   // make new string table from string set
340   StringTable stringTableNew = StringTable(strings);
341 
342   // copy input file to output file up to just before the symbol table
343   in.seekg(0);
344   char *buf = new char[symTabStart];
345   in.read(buf, symTabStart);
346   out.write(buf, symTabStart);
347   delete[] buf;
348 
349   // copy input symbol table to output symbol table with name translation
350   for (i = 0; i < symNEntries; ++i) {
351     Symbol e;
352 
353     in.seekg(symTabStart + i * 18);
354     if (in.eof())
355       stop("hideSymbols: Unexpected EOF");
356     in >> e;
357     if (in.fail())
358       stop("hideSymbols: File read error");
359     const string &s = stringTableOld.decode(e.name);
360     out.seekp(symTabStart + i * 18);
361     e.name = stringTableNew.encode(
362         (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
363     out.write((char *)&e, 18);
364     if (out.fail())
365       stop("hideSymbols: File write error");
366     if (e.nAux) {
367       // copy auxiliary symbol table entries
368       int nAux = e.nAux;
369       for (int j = 1; j <= nAux; ++j) {
370         in >> e;
371         out.seekp(symTabStart + (i + j) * 18);
372         out.write((char *)&e, 18);
373       }
374       i += nAux;
375     }
376   }
377   // output string table
378   stringTableNew.write(out);
379 }
380 
381 // returns true iff <a> and <b> have no common element
382 template <class T> bool isDisjoint(const set<T> &a, const set<T> &b) {
383   set<T>::const_iterator ita, itb;
384 
385   for (ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
386     const T &ta = *ita, &tb = *itb;
387     if (ta < tb)
388       ++ita;
389     else if (tb < ta)
390       ++itb;
391     else
392       return false;
393   }
394   return true;
395 }
396 
397 // PRE: <defined> and <undefined> are arrays with <nTotal> elements where
398 // <nTotal> >= <nExternal>.  The first <nExternal> elements correspond to the
399 // external object files and the rest correspond to the internal object files.
400 // POST: file x is said to depend on file y if undefined[x] and defined[y] are
401 // not disjoint. Returns the transitive closure of the set of internal object
402 // files, as a set of file indexes, under the 'depends on' relation, minus the
403 // set of internal object files.
404 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined,
405                                set<string> *undefined) {
406   set<int> *required = new set<int>;
407   set<int> fresh[2];
408   int i, cur = 0;
409   bool changed;
410 
411   for (i = nTotal - 1; i >= nExternal; --i)
412     fresh[cur].insert(i);
413   do {
414     changed = false;
415     for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end();
416          ++it) {
417       set<string> &s = undefined[*it];
418 
419       for (i = 0; i < nExternal; ++i) {
420         if (required->find(i) == required->end()) {
421           if (!isDisjoint(defined[i], s)) {
422             // found a new qualifying element
423             required->insert(i);
424             fresh[1 - cur].insert(i);
425             changed = true;
426           }
427         }
428       }
429     }
430     fresh[cur].clear();
431     cur = 1 - cur;
432   } while (changed);
433   return required;
434 }
435 
436 int main(int argc, char **argv) {
437   int nExternal, nInternal, i;
438   set<string> *defined, *undefined;
439   set<int>::iterator it;
440 
441   if (argc < 3)
442     stop("Please specify a positive integer followed by a list of object "
443          "filenames");
444   nExternal = atoi(argv[1]);
445   if (nExternal <= 0)
446     stop("Please specify a positive integer followed by a list of object "
447          "filenames");
448   if (nExternal + 2 > argc)
449     stop("Too few external objects");
450   nInternal = argc - nExternal - 2;
451   defined = new set<string>[argc - 2];
452   undefined = new set<string>[argc - 2];
453 
454   // determine the set of defined and undefined external symbols
455   for (i = 2; i < argc; ++i)
456     computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
457 
458   // determine the set of required external files
459   set<int> *requiredExternal =
460       findRequiredExternal(nExternal, argc - 2, defined, undefined);
461   set<string> hide;
462 
463   // determine the set of symbols to hide--namely defined external symbols of
464   // the required external files
465   for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
466     int idx = *it;
467     set<string>::iterator it2;
468     // We have to insert one element at a time instead of inserting a range
469     // because the insert member function taking a range doesn't exist on
470     // Windows* OS, at least at the time of this writing.
471     for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
472       hide.insert(*it2);
473   }
474 
475   // process the external files--removing those that are not required and hiding
476   //   the appropriate symbols in the others
477   for (i = 0; i < nExternal; ++i)
478     if (requiredExternal->find(i) != requiredExternal->end())
479       hideSymbols(argv[2 + i], hide);
480     else
481       remove(argv[2 + i]);
482   // hide the appropriate symbols in the internal files
483   for (i = nExternal + 2; i < argc; ++i)
484     hideSymbols(argv[i], hide);
485   return 0;
486 }
487