1 /*
2  * extractExternal.cpp
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stdlib.h>
17 #include <iostream>
18 #include <strstream>
19 #include <fstream>
20 #include <string>
21 #include <set>
22 #include <map>
23 
24 /* Given a set of n object files h ('external' object files) and a set of m
25    object files o ('internal' object files),
26    1. Determines r, the subset of h that o depends on, directly or indirectly
27    2. Removes the files in h - r from the file system
28    3. For each external symbol defined in some file in r, rename it in r U o
29       by prefixing it with "__kmp_external_"
30    Usage:
31    hide.exe <n> <filenames for h> <filenames for o>
32 
33    Thus, the prefixed symbols become hidden in the sense that they now have a special
34    prefix.
35 */
36 
37 using namespace std;
38 
39 void stop(char* errorMsg) {
40     printf("%s\n", errorMsg);
41     exit(1);
42 }
43 
44 // an entry in the symbol table of a .OBJ file
45 class Symbol {
46 public:
47     __int64 name;
48     unsigned value;
49     unsigned short sectionNum, type;
50     char storageClass, nAux;
51 };
52 
53 class _rstream : public istrstream {
54 private:
55     const char *buf;
56 protected:
57     _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
58     ~_rstream() {
59 	delete[]buf;
60     }
61 };
62 
63 /* A stream encapuslating the content of a file or the content of a string, overriding the
64    >> operator to read various integer types in binary form, as well as a symbol table
65    entry.
66 */
67 class rstream : public _rstream {
68 private:
69     template<class T>
70     inline rstream& doRead(T &x) {
71 	read((char*)&x, sizeof(T));
72 	return *this;
73     }
74     static pair<const char*, streamsize> getBuf(const char *fileName) {
75 	ifstream raw(fileName,ios::binary | ios::in);
76 	if(!raw.is_open())
77 	    stop("rstream.getBuf: Error opening file");
78 	raw.seekg(0,ios::end);
79 	streampos fileSize = raw.tellg();
80 	if(fileSize < 0)
81 	    stop("rstream.getBuf: Error reading file");
82 	char *buf = new char[fileSize];
83 	raw.seekg(0,ios::beg);
84 	raw.read(buf, fileSize);
85 	return pair<const char*, streamsize>(buf,fileSize);
86     }
87 public:
88     // construct from a string
89     rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
90     /* construct from a file whole content is fully read once to initialize the content of
91        this stream
92     */
93     rstream(const char *fileName):_rstream(getBuf(fileName)){}
94     rstream& operator>>(int &x) {
95 	return doRead(x);
96     }
97     rstream& operator>>(unsigned &x) {
98 	return doRead(x);
99     }
100     rstream& operator>>(short &x) {
101 	return doRead(x);
102     }
103     rstream& operator>>(unsigned short &x) {
104 	return doRead(x);
105     }
106     rstream& operator>>(Symbol &e) {
107 	read((char*)&e, 18);
108 	return *this;
109     }
110 };
111 
112 // string table in a .OBJ file
113 class StringTable {
114 private:
115     map<string, unsigned> directory;
116     size_t length;
117     char *data;
118 
119     // make <directory> from <length> bytes in <data>
120     void makeDirectory(void) {
121 	unsigned i = 4;
122 	while(i < length) {
123 	    string s = string(data + i);
124 	    directory.insert(make_pair(s, i));
125 	    i += s.size() + 1;
126 	}
127     }
128     // initialize <length> and <data> with contents specified by the arguments
129     void init(const char *_data) {
130 	unsigned _length = *(unsigned*)_data;
131 
132 	if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
133 	    stop("StringTable.init: Invalid symbol table");
134 	if(_data[_length - 1]) {
135 	    // to prevent runaway strings, make sure the data ends with a zero
136 	    data = new char[length = _length + 1];
137 	    data[_length] = 0;
138 	} else {
139 	    data = new char[length = _length];
140 	}
141 	*(unsigned*)data = length;
142 	KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
143 	           length - sizeof(unsigned));
144 	makeDirectory();
145     }
146 public:
147     StringTable(rstream &f) {
148 	/* Construct string table by reading from f.
149 	 */
150 	streampos s;
151 	unsigned strSize;
152 	char *strData;
153 
154 	s = f.tellg();
155 	f>>strSize;
156 	if(strSize < sizeof(unsigned))
157 	    stop("StringTable: Invalid string table");
158 	strData = new char[strSize];
159 	*(unsigned*)strData = strSize;
160 	// read the raw data into <strData>
161 	f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
162 	s = f.tellg() - s;
163 	if(s < strSize)
164 	    stop("StringTable: Unexpected EOF");
165 	init(strData);
166 	delete[]strData;
167     }
168     StringTable(const set<string> &strings) {
169 	/* Construct string table from given strings.
170 	 */
171 	char *p;
172 	set<string>::const_iterator it;
173 	size_t s;
174 
175 	// count required size for data
176 	for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
177 	    size_t l = (*it).size();
178 
179 	    if(l > (unsigned) 0xFFFFFFFF)
180 		stop("StringTable: String too long");
181 	    if(l > 8) {
182 		length += l + 1;
183 		if(length > (unsigned) 0xFFFFFFFF)
184 		    stop("StringTable: Symbol table too long");
185 	    }
186 	}
187 	data = new char[length];
188 	*(unsigned*)data = length;
189 	// populate data and directory
190 	for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
191 	    const string &str = *it;
192 	    size_t l = str.size();
193 	    if(l > 8) {
194 		directory.insert(make_pair(str, p - data));
195 		KMP_MEMCPY(p, str.c_str(), l);
196 		p[l] = 0;
197 		p += l + 1;
198 	    }
199 	}
200     }
201     ~StringTable() {
202 	delete[] data;
203     }
204     /* Returns encoding for given string based on this string table.
205        Error if string length is greater than 8 but string is not in
206        the string table--returns 0.
207     */
208     __int64 encode(const string &str) {
209 	__int64 r;
210 
211 	if(str.size() <= 8) {
212 	    // encoded directly
213 	    ((char*)&r)[7] = 0;
214 	    KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
215 	    return r;
216 	} else {
217 	    // represented as index into table
218 	    map<string,unsigned>::const_iterator it = directory.find(str);
219 	    if(it == directory.end())
220 		stop("StringTable::encode: String now found in string table");
221 	    ((unsigned*)&r)[0] = 0;
222 	    ((unsigned*)&r)[1] = (*it).second;
223 	    return r;
224 	}
225     }
226     /* Returns string represented by x based on this string table.
227        Error if x references an invalid position in the table--returns
228        the empty string.
229     */
230     string decode(__int64 x) const {
231 	if(*(unsigned*)&x == 0) {
232 	    // represented as index into table
233 	    unsigned &p = ((unsigned*)&x)[1];
234 	    if(p >= length)
235 		stop("StringTable::decode: Invalid string table lookup");
236 	    return string(data + p);
237 	} else {
238 	    // encoded directly
239 	    char *p = (char*)&x;
240 	    int i;
241 
242 	    for(i = 0; i < 8 && p[i]; ++i);
243 	    return string(p, i);
244 	}
245     }
246     void write(ostream &os) {
247 	os.write(data, length);
248     }
249 };
250 
251 /* for the named object file, determines the set of defined symbols and the set of undefined external symbols
252    and writes them to <defined> and <undefined> respectively
253 */
254 void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
255     streampos fileSize;
256     size_t strTabStart;
257     unsigned symTabStart, symNEntries;
258     rstream f(fileName);
259 
260     f.seekg(0,ios::end);
261     fileSize = f.tellg();
262 
263     f.seekg(8);
264     f >> symTabStart >> symNEntries;
265     // seek to the string table
266     f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
267     if(f.eof()) {
268 	printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
269 	       fileName, (unsigned long) fileSize, symTabStart, symNEntries);
270 	stop("computeExternalSymbols: Unexpected EOF 1");
271     }
272     StringTable stringTable(f); // read the string table
273     if(f.tellg() != fileSize)
274 	stop("computeExternalSymbols: Unexpected data after string table");
275 
276     f.clear();
277     f.seekg(symTabStart); // seek to the symbol table
278 
279     defined->clear(); undefined->clear();
280     for(int i = 0; i < symNEntries; ++i) {
281 	// process each entry
282 	Symbol e;
283 
284 	if(f.eof())
285 	    stop("computeExternalSymbols: Unexpected EOF 2");
286 	f>>e;
287 	if(f.fail())
288 	    stop("computeExternalSymbols: File read error");
289 	if(e.nAux) { // auxiliary entry: skip
290 	    f.seekg(e.nAux * 18, ios::cur);
291 	    i += e.nAux;
292 	}
293 	// if symbol is extern and defined in the current file, insert it
294 	if(e.storageClass == 2)
295 	    if(e.sectionNum)
296 		defined->insert(stringTable.decode(e.name));
297 	    else
298 		undefined->insert(stringTable.decode(e.name));
299     }
300 }
301 
302 /* For each occurrence of an external symbol in the object file named by
303    by <fileName> that is a member of <hide>, renames it by prefixing
304    with "__kmp_external_", writing back the file in-place
305 */
306 void hideSymbols(char *fileName, const set<string> &hide) {
307     static const string prefix("__kmp_external_");
308     set<string> strings; // set of all occurring symbols, appropriately prefixed
309     streampos fileSize;
310     size_t strTabStart;
311     unsigned symTabStart, symNEntries;
312     int i;
313     rstream in(fileName);
314 
315     in.seekg(0,ios::end);
316     fileSize = in.tellg();
317 
318     in.seekg(8);
319     in >> symTabStart >> symNEntries;
320     in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
321     if(in.eof())
322 	stop("hideSymbols: Unexpected EOF");
323     StringTable stringTableOld(in); // read original string table
324 
325     if(in.tellg() != fileSize)
326 	stop("hideSymbols: Unexpected data after string table");
327 
328     // compute set of occurring strings with prefix added
329     for(i = 0; i < symNEntries; ++i) {
330 	Symbol e;
331 
332 	in.seekg(symTabStart + i * 18);
333 	if(in.eof())
334 	    stop("hideSymbols: Unexpected EOF");
335 	in >> e;
336 	if(in.fail())
337 	    stop("hideSymbols: File read error");
338 	if(e.nAux)
339 	    i += e.nAux;
340 	const string &s = stringTableOld.decode(e.name);
341 	// if symbol is extern and found in <hide>, prefix and insert into strings,
342 	// otherwise, just insert into strings without prefix
343 	strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
344 			prefix + s : s);
345     }
346 
347     ofstream out(fileName, ios::trunc | ios::out | ios::binary);
348     if(!out.is_open())
349 	stop("hideSymbols: Error opening output file");
350 
351     // make new string table from string set
352     StringTable stringTableNew = StringTable(strings);
353 
354     // copy input file to output file up to just before the symbol table
355     in.seekg(0);
356     char *buf = new char[symTabStart];
357     in.read(buf, symTabStart);
358     out.write(buf, symTabStart);
359     delete []buf;
360 
361     // copy input symbol table to output symbol table with name translation
362     for(i = 0; i < symNEntries; ++i) {
363 	Symbol e;
364 
365 	in.seekg(symTabStart + i*18);
366 	if(in.eof())
367 	    stop("hideSymbols: Unexpected EOF");
368 	in >> e;
369 	if(in.fail())
370 	    stop("hideSymbols: File read error");
371 	const string &s = stringTableOld.decode(e.name);
372 	out.seekp(symTabStart + i*18);
373 	e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
374 					prefix + s : s);
375 	out.write((char*)&e, 18);
376 	if(out.fail())
377 	    stop("hideSymbols: File write error");
378 	if(e.nAux) {
379 	    // copy auxiliary symbol table entries
380 	    int nAux = e.nAux;
381 	    for(int j = 1; j <= nAux; ++j) {
382 		in >> e;
383 		out.seekp(symTabStart + (i + j) * 18);
384 		out.write((char*)&e, 18);
385 	    }
386 	    i += nAux;
387 	}
388     }
389     // output string table
390     stringTableNew.write(out);
391 }
392 
393 // returns true iff <a> and <b> have no common element
394 template <class T>
395 bool isDisjoint(const set<T> &a, const set<T> &b) {
396     set<T>::const_iterator ita, itb;
397 
398     for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
399 	const T &ta = *ita, &tb = *itb;
400 	if(ta < tb)
401 	    ++ita;
402 	else if (tb < ta)
403 	    ++itb;
404 	else
405 	    return false;
406     }
407     return true;
408 }
409 
410 /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
411    <nTotal> >= <nExternal>.  The first <nExternal> elements correspond to the external object
412    files and the rest correspond to the internal object files.
413    postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
414    disjoint.  Returns the transitive closure of the set of internal object files, as a set of
415    file indexes, under the 'depends on' relation, minus the set of internal object files.
416 */
417 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
418     set<int> *required = new set<int>;
419     set<int> fresh[2];
420     int i, cur = 0;
421     bool changed;
422 
423     for(i = nTotal - 1; i >= nExternal; --i)
424 	fresh[cur].insert(i);
425     do {
426 	changed = false;
427 	for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
428 	    set<string> &s = undefined[*it];
429 
430 	    for(i = 0; i < nExternal; ++i) {
431 		if(required->find(i) == required->end()) {
432 		    if(!isDisjoint(defined[i], s)) {
433 			// found a new qualifying element
434 			required->insert(i);
435 			fresh[1 - cur].insert(i);
436 			changed = true;
437 		    }
438 		}
439 	    }
440 	}
441 	fresh[cur].clear();
442 	cur = 1 - cur;
443     } while(changed);
444     return required;
445 }
446 
447 int main(int argc, char **argv) {
448     int nExternal, nInternal, i;
449     set<string> *defined, *undefined;
450     set<int>::iterator it;
451 
452     if(argc < 3)
453 	stop("Please specify a positive integer followed by a list of object filenames");
454     nExternal = atoi(argv[1]);
455     if(nExternal <= 0)
456 	stop("Please specify a positive integer followed by a list of object filenames");
457     if(nExternal +  2 > argc)
458 	stop("Too few external objects");
459     nInternal = argc - nExternal - 2;
460     defined = new set<string>[argc - 2];
461     undefined = new set<string>[argc - 2];
462 
463     // determine the set of defined and undefined external symbols
464     for(i = 2; i < argc; ++i)
465 	computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
466 
467     // determine the set of required external files
468     set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
469     set<string> hide;
470 
471     /* determine the set of symbols to hide--namely defined external symbols of the
472        required external files
473     */
474     for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
475 	int idx = *it;
476 	set<string>::iterator it2;
477 	/* We have to insert one element at a time instead of inserting a range because
478 	   the insert member function taking a range doesn't exist on Windows* OS, at least
479 	   at the time of this writing.
480 	*/
481 	for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
482 	    hide.insert(*it2);
483     }
484 
485     /* process the external files--removing those that are not required and hiding
486        the appropriate symbols in the others
487     */
488     for(i = 0; i < nExternal; ++i)
489 	if(requiredExternal->find(i) != requiredExternal->end())
490 	    hideSymbols(argv[2 + i], hide);
491 	else
492 	    remove(argv[2 + i]);
493     // hide the appropriate symbols in the internal files
494     for(i = nExternal + 2; i < argc; ++i)
495 	hideSymbols(argv[i], hide);
496     return 0;
497 }
498