1 /*
2  * extractExternal.cpp
3  * $Revision: 42181 $
4  * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
5  */
6 
7 
8 //===----------------------------------------------------------------------===//
9 //
10 //                     The LLVM Compiler Infrastructure
11 //
12 // This file is dual licensed under the MIT and the University of Illinois Open
13 // Source Licenses. See LICENSE.txt for details.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 
18 #include <stdlib.h>
19 #include <iostream>
20 #include <strstream>
21 #include <fstream>
22 #include <string>
23 #include <set>
24 #include <map>
25 
26 /* Given a set of n object files h ('external' object files) and a set of m
27    object files o ('internal' object files),
28    1. Determines r, the subset of h that o depends on, directly or indirectly
29    2. Removes the files in h - r from the file system
30    3. For each external symbol defined in some file in r, rename it in r U o
31       by prefixing it with "__kmp_external_"
32    Usage:
33    hide.exe <n> <filenames for h> <filenames for o>
34 
35    Thus, the prefixed symbols become hidden in the sense that they now have a special
36    prefix.
37 */
38 
39 using namespace std;
40 
41 void stop(char* errorMsg) {
42     printf("%s\n", errorMsg);
43     exit(1);
44 }
45 
46 // an entry in the symbol table of a .OBJ file
47 class Symbol {
48 public:
49     __int64 name;
50     unsigned value;
51     unsigned short sectionNum, type;
52     char storageClass, nAux;
53 };
54 
55 class _rstream : public istrstream {
56 private:
57     const char *buf;
58 protected:
59     _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
60     ~_rstream() {
61 	delete[]buf;
62     }
63 };
64 
65 /* A stream encapuslating the content of a file or the content of a string, overriding the
66    >> operator to read various integer types in binary form, as well as a symbol table
67    entry.
68 */
69 class rstream : public _rstream {
70 private:
71     template<class T>
72     inline rstream& doRead(T &x) {
73 	read((char*)&x, sizeof(T));
74 	return *this;
75     }
76     static pair<const char*, streamsize> getBuf(const char *fileName) {
77 	ifstream raw(fileName,ios::binary | ios::in);
78 	if(!raw.is_open())
79 	    stop("rstream.getBuf: Error opening file");
80 	raw.seekg(0,ios::end);
81 	streampos fileSize = raw.tellg();
82 	if(fileSize < 0)
83 	    stop("rstream.getBuf: Error reading file");
84 	char *buf = new char[fileSize];
85 	raw.seekg(0,ios::beg);
86 	raw.read(buf, fileSize);
87 	return pair<const char*, streamsize>(buf,fileSize);
88     }
89 public:
90     // construct from a string
91     rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
92     /* construct from a file whole content is fully read once to initialize the content of
93        this stream
94     */
95     rstream(const char *fileName):_rstream(getBuf(fileName)){}
96     rstream& operator>>(int &x) {
97 	return doRead(x);
98     }
99     rstream& operator>>(unsigned &x) {
100 	return doRead(x);
101     }
102     rstream& operator>>(short &x) {
103 	return doRead(x);
104     }
105     rstream& operator>>(unsigned short &x) {
106 	return doRead(x);
107     }
108     rstream& operator>>(Symbol &e) {
109 	read((char*)&e, 18);
110 	return *this;
111     }
112 };
113 
114 // string table in a .OBJ file
115 class StringTable {
116 private:
117     map<string, unsigned> directory;
118     size_t length;
119     char *data;
120 
121     // make <directory> from <length> bytes in <data>
122     void makeDirectory(void) {
123 	unsigned i = 4;
124 	while(i < length) {
125 	    string s = string(data + i);
126 	    directory.insert(make_pair(s, i));
127 	    i += s.size() + 1;
128 	}
129     }
130     // initialize <length> and <data> with contents specified by the arguments
131     void init(const char *_data) {
132 	unsigned _length = *(unsigned*)_data;
133 
134 	if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
135 	    stop("StringTable.init: Invalid symbol table");
136 	if(_data[_length - 1]) {
137 	    // to prevent runaway strings, make sure the data ends with a zero
138 	    data = new char[length = _length + 1];
139 	    data[_length] = 0;
140 	} else {
141 	    data = new char[length = _length];
142 	}
143 	*(unsigned*)data = length;
144 	memcpy(data + sizeof(unsigned), _data + sizeof(unsigned),
145 	       length - sizeof(unsigned));
146 	makeDirectory();
147     }
148 public:
149     StringTable(rstream &f) {
150 	/* Construct string table by reading from f.
151 	 */
152 	streampos s;
153 	unsigned strSize;
154 	char *strData;
155 
156 	s = f.tellg();
157 	f>>strSize;
158 	if(strSize < sizeof(unsigned))
159 	    stop("StringTable: Invalid string table");
160 	strData = new char[strSize];
161 	*(unsigned*)strData = strSize;
162 	// read the raw data into <strData>
163 	f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
164 	s = f.tellg() - s;
165 	if(s < strSize)
166 	    stop("StringTable: Unexpected EOF");
167 	init(strData);
168 	delete[]strData;
169     }
170     StringTable(const set<string> &strings) {
171 	/* Construct string table from given strings.
172 	 */
173 	char *p;
174 	set<string>::const_iterator it;
175 	size_t s;
176 
177 	// count required size for data
178 	for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
179 	    size_t l = (*it).size();
180 
181 	    if(l > (unsigned) 0xFFFFFFFF)
182 		stop("StringTable: String too long");
183 	    if(l > 8) {
184 		length += l + 1;
185 		if(length > (unsigned) 0xFFFFFFFF)
186 		    stop("StringTable: Symbol table too long");
187 	    }
188 	}
189 	data = new char[length];
190 	*(unsigned*)data = length;
191 	// populate data and directory
192 	for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
193 	    const string &str = *it;
194 	    size_t l = str.size();
195 	    if(l > 8) {
196 		directory.insert(make_pair(str, p - data));
197 		memcpy(p, str.c_str(), l);
198 		p[l] = 0;
199 		p += l + 1;
200 	    }
201 	}
202     }
203     ~StringTable() {
204 	delete[] data;
205     }
206     /* Returns encoding for given string based on this string table.
207        Error if string length is greater than 8 but string is not in
208        the string table--returns 0.
209     */
210     __int64 encode(const string &str) {
211 	__int64 r;
212 
213 	if(str.size() <= 8) {
214 	    // encoded directly
215 	    ((char*)&r)[7] = 0;
216 	    strncpy((char*)&r, str.c_str(), 8);
217 	    return r;
218 	} else {
219 	    // represented as index into table
220 	    map<string,unsigned>::const_iterator it = directory.find(str);
221 	    if(it == directory.end())
222 		stop("StringTable::encode: String now found in string table");
223 	    ((unsigned*)&r)[0] = 0;
224 	    ((unsigned*)&r)[1] = (*it).second;
225 	    return r;
226 	}
227     }
228     /* Returns string represented by x based on this string table.
229        Error if x references an invalid position in the table--returns
230        the empty string.
231     */
232     string decode(__int64 x) const {
233 	if(*(unsigned*)&x == 0) {
234 	    // represented as index into table
235 	    unsigned &p = ((unsigned*)&x)[1];
236 	    if(p >= length)
237 		stop("StringTable::decode: Invalid string table lookup");
238 	    return string(data + p);
239 	} else {
240 	    // encoded directly
241 	    char *p = (char*)&x;
242 	    int i;
243 
244 	    for(i = 0; i < 8 && p[i]; ++i);
245 	    return string(p, i);
246 	}
247     }
248     void write(ostream &os) {
249 	os.write(data, length);
250     }
251 };
252 
253 /* for the named object file, determines the set of defined symbols and the set of undefined external symbols
254    and writes them to <defined> and <undefined> respectively
255 */
256 void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
257     streampos fileSize;
258     size_t strTabStart;
259     unsigned symTabStart, symNEntries;
260     rstream f(fileName);
261 
262     f.seekg(0,ios::end);
263     fileSize = f.tellg();
264 
265     f.seekg(8);
266     f >> symTabStart >> symNEntries;
267     // seek to the string table
268     f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
269     if(f.eof()) {
270 	printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
271 	       fileName, (unsigned long) fileSize, symTabStart, symNEntries);
272 	stop("computeExternalSymbols: Unexpected EOF 1");
273     }
274     StringTable stringTable(f); // read the string table
275     if(f.tellg() != fileSize)
276 	stop("computeExternalSymbols: Unexpected data after string table");
277 
278     f.clear();
279     f.seekg(symTabStart); // seek to the symbol table
280 
281     defined->clear(); undefined->clear();
282     for(int i = 0; i < symNEntries; ++i) {
283 	// process each entry
284 	Symbol e;
285 
286 	if(f.eof())
287 	    stop("computeExternalSymbols: Unexpected EOF 2");
288 	f>>e;
289 	if(f.fail())
290 	    stop("computeExternalSymbols: File read error");
291 	if(e.nAux) { // auxiliary entry: skip
292 	    f.seekg(e.nAux * 18, ios::cur);
293 	    i += e.nAux;
294 	}
295 	// if symbol is extern and defined in the current file, insert it
296 	if(e.storageClass == 2)
297 	    if(e.sectionNum)
298 		defined->insert(stringTable.decode(e.name));
299 	    else
300 		undefined->insert(stringTable.decode(e.name));
301     }
302 }
303 
304 /* For each occurence of an external symbol in the object file named by
305    by <fileName> that is a member of <hide>, renames it by prefixing
306    with "__kmp_external_", writing back the file in-place
307 */
308 void hideSymbols(char *fileName, const set<string> &hide) {
309     static const string prefix("__kmp_external_");
310     set<string> strings; // set of all occurring symbols, appropriately prefixed
311     streampos fileSize;
312     size_t strTabStart;
313     unsigned symTabStart, symNEntries;
314     int i;
315     rstream in(fileName);
316 
317     in.seekg(0,ios::end);
318     fileSize = in.tellg();
319 
320     in.seekg(8);
321     in >> symTabStart >> symNEntries;
322     in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
323     if(in.eof())
324 	stop("hideSymbols: Unexpected EOF");
325     StringTable stringTableOld(in); // read original string table
326 
327     if(in.tellg() != fileSize)
328 	stop("hideSymbols: Unexpected data after string table");
329 
330     // compute set of occurring strings with prefix added
331     for(i = 0; i < symNEntries; ++i) {
332 	Symbol e;
333 
334 	in.seekg(symTabStart + i * 18);
335 	if(in.eof())
336 	    stop("hideSymbols: Unexpected EOF");
337 	in >> e;
338 	if(in.fail())
339 	    stop("hideSymbols: File read error");
340 	if(e.nAux)
341 	    i += e.nAux;
342 	const string &s = stringTableOld.decode(e.name);
343 	// if symbol is extern and found in <hide>, prefix and insert into strings,
344 	// otherwise, just insert into strings without prefix
345 	strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
346 			prefix + s : s);
347     }
348 
349     ofstream out(fileName, ios::trunc | ios::out | ios::binary);
350     if(!out.is_open())
351 	stop("hideSymbols: Error opening output file");
352 
353     // make new string table from string set
354     StringTable stringTableNew = StringTable(strings);
355 
356     // copy input file to output file up to just before the symbol table
357     in.seekg(0);
358     char *buf = new char[symTabStart];
359     in.read(buf, symTabStart);
360     out.write(buf, symTabStart);
361     delete []buf;
362 
363     // copy input symbol table to output symbol table with name translation
364     for(i = 0; i < symNEntries; ++i) {
365 	Symbol e;
366 
367 	in.seekg(symTabStart + i*18);
368 	if(in.eof())
369 	    stop("hideSymbols: Unexpected EOF");
370 	in >> e;
371 	if(in.fail())
372 	    stop("hideSymbols: File read error");
373 	const string &s = stringTableOld.decode(e.name);
374 	out.seekp(symTabStart + i*18);
375 	e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
376 					prefix + s : s);
377 	out.write((char*)&e, 18);
378 	if(out.fail())
379 	    stop("hideSymbols: File write error");
380 	if(e.nAux) {
381 	    // copy auxiliary symbol table entries
382 	    int nAux = e.nAux;
383 	    for(int j = 1; j <= nAux; ++j) {
384 		in >> e;
385 		out.seekp(symTabStart + (i + j) * 18);
386 		out.write((char*)&e, 18);
387 	    }
388 	    i += nAux;
389 	}
390     }
391     // output string table
392     stringTableNew.write(out);
393 }
394 
395 // returns true iff <a> and <b> have no common element
396 template <class T>
397 bool isDisjoint(const set<T> &a, const set<T> &b) {
398     set<T>::const_iterator ita, itb;
399 
400     for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
401 	const T &ta = *ita, &tb = *itb;
402 	if(ta < tb)
403 	    ++ita;
404 	else if (tb < ta)
405 	    ++itb;
406 	else
407 	    return false;
408     }
409     return true;
410 }
411 
412 /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
413    <nTotal> >= <nExternal>.  The first <nExternal> elements correspond to the external object
414    files and the rest correspond to the internal object files.
415    postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
416    disjoint.  Returns the transitive closure of the set of internal object files, as a set of
417    file indexes, under the 'depends on' relation, minus the set of internal object files.
418 */
419 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
420     set<int> *required = new set<int>;
421     set<int> fresh[2];
422     int i, cur = 0;
423     bool changed;
424 
425     for(i = nTotal - 1; i >= nExternal; --i)
426 	fresh[cur].insert(i);
427     do {
428 	changed = false;
429 	for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
430 	    set<string> &s = undefined[*it];
431 
432 	    for(i = 0; i < nExternal; ++i) {
433 		if(required->find(i) == required->end()) {
434 		    if(!isDisjoint(defined[i], s)) {
435 			// found a new qualifying element
436 			required->insert(i);
437 			fresh[1 - cur].insert(i);
438 			changed = true;
439 		    }
440 		}
441 	    }
442 	}
443 	fresh[cur].clear();
444 	cur = 1 - cur;
445     } while(changed);
446     return required;
447 }
448 
449 int main(int argc, char **argv) {
450     int nExternal, nInternal, i;
451     set<string> *defined, *undefined;
452     set<int>::iterator it;
453 
454     if(argc < 3)
455 	stop("Please specify a positive integer followed by a list of object filenames");
456     nExternal = atoi(argv[1]);
457     if(nExternal <= 0)
458 	stop("Please specify a positive integer followed by a list of object filenames");
459     if(nExternal +  2 > argc)
460 	stop("Too few external objects");
461     nInternal = argc - nExternal - 2;
462     defined = new set<string>[argc - 2];
463     undefined = new set<string>[argc - 2];
464 
465     // determine the set of defined and undefined external symbols
466     for(i = 2; i < argc; ++i)
467 	computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
468 
469     // determine the set of required external files
470     set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
471     set<string> hide;
472 
473     /* determine the set of symbols to hide--namely defined external symbols of the
474        required external files
475     */
476     for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
477 	int idx = *it;
478 	set<string>::iterator it2;
479 	/* We have to insert one element at a time instead of inserting a range because
480 	   the insert member function taking a range doesn't exist on Windows* OS, at least
481 	   at the time of this writing.
482 	*/
483 	for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
484 	    hide.insert(*it2);
485     }
486 
487     /* process the external files--removing those that are not required and hiding
488        the appropriate symbols in the others
489     */
490     for(i = 0; i < nExternal; ++i)
491 	if(requiredExternal->find(i) != requiredExternal->end())
492 	    hideSymbols(argv[2 + i], hide);
493 	else
494 	    remove(argv[2 + i]);
495     // hide the appropriate symbols in the internal files
496     for(i = nExternal + 2; i < argc; ++i)
497 	hideSymbols(argv[i], hide);
498     return 0;
499 }
500