1 /* 2 * extractExternal.cpp 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 #include <fstream> 17 #include <iostream> 18 #include <map> 19 #include <set> 20 #include <stdlib.h> 21 #include <string> 22 #include <strstream> 23 24 /* Given a set of n object files h ('external' object files) and a set of m 25 object files o ('internal' object files), 26 1. Determines r, the subset of h that o depends on, directly or indirectly 27 2. Removes the files in h - r from the file system 28 3. For each external symbol defined in some file in r, rename it in r U o 29 by prefixing it with "__kmp_external_" 30 Usage: 31 hide.exe <n> <filenames for h> <filenames for o> 32 33 Thus, the prefixed symbols become hidden in the sense that they now have a 34 special prefix. 35 */ 36 37 using namespace std; 38 39 void stop(char *errorMsg) { 40 printf("%s\n", errorMsg); 41 exit(1); 42 } 43 44 // an entry in the symbol table of a .OBJ file 45 class Symbol { 46 public: 47 __int64 name; 48 unsigned value; 49 unsigned short sectionNum, type; 50 char storageClass, nAux; 51 }; 52 53 class _rstream : public istrstream { 54 private: 55 const char *buf; 56 57 protected: 58 _rstream(pair<const char *, streamsize> p) 59 : istrstream(p.first, p.second), buf(p.first) {} 60 ~_rstream() { delete[] buf; } 61 }; 62 63 // A stream encapuslating the content of a file or the content of a string, 64 // overriding the >> operator to read various integer types in binary form, 65 // as well as a symbol table entry. 66 class rstream : public _rstream { 67 private: 68 template <class T> inline rstream &doRead(T &x) { 69 read((char *)&x, sizeof(T)); 70 return *this; 71 } 72 static pair<const char *, streamsize> getBuf(const char *fileName) { 73 ifstream raw(fileName, ios::binary | ios::in); 74 if (!raw.is_open()) 75 stop("rstream.getBuf: Error opening file"); 76 raw.seekg(0, ios::end); 77 streampos fileSize = raw.tellg(); 78 if (fileSize < 0) 79 stop("rstream.getBuf: Error reading file"); 80 char *buf = new char[fileSize]; 81 raw.seekg(0, ios::beg); 82 raw.read(buf, fileSize); 83 return pair<const char *, streamsize>(buf, fileSize); 84 } 85 86 public: 87 // construct from a string 88 rstream(const char *buf, streamsize size) 89 : _rstream(pair<const char *, streamsize>(buf, size)) {} 90 // construct from a file whole content is fully read once to initialize the 91 // content of this stream 92 rstream(const char *fileName) : _rstream(getBuf(fileName)) {} 93 rstream &operator>>(int &x) { return doRead(x); } 94 rstream &operator>>(unsigned &x) { return doRead(x); } 95 rstream &operator>>(short &x) { return doRead(x); } 96 rstream &operator>>(unsigned short &x) { return doRead(x); } 97 rstream &operator>>(Symbol &e) { 98 read((char *)&e, 18); 99 return *this; 100 } 101 }; 102 103 // string table in a .OBJ file 104 class StringTable { 105 private: 106 map<string, unsigned> directory; 107 size_t length; 108 char *data; 109 110 // make <directory> from <length> bytes in <data> 111 void makeDirectory(void) { 112 unsigned i = 4; 113 while (i < length) { 114 string s = string(data + i); 115 directory.insert(make_pair(s, i)); 116 i += s.size() + 1; 117 } 118 } 119 // initialize <length> and <data> with contents specified by the arguments 120 void init(const char *_data) { 121 unsigned _length = *(unsigned *)_data; 122 123 if (_length < sizeof(unsigned) || _length != *(unsigned *)_data) 124 stop("StringTable.init: Invalid symbol table"); 125 if (_data[_length - 1]) { 126 // to prevent runaway strings, make sure the data ends with a zero 127 data = new char[length = _length + 1]; 128 data[_length] = 0; 129 } else { 130 data = new char[length = _length]; 131 } 132 *(unsigned *)data = length; 133 KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned), 134 length - sizeof(unsigned)); 135 makeDirectory(); 136 } 137 138 public: 139 StringTable(rstream &f) { 140 // Construct string table by reading from f. 141 streampos s; 142 unsigned strSize; 143 char *strData; 144 145 s = f.tellg(); 146 f >> strSize; 147 if (strSize < sizeof(unsigned)) 148 stop("StringTable: Invalid string table"); 149 strData = new char[strSize]; 150 *(unsigned *)strData = strSize; 151 // read the raw data into <strData> 152 f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned)); 153 s = f.tellg() - s; 154 if (s < strSize) 155 stop("StringTable: Unexpected EOF"); 156 init(strData); 157 delete[] strData; 158 } 159 StringTable(const set<string> &strings) { 160 // Construct string table from given strings. 161 char *p; 162 set<string>::const_iterator it; 163 size_t s; 164 165 // count required size for data 166 for (length = sizeof(unsigned), it = strings.begin(); it != strings.end(); 167 ++it) { 168 size_t l = (*it).size(); 169 170 if (l > (unsigned)0xFFFFFFFF) 171 stop("StringTable: String too long"); 172 if (l > 8) { 173 length += l + 1; 174 if (length > (unsigned)0xFFFFFFFF) 175 stop("StringTable: Symbol table too long"); 176 } 177 } 178 data = new char[length]; 179 *(unsigned *)data = length; 180 // populate data and directory 181 for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); 182 ++it) { 183 const string &str = *it; 184 size_t l = str.size(); 185 if (l > 8) { 186 directory.insert(make_pair(str, p - data)); 187 KMP_MEMCPY(p, str.c_str(), l); 188 p[l] = 0; 189 p += l + 1; 190 } 191 } 192 } 193 ~StringTable() { delete[] data; } 194 // Returns encoding for given string based on this string table. Error if 195 // string length is greater than 8 but string is not in the string table 196 // -- returns 0. 197 __int64 encode(const string &str) { 198 __int64 r; 199 200 if (str.size() <= 8) { 201 // encoded directly 202 ((char *)&r)[7] = 0; 203 KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8); 204 return r; 205 } else { 206 // represented as index into table 207 map<string, unsigned>::const_iterator it = directory.find(str); 208 if (it == directory.end()) 209 stop("StringTable::encode: String now found in string table"); 210 ((unsigned *)&r)[0] = 0; 211 ((unsigned *)&r)[1] = (*it).second; 212 return r; 213 } 214 } 215 // Returns string represented by x based on this string table. Error if x 216 // references an invalid position in the table--returns the empty string. 217 string decode(__int64 x) const { 218 if (*(unsigned *)&x == 0) { 219 // represented as index into table 220 unsigned &p = ((unsigned *)&x)[1]; 221 if (p >= length) 222 stop("StringTable::decode: Invalid string table lookup"); 223 return string(data + p); 224 } else { 225 // encoded directly 226 char *p = (char *)&x; 227 int i; 228 229 for (i = 0; i < 8 && p[i]; ++i) 230 ; 231 return string(p, i); 232 } 233 } 234 void write(ostream &os) { os.write(data, length); } 235 }; 236 237 // for the named object file, determines the set of defined symbols and the set 238 // of undefined external symbols and writes them to <defined> and <undefined> 239 // respectively 240 void computeExternalSymbols(const char *fileName, set<string> *defined, 241 set<string> *undefined) { 242 streampos fileSize; 243 size_t strTabStart; 244 unsigned symTabStart, symNEntries; 245 rstream f(fileName); 246 247 f.seekg(0, ios::end); 248 fileSize = f.tellg(); 249 250 f.seekg(8); 251 f >> symTabStart >> symNEntries; 252 // seek to the string table 253 f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); 254 if (f.eof()) { 255 printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart " 256 "= %u, symNEntries = %u\n", 257 fileName, (unsigned long)fileSize, symTabStart, symNEntries); 258 stop("computeExternalSymbols: Unexpected EOF 1"); 259 } 260 StringTable stringTable(f); // read the string table 261 if (f.tellg() != fileSize) 262 stop("computeExternalSymbols: Unexpected data after string table"); 263 264 f.clear(); 265 f.seekg(symTabStart); // seek to the symbol table 266 267 defined->clear(); 268 undefined->clear(); 269 for (int i = 0; i < symNEntries; ++i) { 270 // process each entry 271 Symbol e; 272 273 if (f.eof()) 274 stop("computeExternalSymbols: Unexpected EOF 2"); 275 f >> e; 276 if (f.fail()) 277 stop("computeExternalSymbols: File read error"); 278 if (e.nAux) { // auxiliary entry: skip 279 f.seekg(e.nAux * 18, ios::cur); 280 i += e.nAux; 281 } 282 // if symbol is extern and defined in the current file, insert it 283 if (e.storageClass == 2) 284 if (e.sectionNum) 285 defined->insert(stringTable.decode(e.name)); 286 else 287 undefined->insert(stringTable.decode(e.name)); 288 } 289 } 290 291 // For each occurrence of an external symbol in the object file named by 292 // by <fileName> that is a member of <hide>, renames it by prefixing 293 // with "__kmp_external_", writing back the file in-place 294 void hideSymbols(char *fileName, const set<string> &hide) { 295 static const string prefix("__kmp_external_"); 296 set<string> strings; // set of all occurring symbols, appropriately prefixed 297 streampos fileSize; 298 size_t strTabStart; 299 unsigned symTabStart, symNEntries; 300 int i; 301 rstream in(fileName); 302 303 in.seekg(0, ios::end); 304 fileSize = in.tellg(); 305 306 in.seekg(8); 307 in >> symTabStart >> symNEntries; 308 in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); 309 if (in.eof()) 310 stop("hideSymbols: Unexpected EOF"); 311 StringTable stringTableOld(in); // read original string table 312 313 if (in.tellg() != fileSize) 314 stop("hideSymbols: Unexpected data after string table"); 315 316 // compute set of occurring strings with prefix added 317 for (i = 0; i < symNEntries; ++i) { 318 Symbol e; 319 320 in.seekg(symTabStart + i * 18); 321 if (in.eof()) 322 stop("hideSymbols: Unexpected EOF"); 323 in >> e; 324 if (in.fail()) 325 stop("hideSymbols: File read error"); 326 if (e.nAux) 327 i += e.nAux; 328 const string &s = stringTableOld.decode(e.name); 329 // if symbol is extern and found in <hide>, prefix and insert into strings, 330 // otherwise, just insert into strings without prefix 331 strings.insert( 332 (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s); 333 } 334 335 ofstream out(fileName, ios::trunc | ios::out | ios::binary); 336 if (!out.is_open()) 337 stop("hideSymbols: Error opening output file"); 338 339 // make new string table from string set 340 StringTable stringTableNew = StringTable(strings); 341 342 // copy input file to output file up to just before the symbol table 343 in.seekg(0); 344 char *buf = new char[symTabStart]; 345 in.read(buf, symTabStart); 346 out.write(buf, symTabStart); 347 delete[] buf; 348 349 // copy input symbol table to output symbol table with name translation 350 for (i = 0; i < symNEntries; ++i) { 351 Symbol e; 352 353 in.seekg(symTabStart + i * 18); 354 if (in.eof()) 355 stop("hideSymbols: Unexpected EOF"); 356 in >> e; 357 if (in.fail()) 358 stop("hideSymbols: File read error"); 359 const string &s = stringTableOld.decode(e.name); 360 out.seekp(symTabStart + i * 18); 361 e.name = stringTableNew.encode( 362 (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s); 363 out.write((char *)&e, 18); 364 if (out.fail()) 365 stop("hideSymbols: File write error"); 366 if (e.nAux) { 367 // copy auxiliary symbol table entries 368 int nAux = e.nAux; 369 for (int j = 1; j <= nAux; ++j) { 370 in >> e; 371 out.seekp(symTabStart + (i + j) * 18); 372 out.write((char *)&e, 18); 373 } 374 i += nAux; 375 } 376 } 377 // output string table 378 stringTableNew.write(out); 379 } 380 381 // returns true iff <a> and <b> have no common element 382 template <class T> bool isDisjoint(const set<T> &a, const set<T> &b) { 383 set<T>::const_iterator ita, itb; 384 385 for (ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { 386 const T &ta = *ita, &tb = *itb; 387 if (ta < tb) 388 ++ita; 389 else if (tb < ta) 390 ++itb; 391 else 392 return false; 393 } 394 return true; 395 } 396 397 // PRE: <defined> and <undefined> are arrays with <nTotal> elements where 398 // <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the 399 // external object files and the rest correspond to the internal object files. 400 // POST: file x is said to depend on file y if undefined[x] and defined[y] are 401 // not disjoint. Returns the transitive closure of the set of internal object 402 // files, as a set of file indexes, under the 'depends on' relation, minus the 403 // set of internal object files. 404 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, 405 set<string> *undefined) { 406 set<int> *required = new set<int>; 407 set<int> fresh[2]; 408 int i, cur = 0; 409 bool changed; 410 411 for (i = nTotal - 1; i >= nExternal; --i) 412 fresh[cur].insert(i); 413 do { 414 changed = false; 415 for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); 416 ++it) { 417 set<string> &s = undefined[*it]; 418 419 for (i = 0; i < nExternal; ++i) { 420 if (required->find(i) == required->end()) { 421 if (!isDisjoint(defined[i], s)) { 422 // found a new qualifying element 423 required->insert(i); 424 fresh[1 - cur].insert(i); 425 changed = true; 426 } 427 } 428 } 429 } 430 fresh[cur].clear(); 431 cur = 1 - cur; 432 } while (changed); 433 return required; 434 } 435 436 int main(int argc, char **argv) { 437 int nExternal, nInternal, i; 438 set<string> *defined, *undefined; 439 set<int>::iterator it; 440 441 if (argc < 3) 442 stop("Please specify a positive integer followed by a list of object " 443 "filenames"); 444 nExternal = atoi(argv[1]); 445 if (nExternal <= 0) 446 stop("Please specify a positive integer followed by a list of object " 447 "filenames"); 448 if (nExternal + 2 > argc) 449 stop("Too few external objects"); 450 nInternal = argc - nExternal - 2; 451 defined = new set<string>[argc - 2]; 452 undefined = new set<string>[argc - 2]; 453 454 // determine the set of defined and undefined external symbols 455 for (i = 2; i < argc; ++i) 456 computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); 457 458 // determine the set of required external files 459 set<int> *requiredExternal = 460 findRequiredExternal(nExternal, argc - 2, defined, undefined); 461 set<string> hide; 462 463 // determine the set of symbols to hide--namely defined external symbols of 464 // the required external files 465 for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { 466 int idx = *it; 467 set<string>::iterator it2; 468 // We have to insert one element at a time instead of inserting a range 469 // because the insert member function taking a range doesn't exist on 470 // Windows* OS, at least at the time of this writing. 471 for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) 472 hide.insert(*it2); 473 } 474 475 // process the external files--removing those that are not required and hiding 476 // the appropriate symbols in the others 477 for (i = 0; i < nExternal; ++i) 478 if (requiredExternal->find(i) != requiredExternal->end()) 479 hideSymbols(argv[2 + i], hide); 480 else 481 remove(argv[2 + i]); 482 // hide the appropriate symbols in the internal files 483 for (i = nExternal + 2; i < argc; ++i) 484 hideSymbols(argv[i], hide); 485 return 0; 486 } 487