/*
 * extractExternal.cpp
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <strstream>

/* Given a set of n object files h ('external' object files) and a set of m
   object files o ('internal' object files),
   1. Determines r, the subset of h that o depends on, directly or indirectly
   2. Removes the files in h - r from the file system
   3. For each external symbol defined in some file in r, rename it in r U o
      by prefixing it with "__kmp_external_"
   Usage:
   hide.exe <n> <filenames for h> <filenames for o>

   Thus, the prefixed symbols become hidden in the sense that they now have a
   special prefix.
*/

using namespace std;

// Prints <errorMsg> followed by a newline to standard output and terminates
// the process with exit status 1. Never returns.
// The parameter is const-qualified because every caller passes a string
// literal.
void stop(const char *errorMsg) {
  printf("%s\n", errorMsg);
  exit(1);
}

// An entry in the symbol table of a .OBJ file.
// Entries are read from the file as 18 raw bytes laid over the start of this
// object, so the order and sizes of the members below must not change:
// 8 + 4 + 2 + 2 + 1 + 1 = 18 bytes with no internal padding.
class Symbol {
public:
  // Encoded name: either up to 8 characters stored inline, or (when the first
  // 4 bytes are zero) an offset into the string table in the last 4 bytes.
  // `long long` is used instead of the Microsoft-only `__int64`; it is the
  // same 64-bit type on compilers that provide `__int64`.
  long long name;
  unsigned value;
  // sectionNum == 0 marks a symbol that is not defined in this object file.
  unsigned short sectionNum, type;
  // storageClass == 2 marks an external symbol.
  char storageClass;
  // Number of auxiliary 18-byte entries that follow this one. Kept unsigned
  // so that counts of 128 and above are not read as negative values.
  unsigned char nAux;
};

// Input stream over a heap buffer that the stream owns.
// The buffer handed to the constructor must have been allocated with new[]:
// it is released with delete[] when the stream is destroyed. Construction and
// destruction are protected, so only derived classes can create instances.
class _rstream : public istrstream {
private:
  const char *buf; // start of the owned buffer, freed in the destructor

protected:
  // <p> is (pointer to buffer, size of buffer in bytes).
  _rstream(pair<const char *, streamsize> p)
      : istrstream(p.first, p.second), buf(p.first) {}
  ~_rstream() { delete[] buf; }
};

// A stream encapsulating the content of a file or the content of a string,
// overriding the >> operator to read various integer types in binary form,
// as well as a symbol table entry.
64 class rstream : public _rstream { 65 private: 66 template <class T> inline rstream &doRead(T &x) { 67 read((char *)&x, sizeof(T)); 68 return *this; 69 } 70 static pair<const char *, streamsize> getBuf(const char *fileName) { 71 ifstream raw(fileName, ios::binary | ios::in); 72 if (!raw.is_open()) 73 stop("rstream.getBuf: Error opening file"); 74 raw.seekg(0, ios::end); 75 streampos fileSize = raw.tellg(); 76 if (fileSize < 0) 77 stop("rstream.getBuf: Error reading file"); 78 char *buf = new char[fileSize]; 79 raw.seekg(0, ios::beg); 80 raw.read(buf, fileSize); 81 return pair<const char *, streamsize>(buf, fileSize); 82 } 83 84 public: 85 // construct from a string 86 rstream(const char *buf, streamsize size) 87 : _rstream(pair<const char *, streamsize>(buf, size)) {} 88 // construct from a file whole content is fully read once to initialize the 89 // content of this stream 90 rstream(const char *fileName) : _rstream(getBuf(fileName)) {} 91 rstream &operator>>(int &x) { return doRead(x); } 92 rstream &operator>>(unsigned &x) { return doRead(x); } 93 rstream &operator>>(short &x) { return doRead(x); } 94 rstream &operator>>(unsigned short &x) { return doRead(x); } 95 rstream &operator>>(Symbol &e) { 96 read((char *)&e, 18); 97 return *this; 98 } 99 }; 100 101 // string table in a .OBJ file 102 class StringTable { 103 private: 104 map<string, unsigned> directory; 105 size_t length; 106 char *data; 107 108 // make <directory> from <length> bytes in <data> 109 void makeDirectory(void) { 110 unsigned i = 4; 111 while (i < length) { 112 string s = string(data + i); 113 directory.insert(make_pair(s, i)); 114 i += s.size() + 1; 115 } 116 } 117 // initialize <length> and <data> with contents specified by the arguments 118 void init(const char *_data) { 119 unsigned _length = *(unsigned *)_data; 120 121 if (_length < sizeof(unsigned) || _length != *(unsigned *)_data) 122 stop("StringTable.init: Invalid symbol table"); 123 if (_data[_length - 1]) { 124 // to prevent runaway 
strings, make sure the data ends with a zero 125 data = new char[length = _length + 1]; 126 data[_length] = 0; 127 } else { 128 data = new char[length = _length]; 129 } 130 *(unsigned *)data = length; 131 KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned), 132 length - sizeof(unsigned)); 133 makeDirectory(); 134 } 135 136 public: 137 StringTable(rstream &f) { 138 // Construct string table by reading from f. 139 streampos s; 140 unsigned strSize; 141 char *strData; 142 143 s = f.tellg(); 144 f >> strSize; 145 if (strSize < sizeof(unsigned)) 146 stop("StringTable: Invalid string table"); 147 strData = new char[strSize]; 148 *(unsigned *)strData = strSize; 149 // read the raw data into <strData> 150 f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned)); 151 s = f.tellg() - s; 152 if (s < strSize) 153 stop("StringTable: Unexpected EOF"); 154 init(strData); 155 delete[] strData; 156 } 157 StringTable(const set<string> &strings) { 158 // Construct string table from given strings. 159 char *p; 160 set<string>::const_iterator it; 161 size_t s; 162 163 // count required size for data 164 for (length = sizeof(unsigned), it = strings.begin(); it != strings.end(); 165 ++it) { 166 size_t l = (*it).size(); 167 168 if (l > (unsigned)0xFFFFFFFF) 169 stop("StringTable: String too long"); 170 if (l > 8) { 171 length += l + 1; 172 if (length > (unsigned)0xFFFFFFFF) 173 stop("StringTable: Symbol table too long"); 174 } 175 } 176 data = new char[length]; 177 *(unsigned *)data = length; 178 // populate data and directory 179 for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); 180 ++it) { 181 const string &str = *it; 182 size_t l = str.size(); 183 if (l > 8) { 184 directory.insert(make_pair(str, p - data)); 185 KMP_MEMCPY(p, str.c_str(), l); 186 p[l] = 0; 187 p += l + 1; 188 } 189 } 190 } 191 ~StringTable() { delete[] data; } 192 // Returns encoding for given string based on this string table. 
Error if 193 // string length is greater than 8 but string is not in the string table 194 // -- returns 0. 195 __int64 encode(const string &str) { 196 __int64 r; 197 198 if (str.size() <= 8) { 199 // encoded directly 200 ((char *)&r)[7] = 0; 201 KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8); 202 return r; 203 } else { 204 // represented as index into table 205 map<string, unsigned>::const_iterator it = directory.find(str); 206 if (it == directory.end()) 207 stop("StringTable::encode: String now found in string table"); 208 ((unsigned *)&r)[0] = 0; 209 ((unsigned *)&r)[1] = (*it).second; 210 return r; 211 } 212 } 213 // Returns string represented by x based on this string table. Error if x 214 // references an invalid position in the table--returns the empty string. 215 string decode(__int64 x) const { 216 if (*(unsigned *)&x == 0) { 217 // represented as index into table 218 unsigned &p = ((unsigned *)&x)[1]; 219 if (p >= length) 220 stop("StringTable::decode: Invalid string table lookup"); 221 return string(data + p); 222 } else { 223 // encoded directly 224 char *p = (char *)&x; 225 int i; 226 227 for (i = 0; i < 8 && p[i]; ++i) 228 ; 229 return string(p, i); 230 } 231 } 232 void write(ostream &os) { os.write(data, length); } 233 }; 234 235 // for the named object file, determines the set of defined symbols and the set 236 // of undefined external symbols and writes them to <defined> and <undefined> 237 // respectively 238 void computeExternalSymbols(const char *fileName, set<string> *defined, 239 set<string> *undefined) { 240 streampos fileSize; 241 size_t strTabStart; 242 unsigned symTabStart, symNEntries; 243 rstream f(fileName); 244 245 f.seekg(0, ios::end); 246 fileSize = f.tellg(); 247 248 f.seekg(8); 249 f >> symTabStart >> symNEntries; 250 // seek to the string table 251 f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); 252 if (f.eof()) { 253 printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart " 254 "= %u, 
symNEntries = %u\n", 255 fileName, (unsigned long)fileSize, symTabStart, symNEntries); 256 stop("computeExternalSymbols: Unexpected EOF 1"); 257 } 258 StringTable stringTable(f); // read the string table 259 if (f.tellg() != fileSize) 260 stop("computeExternalSymbols: Unexpected data after string table"); 261 262 f.clear(); 263 f.seekg(symTabStart); // seek to the symbol table 264 265 defined->clear(); 266 undefined->clear(); 267 for (int i = 0; i < symNEntries; ++i) { 268 // process each entry 269 Symbol e; 270 271 if (f.eof()) 272 stop("computeExternalSymbols: Unexpected EOF 2"); 273 f >> e; 274 if (f.fail()) 275 stop("computeExternalSymbols: File read error"); 276 if (e.nAux) { // auxiliary entry: skip 277 f.seekg(e.nAux * 18, ios::cur); 278 i += e.nAux; 279 } 280 // if symbol is extern and defined in the current file, insert it 281 if (e.storageClass == 2) 282 if (e.sectionNum) 283 defined->insert(stringTable.decode(e.name)); 284 else 285 undefined->insert(stringTable.decode(e.name)); 286 } 287 } 288 289 // For each occurrence of an external symbol in the object file named by 290 // by <fileName> that is a member of <hide>, renames it by prefixing 291 // with "__kmp_external_", writing back the file in-place 292 void hideSymbols(char *fileName, const set<string> &hide) { 293 static const string prefix("__kmp_external_"); 294 set<string> strings; // set of all occurring symbols, appropriately prefixed 295 streampos fileSize; 296 size_t strTabStart; 297 unsigned symTabStart, symNEntries; 298 int i; 299 rstream in(fileName); 300 301 in.seekg(0, ios::end); 302 fileSize = in.tellg(); 303 304 in.seekg(8); 305 in >> symTabStart >> symNEntries; 306 in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); 307 if (in.eof()) 308 stop("hideSymbols: Unexpected EOF"); 309 StringTable stringTableOld(in); // read original string table 310 311 if (in.tellg() != fileSize) 312 stop("hideSymbols: Unexpected data after string table"); 313 314 // compute set of occurring 
strings with prefix added 315 for (i = 0; i < symNEntries; ++i) { 316 Symbol e; 317 318 in.seekg(symTabStart + i * 18); 319 if (in.eof()) 320 stop("hideSymbols: Unexpected EOF"); 321 in >> e; 322 if (in.fail()) 323 stop("hideSymbols: File read error"); 324 if (e.nAux) 325 i += e.nAux; 326 const string &s = stringTableOld.decode(e.name); 327 // if symbol is extern and found in <hide>, prefix and insert into strings, 328 // otherwise, just insert into strings without prefix 329 strings.insert( 330 (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s); 331 } 332 333 ofstream out(fileName, ios::trunc | ios::out | ios::binary); 334 if (!out.is_open()) 335 stop("hideSymbols: Error opening output file"); 336 337 // make new string table from string set 338 StringTable stringTableNew = StringTable(strings); 339 340 // copy input file to output file up to just before the symbol table 341 in.seekg(0); 342 char *buf = new char[symTabStart]; 343 in.read(buf, symTabStart); 344 out.write(buf, symTabStart); 345 delete[] buf; 346 347 // copy input symbol table to output symbol table with name translation 348 for (i = 0; i < symNEntries; ++i) { 349 Symbol e; 350 351 in.seekg(symTabStart + i * 18); 352 if (in.eof()) 353 stop("hideSymbols: Unexpected EOF"); 354 in >> e; 355 if (in.fail()) 356 stop("hideSymbols: File read error"); 357 const string &s = stringTableOld.decode(e.name); 358 out.seekp(symTabStart + i * 18); 359 e.name = stringTableNew.encode( 360 (e.storageClass == 2 && hide.find(s) != hide.end()) ? 
prefix + s : s); 361 out.write((char *)&e, 18); 362 if (out.fail()) 363 stop("hideSymbols: File write error"); 364 if (e.nAux) { 365 // copy auxiliary symbol table entries 366 int nAux = e.nAux; 367 for (int j = 1; j <= nAux; ++j) { 368 in >> e; 369 out.seekp(symTabStart + (i + j) * 18); 370 out.write((char *)&e, 18); 371 } 372 i += nAux; 373 } 374 } 375 // output string table 376 stringTableNew.write(out); 377 } 378 379 // returns true iff <a> and <b> have no common element 380 template <class T> bool isDisjoint(const set<T> &a, const set<T> &b) { 381 set<T>::const_iterator ita, itb; 382 383 for (ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { 384 const T &ta = *ita, &tb = *itb; 385 if (ta < tb) 386 ++ita; 387 else if (tb < ta) 388 ++itb; 389 else 390 return false; 391 } 392 return true; 393 } 394 395 // PRE: <defined> and <undefined> are arrays with <nTotal> elements where 396 // <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the 397 // external object files and the rest correspond to the internal object files. 398 // POST: file x is said to depend on file y if undefined[x] and defined[y] are 399 // not disjoint. Returns the transitive closure of the set of internal object 400 // files, as a set of file indexes, under the 'depends on' relation, minus the 401 // set of internal object files. 
402 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, 403 set<string> *undefined) { 404 set<int> *required = new set<int>; 405 set<int> fresh[2]; 406 int i, cur = 0; 407 bool changed; 408 409 for (i = nTotal - 1; i >= nExternal; --i) 410 fresh[cur].insert(i); 411 do { 412 changed = false; 413 for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); 414 ++it) { 415 set<string> &s = undefined[*it]; 416 417 for (i = 0; i < nExternal; ++i) { 418 if (required->find(i) == required->end()) { 419 if (!isDisjoint(defined[i], s)) { 420 // found a new qualifying element 421 required->insert(i); 422 fresh[1 - cur].insert(i); 423 changed = true; 424 } 425 } 426 } 427 } 428 fresh[cur].clear(); 429 cur = 1 - cur; 430 } while (changed); 431 return required; 432 } 433 434 int main(int argc, char **argv) { 435 int nExternal, nInternal, i; 436 set<string> *defined, *undefined; 437 set<int>::iterator it; 438 439 if (argc < 3) 440 stop("Please specify a positive integer followed by a list of object " 441 "filenames"); 442 nExternal = atoi(argv[1]); 443 if (nExternal <= 0) 444 stop("Please specify a positive integer followed by a list of object " 445 "filenames"); 446 if (nExternal + 2 > argc) 447 stop("Too few external objects"); 448 nInternal = argc - nExternal - 2; 449 defined = new set<string>[argc - 2]; 450 undefined = new set<string>[argc - 2]; 451 452 // determine the set of defined and undefined external symbols 453 for (i = 2; i < argc; ++i) 454 computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); 455 456 // determine the set of required external files 457 set<int> *requiredExternal = 458 findRequiredExternal(nExternal, argc - 2, defined, undefined); 459 set<string> hide; 460 461 // determine the set of symbols to hide--namely defined external symbols of 462 // the required external files 463 for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { 464 int idx = *it; 465 set<string>::iterator it2; 
466 // We have to insert one element at a time instead of inserting a range 467 // because the insert member function taking a range doesn't exist on 468 // Windows* OS, at least at the time of this writing. 469 for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) 470 hide.insert(*it2); 471 } 472 473 // process the external files--removing those that are not required and hiding 474 // the appropriate symbols in the others 475 for (i = 0; i < nExternal; ++i) 476 if (requiredExternal->find(i) != requiredExternal->end()) 477 hideSymbols(argv[2 + i], hide); 478 else 479 remove(argv[2 + i]); 480 // hide the appropriate symbols in the internal files 481 for (i = nExternal + 2; i < argc; ++i) 482 hideSymbols(argv[i], hide); 483 return 0; 484 } 485