/*
 * extractExternal.cpp
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

/* Given a set of n object files h ('external' object files) and a set of m
   object files o ('internal' object files),
   1. Determines r, the subset of h that o depends on, directly or indirectly
   2. Removes the files in h - r from the file system
   3. For each external symbol defined in some file in r, renames it in r U o
      by prefixing it with "__kmp_external_"
   Usage:
   hide.exe <n> <filenames for h> <filenames for o>

   Thus, the prefixed symbols become hidden in the sense that they now have a
   special prefix.

   The object files are parsed as COFF: the symbol-table file offset and the
   symbol count are read as two consecutive 32-bit values at file offset 8,
   every symbol record is 18 bytes, and the string table directly follows the
   symbol table.  Multi-byte fields are read in host byte order, and the code
   assumes a 4-byte 'unsigned' and an 8-byte 'long long'.
*/

// size in bytes of one symbol table record as stored in the file
static const unsigned kSymbolSize = 18;
// longest symbol name that is stored inline in the record's 8-byte name field
static const size_t kShortNameLength = 8;
// storage class value that marks a symbol as external
static const unsigned char kStorageClassExternal = 2;
// file offset of the <symbol table offset, symbol count> pair in the header
static const std::streamoff kSymTabPointerOffset = 8;

// prints <errorMsg> followed by a newline on stdout and exits with status 1
void stop(const char *errorMsg) {
    printf("%s\n", errorMsg);
    exit(1);
}

// an entry in the symbol table of a .OBJ file
// Only the first kSymbolSize bytes of this object are read from / written to
// the file; this relies on the fields below being laid out without padding.
class Symbol {
public:
    long long name;                  // inline name, or <0, string table offset>
    unsigned value;
    unsigned short sectionNum, type; // sectionNum == 0: not defined in this file
    unsigned char storageClass, nAux; // nAux: number of auxiliary records that follow
};

/* A stream encapsulating the content of a file or the content of a string,
   overriding the >> operator to read various integer types in binary form, as
   well as a symbol table entry.  The whole content is held in memory, so the
   underlying file can be rewritten while the stream is still in use.
*/
class rstream : public std::istringstream {
private:
    // reads sizeof(T) raw bytes into <x>
    template<class T>
    rstream& doRead(T &x) {
        read(reinterpret_cast<char*>(&x), sizeof(T));
        return *this;
    }
    // returns the complete content of the named file; stops on any error
    static std::string getBuf(const char *fileName) {
        std::ifstream raw(fileName, std::ios::binary | std::ios::in);
        if(!raw.is_open())
            stop("rstream.getBuf: Error opening file");
        raw.seekg(0, std::ios::end);
        const std::streamoff fileSize = raw.tellg();
        if(fileSize < 0)
            stop("rstream.getBuf: Error reading file");
        std::string content(static_cast<size_t>(fileSize), '\0');
        raw.seekg(0, std::ios::beg);
        if(fileSize > 0)
            raw.read(&content[0], fileSize);
        if(!raw)
            stop("rstream.getBuf: Error reading file");
        return content;
    }
public:
    // construct from a string; the <size> bytes at <buf> are copied
    rstream(const char *buf, std::streamsize size)
        : std::istringstream(std::string(buf, static_cast<size_t>(size)),
                             std::ios::in | std::ios::binary) {}
    /* construct from a file whose content is fully read once to initialize the
       content of this stream
    */
    explicit rstream(const char *fileName)
        : std::istringstream(getBuf(fileName), std::ios::in | std::ios::binary) {}
    rstream& operator>>(int &x) {
        return doRead(x);
    }
    rstream& operator>>(unsigned &x) {
        return doRead(x);
    }
    rstream& operator>>(short &x) {
        return doRead(x);
    }
    rstream& operator>>(unsigned short &x) {
        return doRead(x);
    }
    // reads one kSymbolSize-byte symbol table record
    rstream& operator>>(Symbol &e) {
        read(reinterpret_cast<char*>(&e), kSymbolSize);
        return *this;
    }
};

// string table in a .OBJ file
class StringTable {
private:
    // maps each string in the table to its byte offset within <data>
    std::map<std::string, unsigned> directory;
    /* raw table bytes: a leading 'unsigned' holding the total size (including
       itself), followed by zero-terminated strings; always ends with a zero byte
    */
    std::vector<char> data;

    // stores the current size of <data> in its leading size field
    void storeLength(void) {
        const unsigned len = static_cast<unsigned>(data.size());
        memcpy(&data[0], &len, sizeof(len));
    }
    // make <directory> from the bytes in <data>
    void makeDirectory(void) {
        size_t i = sizeof(unsigned);
        while(i < data.size()) {
            const std::string s(&data[i]);
            directory.insert(std::make_pair(s, static_cast<unsigned>(i)));
            i += s.size() + 1;
        }
    }
public:
    /* Construct string table by reading from f, which must be positioned at
       the table's size field.  Stops if the table is malformed or truncated.
    */
    explicit StringTable(rstream &f) {
        unsigned strSize = 0;

        f >> strSize;
        if(!f)
            stop("StringTable: Unexpected EOF");
        if(strSize < sizeof(unsigned))
            stop("StringTable: Invalid string table");
        // refuse a size field that claims more bytes than the stream has left,
        // before allocating a buffer of that size
        const std::streamoff bodyStart = f.tellg();
        f.seekg(0, std::ios::end);
        const std::streamoff streamEnd = f.tellg();
        f.seekg(bodyStart);
        if(static_cast<std::streamoff>(strSize - sizeof(unsigned)) > streamEnd - bodyStart)
            stop("StringTable: Unexpected EOF");
        // read the raw data
        data.resize(strSize);
        if(strSize > sizeof(unsigned))
            f.read(&data[sizeof(unsigned)],
                   static_cast<std::streamsize>(strSize - sizeof(unsigned)));
        if(!f)
            stop("StringTable: Unexpected EOF");
        // to prevent runaway strings, make sure the data ends with a zero
        if(data.back() != 0)
            data.push_back('\0');
        storeLength();
        makeDirectory();
    }
    /* Construct string table from given strings.  Only strings longer than
       kShortNameLength are stored; shorter ones are encoded inline by encode().
    */
    explicit StringTable(const std::set<std::string> &strings) {
        std::set<std::string>::const_iterator it;
        size_t total = sizeof(unsigned);

        // count required size for data
        for(it = strings.begin(); it != strings.end(); ++it) {
            const size_t l = it->size();

            if(l > 0xFFFFFFFFu)
                stop("StringTable: String too long");
            if(l > kShortNameLength) {
                total += l + 1;
                if(total > 0xFFFFFFFFu)
                    stop("StringTable: Symbol table too long");
            }
        }
        data.reserve(total);
        data.resize(sizeof(unsigned));
        // populate data and directory
        for(it = strings.begin(); it != strings.end(); ++it) {
            const std::string &str = *it;
            if(str.size() > kShortNameLength) {
                directory.insert(std::make_pair(str, static_cast<unsigned>(data.size())));
                data.insert(data.end(), str.begin(), str.end());
                data.push_back('\0');
            }
        }
        storeLength();
    }
    /* Returns encoding for given string based on this string table.
       Strings of up to kShortNameLength characters are stored inline,
       zero-padded; longer ones become <0, offset into the table>.
       Stops if a long string is not in the string table.
    */
    long long encode(const std::string &str) const {
        long long r = 0;

        if(str.size() <= kShortNameLength) {
            // encoded directly; the remaining bytes of r stay zero
            memcpy(&r, str.data(), str.size());
            return r;
        }
        // represented as index into table
        std::map<std::string, unsigned>::const_iterator it = directory.find(str);
        if(it == directory.end())
            stop("StringTable::encode: String not found in string table");
        const unsigned words[2] = { 0, it->second };
        memcpy(&r, words, sizeof(words));
        return r;
    }
    /* Returns string represented by x based on this string table.
       Stops if x references a position outside the table.
    */
    std::string decode(long long x) const {
        unsigned words[2];

        memcpy(words, &x, sizeof(words));
        if(words[0] == 0) {
            // represented as index into table
            if(words[1] >= data.size())
                stop("StringTable::decode: Invalid string table lookup");
            return std::string(&data[words[1]]);
        }
        // encoded directly: up to kShortNameLength characters, zero-padded
        const char *p = reinterpret_cast<const char*>(&x);
        size_t n = 0;
        while(n < kShortNameLength && p[n])
            ++n;
        return std::string(p, n);
    }
    // writes the raw table, including its leading size field, to <os>
    void write(std::ostream &os) const {
        os.write(&data[0], static_cast<std::streamsize>(data.size()));
    }
};

// returns the stream offset of symbol table record <index>, computed in
// std::streamoff so that large counts are not reduced modulo 2^32
static std::streamoff symbolOffset(unsigned symTabStart, unsigned index) {
    return static_cast<std::streamoff>(symTabStart) +
           static_cast<std::streamoff>(index) * kSymbolSize;
}

// reads one symbol record from <in>; on failure stops with <eofMsg> if the end
// of the stream was hit and with <errorMsg> otherwise
static void readSymbolOrStop(rstream &in, Symbol &e, const char *eofMsg,
                             const char *errorMsg) {
    in >> e;
    if(in.fail())
        stop(in.eof() ? eofMsg : errorMsg);
}

// returns the name <e> must carry in the output: prefixed if the symbol is
// extern and a member of <hide>, unchanged otherwise
static std::string outputName(const Symbol &e, const StringTable &table,
                              const std::set<std::string> &hide,
                              const std::string &prefix) {
    const std::string name = table.decode(e.name);
    if(e.storageClass == kStorageClassExternal && hide.find(name) != hide.end())
        return prefix + name;
    return name;
}

/* for the named object file, determines the set of defined symbols and the set
   of undefined external symbols and writes them to <defined> and <undefined>
   respectively (both sets are cleared first)
*/
void computeExternalSymbols(const char *fileName, std::set<std::string> *defined,
                            std::set<std::string> *undefined) {
    unsigned symTabStart = 0, symNEntries = 0;
    rstream f(fileName);

    f.seekg(0, std::ios::end);
    const std::streamoff fileSize = f.tellg();

    f.seekg(kSymTabPointerOffset);
    f >> symTabStart >> symNEntries;
    // seek to the string table, which directly follows the symbol table
    f.seekg(symbolOffset(symTabStart, symNEntries));
    // a failed header read or an out-of-range seek leaves the stream failed;
    // neither of them sets the EOF flag
    if(!f) {
        printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
               fileName, static_cast<unsigned long>(fileSize), symTabStart, symNEntries);
        stop("computeExternalSymbols: Unexpected EOF 1");
    }
    StringTable stringTable(f); // read the string table
    if(static_cast<std::streamoff>(f.tellg()) != fileSize)
        stop("computeExternalSymbols: Unexpected data after string table");

    f.clear();
    f.seekg(symbolOffset(symTabStart, 0)); // seek to the symbol table

    defined->clear();
    undefined->clear();
    for(unsigned i = 0; i < symNEntries; ++i) {
        // process each entry
        Symbol e;

        readSymbolOrStop(f, e, "computeExternalSymbols: Unexpected EOF 2",
                         "computeExternalSymbols: File read error");
        if(e.nAux) { // auxiliary entries: skip
            f.seekg(static_cast<std::streamoff>(e.nAux) * kSymbolSize, std::ios::cur);
            i += e.nAux;
        }
        // an extern symbol with a section number is defined in the current
        // file; one without is only referenced by it
        if(e.storageClass == kStorageClassExternal) {
            const std::string name = stringTable.decode(e.name);
            if(e.sectionNum)
                defined->insert(name);
            else
                undefined->insert(name);
        }
    }
}

/* For each occurrence of an external symbol in the object file named by
   <fileName> that is a member of <hide>, renames it by prefixing
   with "__kmp_external_", writing back the file in-place.
   The new content is assembled in memory first, so the file is only
   overwritten once its replacement is complete.
*/
void hideSymbols(const char *fileName, const std::set<std::string> &hide) {
    static const std::string prefix("__kmp_external_");
    std::set<std::string> strings; // set of all occurring symbols, appropriately prefixed
    unsigned symTabStart = 0, symNEntries = 0;
    unsigned i;
    rstream in(fileName);

    in.seekg(0, std::ios::end);
    const std::streamoff fileSize = in.tellg();

    in.seekg(kSymTabPointerOffset);
    in >> symTabStart >> symNEntries;
    in.seekg(symbolOffset(symTabStart, symNEntries));
    if(!in)
        stop("hideSymbols: Unexpected EOF");
    StringTable stringTableOld(in); // read original string table

    if(static_cast<std::streamoff>(in.tellg()) != fileSize)
        stop("hideSymbols: Unexpected data after string table");

    // compute set of occurring strings with prefix added
    for(i = 0; i < symNEntries; ++i) {
        Symbol e;

        in.seekg(symbolOffset(symTabStart, i));
        readSymbolOrStop(in, e, "hideSymbols: Unexpected EOF",
                         "hideSymbols: File read error");
        i += e.nAux; // auxiliary entries carry no name
        strings.insert(outputName(e, stringTableOld, hide, prefix));
    }

    // make new string table from string set
    StringTable stringTableNew(strings);
    std::ostringstream out(std::ios::out | std::ios::binary);

    // copy input file to output up to just before the symbol table
    in.seekg(0);
    if(symTabStart > 0) {
        std::vector<char> buf(symTabStart);
        in.read(&buf[0], static_cast<std::streamsize>(buf.size()));
        if(!in)
            stop("hideSymbols: File read error");
        out.write(&buf[0], static_cast<std::streamsize>(buf.size()));
    }

    // copy input symbol table to output symbol table with name translation;
    // the input is now positioned at the first symbol and is read sequentially
    for(i = 0; i < symNEntries; ++i) {
        Symbol e;

        readSymbolOrStop(in, e, "hideSymbols: Unexpected EOF",
                         "hideSymbols: File read error");
        e.name = stringTableNew.encode(outputName(e, stringTableOld, hide, prefix));
        out.write(reinterpret_cast<const char*>(&e), kSymbolSize);
        // copy auxiliary symbol table entries unchanged
        const unsigned nAux = e.nAux;
        for(unsigned j = 0; j < nAux; ++j) {
            Symbol aux;

            readSymbolOrStop(in, aux, "hideSymbols: Unexpected EOF",
                             "hideSymbols: File read error");
            out.write(reinterpret_cast<const char*>(&aux), kSymbolSize);
        }
        i += nAux;
    }
    // output string table
    stringTableNew.write(out);

    // replace the file content in one go
    const std::string bytes = out.str();
    std::ofstream file(fileName, std::ios::trunc | std::ios::out | std::ios::binary);
    if(!file.is_open())
        stop("hideSymbols: Error opening output file");
    file.write(bytes.data(), static_cast<std::streamsize>(bytes.size()));
    file.close();
    if(file.fail())
        stop("hideSymbols: File write error");
}

// returns true iff <a> and <b> have no common element
// (walks both ordered sets in step, so each element is visited at most once)
template <class T>
bool isDisjoint(const std::set<T> &a, const std::set<T> &b) {
    typename std::set<T>::const_iterator ita = a.begin(), itb = b.begin();

    while(ita != a.end() && itb != b.end()) {
        if(*ita < *itb)
            ++ita;
        else if(*itb < *ita)
            ++itb;
        else
            return false;
    }
    return true;
}

/* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
   <nTotal> >= <nExternal>.  The first <nExternal> elements correspond to the external object
   files and the rest correspond to the internal object files.
   postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
   disjoint.  Returns the transitive closure of the set of internal object files, as a set of
   file indexes, under the 'depends on' relation, minus the set of internal object files.
   The returned set is allocated with new; the caller is responsible for deleting it.
*/
std::set<int> *findRequiredExternal(int nExternal, int nTotal, std::set<std::string> *defined,
                                    std::set<std::string> *undefined) {
    std::set<int> *required = new std::set<int>;
    std::set<int> fresh[2]; // files found in the previous round / in this round
    int i, cur = 0;
    bool changed;

    // start from the internal files
    for(i = nTotal - 1; i >= nExternal; --i)
        fresh[cur].insert(i);
    do {
        changed = false;
        for(std::set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
            const std::set<std::string> &needed = undefined[*it];

            for(i = 0; i < nExternal; ++i) {
                if(required->find(i) == required->end() && !isDisjoint(defined[i], needed)) {
                    // found a new qualifying element
                    required->insert(i);
                    fresh[1 - cur].insert(i);
                    changed = true;
                }
            }
        }
        fresh[cur].clear();
        cur = 1 - cur;
    } while(changed);
    return required;
}

int main(int argc, char **argv) {
    int i;
    std::set<int>::iterator it;

    if(argc < 3)
        stop("Please specify a positive integer followed by a list of object filenames");
    const int nExternal = atoi(argv[1]);
    if(nExternal <= 0)
        stop("Please specify a positive integer followed by a list of object filenames");
    // written without 'nExternal + 2' so that a huge count cannot overflow
    if(nExternal > argc - 2)
        stop("Too few external objects");
    const int nTotal = argc - 2; // >= 1 because argc >= 3
    std::vector<std::set<std::string> > defined(nTotal), undefined(nTotal);

    // determine the set of defined and undefined external symbols
    for(i = 0; i < nTotal; ++i)
        computeExternalSymbols(argv[i + 2], &defined[i], &undefined[i]);

    // determine the set of required external files
    std::set<int> *requiredExternal = findRequiredExternal(nExternal, nTotal, &defined[0], &undefined[0]);
    std::set<std::string> hide;

    /* determine the set of symbols to hide--namely defined external symbols of the
       required external files
    */
    for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
        const int idx = *it;
        std::set<std::string>::iterator it2;
        /* We have to insert one element at a time instead of inserting a range because
           the insert member function taking a range doesn't exist on Windows* OS, at least
           at the time of this writing.
        */
        for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
            hide.insert(*it2);
    }

    /* process the external files--removing those that are not required and hiding
       the appropriate symbols in the others
    */
    for(i = 0; i < nExternal; ++i) {
        if(requiredExternal->find(i) != requiredExternal->end())
            hideSymbols(argv[2 + i], hide);
        else
            remove(argv[2 + i]); // result is not checked
    }
    // hide the appropriate symbols in the internal files
    for(i = nExternal + 2; i < argc; ++i)
        hideSymbols(argv[i], hide);

    delete requiredExternal;
    return 0;
}