1 /* 2 * extractExternal.cpp 3 * $Revision: 42181 $ 4 * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $ 5 */ 6 7 8 //===----------------------------------------------------------------------===// 9 // 10 // The LLVM Compiler Infrastructure 11 // 12 // This file is dual licensed under the MIT and the University of Illinois Open 13 // Source Licenses. See LICENSE.txt for details. 14 // 15 //===----------------------------------------------------------------------===// 16 17 18 #include <stdlib.h> 19 #include <iostream> 20 #include <strstream> 21 #include <fstream> 22 #include <string> 23 #include <set> 24 #include <map> 25 26 /* Given a set of n object files h ('external' object files) and a set of m 27 object files o ('internal' object files), 28 1. Determines r, the subset of h that o depends on, directly or indirectly 29 2. Removes the files in h - r from the file system 30 3. For each external symbol defined in some file in r, rename it in r U o 31 by prefixing it with "__kmp_external_" 32 Usage: 33 hide.exe <n> <filenames for h> <filenames for o> 34 35 Thus, the prefixed symbols become hidden in the sense that they now have a special 36 prefix. 37 */ 38 39 using namespace std; 40 41 void stop(char* errorMsg) { 42 printf("%s\n", errorMsg); 43 exit(1); 44 } 45 46 // an entry in the symbol table of a .OBJ file 47 class Symbol { 48 public: 49 __int64 name; 50 unsigned value; 51 unsigned short sectionNum, type; 52 char storageClass, nAux; 53 }; 54 55 class _rstream : public istrstream { 56 private: 57 const char *buf; 58 protected: 59 _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){} 60 ~_rstream() { 61 delete[]buf; 62 } 63 }; 64 65 /* A stream encapuslating the content of a file or the content of a string, overriding the 66 >> operator to read various integer types in binary form, as well as a symbol table 67 entry. 68 */ 69 class rstream : public _rstream { 70 private: 71 template<class T> 72 inline rstream& doRead(T &x) { 73 read((char*)&x, sizeof(T)); 74 return *this; 75 } 76 static pair<const char*, streamsize> getBuf(const char *fileName) { 77 ifstream raw(fileName,ios::binary | ios::in); 78 if(!raw.is_open()) 79 stop("rstream.getBuf: Error opening file"); 80 raw.seekg(0,ios::end); 81 streampos fileSize = raw.tellg(); 82 if(fileSize < 0) 83 stop("rstream.getBuf: Error reading file"); 84 char *buf = new char[fileSize]; 85 raw.seekg(0,ios::beg); 86 raw.read(buf, fileSize); 87 return pair<const char*, streamsize>(buf,fileSize); 88 } 89 public: 90 // construct from a string 91 rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){} 92 /* construct from a file whole content is fully read once to initialize the content of 93 this stream 94 */ 95 rstream(const char *fileName):_rstream(getBuf(fileName)){} 96 rstream& operator>>(int &x) { 97 return doRead(x); 98 } 99 rstream& operator>>(unsigned &x) { 100 return doRead(x); 101 } 102 rstream& operator>>(short &x) { 103 return doRead(x); 104 } 105 rstream& operator>>(unsigned short &x) { 106 return doRead(x); 107 } 108 rstream& operator>>(Symbol &e) { 109 read((char*)&e, 18); 110 return *this; 111 } 112 }; 113 114 // string table in a .OBJ file 115 class StringTable { 116 private: 117 map<string, unsigned> directory; 118 size_t length; 119 char *data; 120 121 // make <directory> from <length> bytes in <data> 122 void makeDirectory(void) { 123 unsigned i = 4; 124 while(i < length) { 125 string s = string(data + i); 126 directory.insert(make_pair(s, i)); 127 i += s.size() + 1; 128 } 129 } 130 // initialize <length> and <data> with contents specified by the arguments 131 void init(const char *_data) { 132 unsigned _length = *(unsigned*)_data; 133 134 if(_length < sizeof(unsigned) || _length != *(unsigned*)_data) 135 stop("StringTable.init: Invalid symbol table"); 136 if(_data[_length - 1]) { 137 // to prevent runaway strings, make sure the data ends with a zero 138 data = new char[length = _length + 1]; 139 data[_length] = 0; 140 } else { 141 data = new char[length = _length]; 142 } 143 *(unsigned*)data = length; 144 memcpy(data + sizeof(unsigned), _data + sizeof(unsigned), 145 length - sizeof(unsigned)); 146 makeDirectory(); 147 } 148 public: 149 StringTable(rstream &f) { 150 /* Construct string table by reading from f. 151 */ 152 streampos s; 153 unsigned strSize; 154 char *strData; 155 156 s = f.tellg(); 157 f>>strSize; 158 if(strSize < sizeof(unsigned)) 159 stop("StringTable: Invalid string table"); 160 strData = new char[strSize]; 161 *(unsigned*)strData = strSize; 162 // read the raw data into <strData> 163 f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned)); 164 s = f.tellg() - s; 165 if(s < strSize) 166 stop("StringTable: Unexpected EOF"); 167 init(strData); 168 delete[]strData; 169 } 170 StringTable(const set<string> &strings) { 171 /* Construct string table from given strings. 172 */ 173 char *p; 174 set<string>::const_iterator it; 175 size_t s; 176 177 // count required size for data 178 for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { 179 size_t l = (*it).size(); 180 181 if(l > (unsigned) 0xFFFFFFFF) 182 stop("StringTable: String too long"); 183 if(l > 8) { 184 length += l + 1; 185 if(length > (unsigned) 0xFFFFFFFF) 186 stop("StringTable: Symbol table too long"); 187 } 188 } 189 data = new char[length]; 190 *(unsigned*)data = length; 191 // populate data and directory 192 for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { 193 const string &str = *it; 194 size_t l = str.size(); 195 if(l > 8) { 196 directory.insert(make_pair(str, p - data)); 197 memcpy(p, str.c_str(), l); 198 p[l] = 0; 199 p += l + 1; 200 } 201 } 202 } 203 ~StringTable() { 204 delete[] data; 205 } 206 /* Returns encoding for given string based on this string table. 207 Error if string length is greater than 8 but string is not in 208 the string table--returns 0. 209 */ 210 __int64 encode(const string &str) { 211 __int64 r; 212 213 if(str.size() <= 8) { 214 // encoded directly 215 ((char*)&r)[7] = 0; 216 strncpy((char*)&r, str.c_str(), 8); 217 return r; 218 } else { 219 // represented as index into table 220 map<string,unsigned>::const_iterator it = directory.find(str); 221 if(it == directory.end()) 222 stop("StringTable::encode: String now found in string table"); 223 ((unsigned*)&r)[0] = 0; 224 ((unsigned*)&r)[1] = (*it).second; 225 return r; 226 } 227 } 228 /* Returns string represented by x based on this string table. 229 Error if x references an invalid position in the table--returns 230 the empty string. 231 */ 232 string decode(__int64 x) const { 233 if(*(unsigned*)&x == 0) { 234 // represented as index into table 235 unsigned &p = ((unsigned*)&x)[1]; 236 if(p >= length) 237 stop("StringTable::decode: Invalid string table lookup"); 238 return string(data + p); 239 } else { 240 // encoded directly 241 char *p = (char*)&x; 242 int i; 243 244 for(i = 0; i < 8 && p[i]; ++i); 245 return string(p, i); 246 } 247 } 248 void write(ostream &os) { 249 os.write(data, length); 250 } 251 }; 252 253 /* for the named object file, determines the set of defined symbols and the set of undefined external symbols 254 and writes them to <defined> and <undefined> respectively 255 */ 256 void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){ 257 streampos fileSize; 258 size_t strTabStart; 259 unsigned symTabStart, symNEntries; 260 rstream f(fileName); 261 262 f.seekg(0,ios::end); 263 fileSize = f.tellg(); 264 265 f.seekg(8); 266 f >> symTabStart >> symNEntries; 267 // seek to the string table 268 f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); 269 if(f.eof()) { 270 printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n", 271 fileName, (unsigned long) fileSize, symTabStart, symNEntries); 272 stop("computeExternalSymbols: Unexpected EOF 1"); 273 } 274 StringTable stringTable(f); // read the string table 275 if(f.tellg() != fileSize) 276 stop("computeExternalSymbols: Unexpected data after string table"); 277 278 f.clear(); 279 f.seekg(symTabStart); // seek to the symbol table 280 281 defined->clear(); undefined->clear(); 282 for(int i = 0; i < symNEntries; ++i) { 283 // process each entry 284 Symbol e; 285 286 if(f.eof()) 287 stop("computeExternalSymbols: Unexpected EOF 2"); 288 f>>e; 289 if(f.fail()) 290 stop("computeExternalSymbols: File read error"); 291 if(e.nAux) { // auxiliary entry: skip 292 f.seekg(e.nAux * 18, ios::cur); 293 i += e.nAux; 294 } 295 // if symbol is extern and defined in the current file, insert it 296 if(e.storageClass == 2) 297 if(e.sectionNum) 298 defined->insert(stringTable.decode(e.name)); 299 else 300 undefined->insert(stringTable.decode(e.name)); 301 } 302 } 303 304 /* For each occurrence of an external symbol in the object file named by 305 by <fileName> that is a member of <hide>, renames it by prefixing 306 with "__kmp_external_", writing back the file in-place 307 */ 308 void hideSymbols(char *fileName, const set<string> &hide) { 309 static const string prefix("__kmp_external_"); 310 set<string> strings; // set of all occurring symbols, appropriately prefixed 311 streampos fileSize; 312 size_t strTabStart; 313 unsigned symTabStart, symNEntries; 314 int i; 315 rstream in(fileName); 316 317 in.seekg(0,ios::end); 318 fileSize = in.tellg(); 319 320 in.seekg(8); 321 in >> symTabStart >> symNEntries; 322 in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); 323 if(in.eof()) 324 stop("hideSymbols: Unexpected EOF"); 325 StringTable stringTableOld(in); // read original string table 326 327 if(in.tellg() != fileSize) 328 stop("hideSymbols: Unexpected data after string table"); 329 330 // compute set of occurring strings with prefix added 331 for(i = 0; i < symNEntries; ++i) { 332 Symbol e; 333 334 in.seekg(symTabStart + i * 18); 335 if(in.eof()) 336 stop("hideSymbols: Unexpected EOF"); 337 in >> e; 338 if(in.fail()) 339 stop("hideSymbols: File read error"); 340 if(e.nAux) 341 i += e.nAux; 342 const string &s = stringTableOld.decode(e.name); 343 // if symbol is extern and found in <hide>, prefix and insert into strings, 344 // otherwise, just insert into strings without prefix 345 strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ? 346 prefix + s : s); 347 } 348 349 ofstream out(fileName, ios::trunc | ios::out | ios::binary); 350 if(!out.is_open()) 351 stop("hideSymbols: Error opening output file"); 352 353 // make new string table from string set 354 StringTable stringTableNew = StringTable(strings); 355 356 // copy input file to output file up to just before the symbol table 357 in.seekg(0); 358 char *buf = new char[symTabStart]; 359 in.read(buf, symTabStart); 360 out.write(buf, symTabStart); 361 delete []buf; 362 363 // copy input symbol table to output symbol table with name translation 364 for(i = 0; i < symNEntries; ++i) { 365 Symbol e; 366 367 in.seekg(symTabStart + i*18); 368 if(in.eof()) 369 stop("hideSymbols: Unexpected EOF"); 370 in >> e; 371 if(in.fail()) 372 stop("hideSymbols: File read error"); 373 const string &s = stringTableOld.decode(e.name); 374 out.seekp(symTabStart + i*18); 375 e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ? 376 prefix + s : s); 377 out.write((char*)&e, 18); 378 if(out.fail()) 379 stop("hideSymbols: File write error"); 380 if(e.nAux) { 381 // copy auxiliary symbol table entries 382 int nAux = e.nAux; 383 for(int j = 1; j <= nAux; ++j) { 384 in >> e; 385 out.seekp(symTabStart + (i + j) * 18); 386 out.write((char*)&e, 18); 387 } 388 i += nAux; 389 } 390 } 391 // output string table 392 stringTableNew.write(out); 393 } 394 395 // returns true iff <a> and <b> have no common element 396 template <class T> 397 bool isDisjoint(const set<T> &a, const set<T> &b) { 398 set<T>::const_iterator ita, itb; 399 400 for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { 401 const T &ta = *ita, &tb = *itb; 402 if(ta < tb) 403 ++ita; 404 else if (tb < ta) 405 ++itb; 406 else 407 return false; 408 } 409 return true; 410 } 411 412 /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where 413 <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object 414 files and the rest correspond to the internal object files. 415 postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not 416 disjoint. Returns the transitive closure of the set of internal object files, as a set of 417 file indexes, under the 'depends on' relation, minus the set of internal object files. 418 */ 419 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) { 420 set<int> *required = new set<int>; 421 set<int> fresh[2]; 422 int i, cur = 0; 423 bool changed; 424 425 for(i = nTotal - 1; i >= nExternal; --i) 426 fresh[cur].insert(i); 427 do { 428 changed = false; 429 for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) { 430 set<string> &s = undefined[*it]; 431 432 for(i = 0; i < nExternal; ++i) { 433 if(required->find(i) == required->end()) { 434 if(!isDisjoint(defined[i], s)) { 435 // found a new qualifying element 436 required->insert(i); 437 fresh[1 - cur].insert(i); 438 changed = true; 439 } 440 } 441 } 442 } 443 fresh[cur].clear(); 444 cur = 1 - cur; 445 } while(changed); 446 return required; 447 } 448 449 int main(int argc, char **argv) { 450 int nExternal, nInternal, i; 451 set<string> *defined, *undefined; 452 set<int>::iterator it; 453 454 if(argc < 3) 455 stop("Please specify a positive integer followed by a list of object filenames"); 456 nExternal = atoi(argv[1]); 457 if(nExternal <= 0) 458 stop("Please specify a positive integer followed by a list of object filenames"); 459 if(nExternal + 2 > argc) 460 stop("Too few external objects"); 461 nInternal = argc - nExternal - 2; 462 defined = new set<string>[argc - 2]; 463 undefined = new set<string>[argc - 2]; 464 465 // determine the set of defined and undefined external symbols 466 for(i = 2; i < argc; ++i) 467 computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); 468 469 // determine the set of required external files 470 set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined); 471 set<string> hide; 472 473 /* determine the set of symbols to hide--namely defined external symbols of the 474 required external files 475 */ 476 for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { 477 int idx = *it; 478 set<string>::iterator it2; 479 /* We have to insert one element at a time instead of inserting a range because 480 the insert member function taking a range doesn't exist on Windows* OS, at least 481 at the time of this writing. 482 */ 483 for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) 484 hide.insert(*it2); 485 } 486 487 /* process the external files--removing those that are not required and hiding 488 the appropriate symbols in the others 489 */ 490 for(i = 0; i < nExternal; ++i) 491 if(requiredExternal->find(i) != requiredExternal->end()) 492 hideSymbols(argv[2 + i], hide); 493 else 494 remove(argv[2 + i]); 495 // hide the appropriate symbols in the internal files 496 for(i = nExternal + 2; i < argc; ++i) 497 hideSymbols(argv[i], hide); 498 return 0; 499 } 500