1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/DataFormatters/StringPrinter.h" 11 12 #include "lldb/Core/DataExtractor.h" 13 #include "lldb/Core/Error.h" 14 #include "lldb/Target/Process.h" 15 #include "lldb/Target/Target.h" 16 17 #include "llvm/Support/ConvertUTF.h" 18 19 #include <ctype.h> 20 #include <functional> 21 #include <locale> 22 23 using namespace lldb; 24 using namespace lldb_private; 25 using namespace lldb_private::formatters; 26 27 // I can't use a std::unique_ptr for this because the Deleter is a template argument there 28 // and I want the same type to represent both pointers I want to free and pointers I don't need 29 // to free - which is what this class essentially is 30 // It's very specialized to the needs of this file, and not suggested for general use 31 template <typename T = uint8_t, typename U = char, typename S = size_t> 32 struct StringPrinterBufferPointer 33 { 34 public: 35 36 typedef std::function<void(const T*)> Deleter; 37 38 StringPrinterBufferPointer (std::nullptr_t ptr) : 39 m_data(nullptr), 40 m_size(0), 41 m_deleter() 42 {} 43 44 StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) : 45 m_data(bytes), 46 m_size(size), 47 m_deleter(deleter) 48 {} 49 50 StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) : 51 m_data((T*)bytes), 52 m_size(size), 53 m_deleter(deleter) 54 {} 55 56 StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) : 57 m_data(rhs.m_data), 58 m_size(rhs.m_size), 59 m_deleter(rhs.m_deleter) 60 { 61 rhs.m_data = nullptr; 62 } 63 64 StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) : 65 m_data(rhs.m_data), 66 m_size(rhs.m_size), 67 m_deleter(rhs.m_deleter) 68 { 69 rhs.m_data = nullptr; // this is why m_data has to be mutable 70 } 71 72 const T* 73 GetBytes () const 74 { 75 return m_data; 76 } 77 78 const S 79 GetSize () const 80 { 81 return m_size; 82 } 83 84 ~StringPrinterBufferPointer () 85 { 86 if (m_data && m_deleter) 87 m_deleter(m_data); 88 m_data = nullptr; 89 } 90 91 StringPrinterBufferPointer& 92 operator = (const StringPrinterBufferPointer& rhs) 93 { 94 if (m_data && m_deleter) 95 m_deleter(m_data); 96 m_data = rhs.m_data; 97 m_size = rhs.m_size; 98 m_deleter = rhs.m_deleter; 99 rhs.m_data = nullptr; 100 return *this; 101 } 102 103 private: 104 mutable const T* m_data; 105 size_t m_size; 106 Deleter m_deleter; 107 }; 108 109 // we define this for all values of type but only implement it for those we care about 110 // that's good because we get linker errors for any unsupported type 111 template <StringElementType type> 112 static StringPrinterBufferPointer<> 113 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 114 115 // mimic isprint() for Unicode codepoints 116 static bool 117 isprint(char32_t codepoint) 118 { 119 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 120 { 121 return false; 122 } 123 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 124 { 125 return false; 126 } 127 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 128 { 129 return false; 130 } 131 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 132 { 133 return false; 134 } 135 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 136 { 137 return false; 138 } 139 return true; 140 } 141 142 template <> 143 StringPrinterBufferPointer<> 144 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 145 { 146 StringPrinterBufferPointer<> retval = {nullptr}; 147 148 switch (*buffer) 149 { 150 case '\a': 151 retval = {"\\a",2}; 152 break; 153 case '\b': 154 retval = {"\\b",2}; 155 break; 156 case '\f': 157 retval = {"\\f",2}; 158 break; 159 case '\n': 160 retval = {"\\n",2}; 161 break; 162 case '\r': 163 retval = {"\\r",2}; 164 break; 165 case '\t': 166 retval = {"\\t",2}; 167 break; 168 case '\v': 169 retval = {"\\v",2}; 170 break; 171 case '\"': 172 retval = {"\\\"",2}; 173 break; 174 case '\\': 175 retval = {"\\\\",2}; 176 break; 177 default: 178 if (isprint(*buffer)) 179 retval = {buffer,1}; 180 else 181 { 182 retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} }; 183 sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer); 184 break; 185 } 186 } 187 188 next = buffer + 1; 189 return retval; 190 } 191 192 static char32_t 193 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 194 { 195 return (c0-192)*64+(c1-128); 196 } 197 static char32_t 198 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 199 { 200 return (c0-224)*4096+(c1-128)*64+(c2-128); 201 } 202 static char32_t 203 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 204 { 205 return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 206 } 207 208 template <> 209 StringPrinterBufferPointer<> 210 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 211 { 212 StringPrinterBufferPointer<> retval {nullptr}; 213 214 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 215 216 if (1+buffer_end-buffer < utf8_encoded_len) 217 { 218 // I don't have enough bytes - print whatever I have left 219 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 220 next = buffer_end+1; 221 return retval; 222 } 223 224 char32_t codepoint = 0; 225 switch (utf8_encoded_len) 226 { 227 case 1: 228 // this is just an ASCII byte - ask ASCII 229 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 230 case 2: 231 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 232 break; 233 case 3: 234 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 235 break; 236 case 4: 237 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 238 break; 239 default: 240 // this is probably some bogus non-character thing 241 // just print it as-is and hope to sync up again soon 242 retval = {buffer,1}; 243 next = buffer+1; 244 return retval; 245 } 246 247 if (codepoint) 248 { 249 switch (codepoint) 250 { 251 case '\a': 252 retval = {"\\a",2}; 253 break; 254 case '\b': 255 retval = {"\\b",2}; 256 break; 257 case '\f': 258 retval = {"\\f",2}; 259 break; 260 case '\n': 261 retval = {"\\n",2}; 262 break; 263 case '\r': 264 retval = {"\\r",2}; 265 break; 266 case '\t': 267 retval = {"\\t",2}; 268 break; 269 case '\v': 270 retval = {"\\v",2}; 271 break; 272 case '\"': 273 retval = {"\\\"",2}; 274 break; 275 case '\\': 276 retval = {"\\\\",2}; 277 break; 278 default: 279 if (isprint(codepoint)) 280 retval = {buffer,utf8_encoded_len}; 281 else 282 { 283 retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} }; 284 sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint); 285 break; 286 } 287 } 288 289 next = buffer + utf8_encoded_len; 290 return retval; 291 } 292 293 // this should not happen - but just in case.. try to resync at some point 294 retval = {buffer,1}; 295 next = buffer+1; 296 return retval; 297 } 298 299 // Given a sequence of bytes, this function returns: 300 // a sequence of bytes to actually print out + a length 301 // the following unscanned position of the buffer is in next 302 static StringPrinterBufferPointer<> 303 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 304 { 305 if (!buffer) 306 return {nullptr}; 307 308 switch (type) 309 { 310 case StringElementType::ASCII: 311 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 312 case StringElementType::UTF8: 313 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next); 314 default: 315 return {nullptr}; 316 } 317 } 318 319 // use this call if you already have an LLDB-side buffer for the data 320 template<typename SourceDataType> 321 static bool 322 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 323 const SourceDataType*, 324 UTF8**, 325 UTF8*, 326 ConversionFlags), 327 const DataExtractor& data, 328 Stream& stream, 329 char prefix_token, 330 char quote, 331 uint32_t sourceSize, 332 bool escapeNonPrintables) 333 { 334 if (prefix_token != 0) 335 stream.Printf("%c",prefix_token); 336 if (quote != 0) 337 stream.Printf("%c",quote); 338 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 339 { 340 const int bufferSPSize = data.GetByteSize(); 341 if (sourceSize == 0) 342 { 343 const int origin_encoding = 8*sizeof(SourceDataType); 344 sourceSize = bufferSPSize/(origin_encoding / 4); 345 } 346 347 SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart(); 348 SourceDataType *data_end_ptr = data_ptr + sourceSize; 349 350 while (data_ptr < data_end_ptr) 351 { 352 if (!*data_ptr) 353 { 354 data_end_ptr = data_ptr; 355 break; 356 } 357 data_ptr++; 358 } 359 360 data_ptr = (SourceDataType*)data.GetDataStart(); 361 362 lldb::DataBufferSP utf8_data_buffer_sp; 363 UTF8* utf8_data_ptr = nullptr; 364 UTF8* utf8_data_end_ptr = nullptr; 365 366 if (ConvertFunction) 367 { 368 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 369 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 370 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 371 ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 372 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 373 } 374 else 375 { 376 // just copy the pointers - the cast is necessary to make the compiler happy 377 // but this should only happen if we are reading UTF8 data 378 utf8_data_ptr = (UTF8*)data_ptr; 379 utf8_data_end_ptr = (UTF8*)data_end_ptr; 380 } 381 382 // since we tend to accept partial data (and even partially malformed data) 383 // we might end up with no NULL terminator before the end_ptr 384 // hence we need to take a slower route and ensure we stay within boundaries 385 for (;utf8_data_ptr < utf8_data_end_ptr;) 386 { 387 if (!*utf8_data_ptr) 388 break; 389 390 if (escapeNonPrintables) 391 { 392 uint8_t* next_data = nullptr; 393 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data); 394 auto printable_bytes = printable.GetBytes(); 395 auto printable_size = printable.GetSize(); 396 if (!printable_bytes || !next_data) 397 { 398 // GetPrintable() failed on us - print one byte in a desperate resync attempt 399 printable_bytes = utf8_data_ptr; 400 printable_size = 1; 401 next_data = utf8_data_ptr+1; 402 } 403 for (int c = 0; c < printable_size; c++) 404 stream.Printf("%c", *(printable_bytes+c)); 405 utf8_data_ptr = (uint8_t*)next_data; 406 } 407 else 408 { 409 stream.Printf("%c",*utf8_data_ptr); 410 utf8_data_ptr++; 411 } 412 } 413 } 414 if (quote != 0) 415 stream.Printf("%c",quote); 416 return true; 417 } 418 419 namespace lldb_private 420 { 421 422 namespace formatters 423 { 424 425 template <> 426 bool 427 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options) 428 { 429 assert(options.GetStream() && "need a Stream to print the string to"); 430 Error my_error; 431 size_t my_data_read; 432 433 ProcessSP process_sp(options.GetProcessSP()); 434 435 if (process_sp.get() == nullptr || options.GetLocation() == 0) 436 return false; 437 438 size_t size; 439 440 if (options.GetSourceSize() == 0) 441 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 442 else 443 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 444 445 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 446 447 my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 448 449 if (my_error.Fail()) 450 return false; 451 452 char prefix_token = options.GetPrefixToken(); 453 char quote = options.GetQuote(); 454 455 if (prefix_token != 0) 456 options.GetStream()->Printf("%c%c",prefix_token,quote); 457 else if (quote != 0) 458 options.GetStream()->Printf("%c",quote); 459 460 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 461 462 // since we tend to accept partial data (and even partially malformed data) 463 // we might end up with no NULL terminator before the end_ptr 464 // hence we need to take a slower route and ensure we stay within boundaries 465 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 466 { 467 if (options.GetEscapeNonPrintables()) 468 { 469 uint8_t* next_data = nullptr; 470 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data); 471 auto printable_bytes = printable.GetBytes(); 472 auto printable_size = printable.GetSize(); 473 if (!printable_bytes || !next_data) 474 { 475 // GetPrintable() failed on us - print one byte in a desperate resync attempt 476 printable_bytes = data; 477 printable_size = 1; 478 next_data = data+1; 479 } 480 for (int c = 0; c < printable_size; c++) 481 options.GetStream()->Printf("%c", *(printable_bytes+c)); 482 data = (uint8_t*)next_data; 483 } 484 else 485 { 486 options.GetStream()->Printf("%c",*data); 487 data++; 488 } 489 } 490 491 if (quote != 0) 492 options.GetStream()->Printf("%c",quote); 493 494 return true; 495 } 496 497 template<typename SourceDataType> 498 static bool 499 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options, 500 ConversionResult (*ConvertFunction) (const SourceDataType**, 501 const SourceDataType*, 502 UTF8**, 503 UTF8*, 504 ConversionFlags)) 505 { 506 assert(options.GetStream() && "need a Stream to print the string to"); 507 508 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 509 return false; 510 511 lldb::ProcessSP process_sp(options.GetProcessSP()); 512 513 if (!process_sp) 514 return false; 515 516 const int type_width = sizeof(SourceDataType); 517 const int origin_encoding = 8 * type_width ; 518 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 519 return false; 520 // if not UTF8, I need a conversion function to return proper UTF8 521 if (origin_encoding != 8 && !ConvertFunction) 522 return false; 523 524 if (!options.GetStream()) 525 return false; 526 527 uint32_t sourceSize = options.GetSourceSize(); 528 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 529 530 if (!sourceSize) 531 { 532 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 533 needs_zero_terminator = true; 534 } 535 else 536 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 537 538 const int bufferSPSize = sourceSize * type_width; 539 540 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 541 542 if (!buffer_sp->GetBytes()) 543 return false; 544 545 Error error; 546 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 547 548 size_t data_read = 0; 549 if (needs_zero_terminator) 550 data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 551 else 552 data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 553 554 if (error.Fail() || data_read == 0) 555 { 556 options.GetStream()->Printf("unable to read data"); 557 return true; 558 } 559 560 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 561 562 return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables()); 563 } 564 565 template <> 566 bool 567 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options) 568 { 569 return ReadUTFBufferAndDumpToStream<UTF8>(options, 570 nullptr); 571 } 572 573 template <> 574 bool 575 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options) 576 { 577 return ReadUTFBufferAndDumpToStream<UTF16>(options, 578 ConvertUTF16toUTF8); 579 } 580 581 template <> 582 bool 583 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options) 584 { 585 return ReadUTFBufferAndDumpToStream<UTF32>(options, 586 ConvertUTF32toUTF8); 587 } 588 589 template <> 590 bool 591 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options) 592 { 593 assert(options.GetStream() && "need a Stream to print the string to"); 594 595 return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 596 } 597 598 template <> 599 bool 600 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options) 601 { 602 // treat ASCII the same as UTF8 603 // FIXME: can we optimize ASCII some more? 604 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 605 } 606 607 template <> 608 bool 609 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options) 610 { 611 assert(options.GetStream() && "need a Stream to print the string to"); 612 613 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 614 } 615 616 template <> 617 bool 618 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options) 619 { 620 assert(options.GetStream() && "need a Stream to print the string to"); 621 622 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 623 } 624 625 } // namespace formatters 626 627 } // namespace lldb_private 628