1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/DataFormatters/StringPrinter.h" 11 12 #include "lldb/Core/DataExtractor.h" 13 #include "lldb/Core/Debugger.h" 14 #include "lldb/Core/Error.h" 15 #include "lldb/Core/ValueObject.h" 16 #include "lldb/Target/Process.h" 17 #include "lldb/Target/Target.h" 18 19 #include "llvm/Support/ConvertUTF.h" 20 21 #include <ctype.h> 22 #include <functional> 23 #include <locale> 24 25 using namespace lldb; 26 using namespace lldb_private; 27 using namespace lldb_private::formatters; 28 29 // I can't use a std::unique_ptr for this because the Deleter is a template argument there 30 // and I want the same type to represent both pointers I want to free and pointers I don't need 31 // to free - which is what this class essentially is 32 // It's very specialized to the needs of this file, and not suggested for general use 33 template <typename T = uint8_t, typename U = char, typename S = size_t> 34 struct StringPrinterBufferPointer 35 { 36 public: 37 38 typedef std::function<void(const T*)> Deleter; 39 40 StringPrinterBufferPointer (std::nullptr_t ptr) : 41 m_data(nullptr), 42 m_size(0), 43 m_deleter() 44 {} 45 46 StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) : 47 m_data(bytes), 48 m_size(size), 49 m_deleter(deleter) 50 {} 51 52 StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) : 53 m_data((T*)bytes), 54 m_size(size), 55 m_deleter(deleter) 56 {} 57 58 StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) : 59 m_data(rhs.m_data), 60 m_size(rhs.m_size), 61 m_deleter(rhs.m_deleter) 62 { 63 rhs.m_data = nullptr; 64 } 65 66 StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) : 67 m_data(rhs.m_data), 68 m_size(rhs.m_size), 69 m_deleter(rhs.m_deleter) 70 { 71 rhs.m_data = nullptr; // this is why m_data has to be mutable 72 } 73 74 const T* 75 GetBytes () const 76 { 77 return m_data; 78 } 79 80 const S 81 GetSize () const 82 { 83 return m_size; 84 } 85 86 ~StringPrinterBufferPointer () 87 { 88 if (m_data && m_deleter) 89 m_deleter(m_data); 90 m_data = nullptr; 91 } 92 93 StringPrinterBufferPointer& 94 operator = (const StringPrinterBufferPointer& rhs) 95 { 96 if (m_data && m_deleter) 97 m_deleter(m_data); 98 m_data = rhs.m_data; 99 m_size = rhs.m_size; 100 m_deleter = rhs.m_deleter; 101 rhs.m_data = nullptr; 102 return *this; 103 } 104 105 private: 106 mutable const T* m_data; 107 size_t m_size; 108 Deleter m_deleter; 109 }; 110 111 // we define this for all values of type but only implement it for those we care about 112 // that's good because we get linker errors for any unsupported type 113 template <StringElementType type> 114 static StringPrinterBufferPointer<> 115 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 116 117 // mimic isprint() for Unicode codepoints 118 static bool 119 isprint(char32_t codepoint) 120 { 121 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 122 { 123 return false; 124 } 125 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 126 { 127 return false; 128 } 129 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 130 { 131 return false; 132 } 133 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 134 { 135 return false; 136 } 137 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 138 { 139 return false; 140 } 141 return true; 142 } 143 144 template <> 145 StringPrinterBufferPointer<> 146 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 147 { 148 StringPrinterBufferPointer<> retval = {nullptr}; 149 150 switch (*buffer) 151 { 152 case 0: 153 retval = {"\\0",2}; 154 break; 155 case '\a': 156 retval = {"\\a",2}; 157 break; 158 case '\b': 159 retval = {"\\b",2}; 160 break; 161 case '\f': 162 retval = {"\\f",2}; 163 break; 164 case '\n': 165 retval = {"\\n",2}; 166 break; 167 case '\r': 168 retval = {"\\r",2}; 169 break; 170 case '\t': 171 retval = {"\\t",2}; 172 break; 173 case '\v': 174 retval = {"\\v",2}; 175 break; 176 case '\"': 177 retval = {"\\\"",2}; 178 break; 179 case '\\': 180 retval = {"\\\\",2}; 181 break; 182 default: 183 if (isprint(*buffer)) 184 retval = {buffer,1}; 185 else 186 { 187 uint8_t* data = new uint8_t[5]; 188 sprintf((char*)data,"\\x%02x",*buffer); 189 retval = {data, 4, [] (const uint8_t* c) {delete[] c;} }; 190 break; 191 } 192 } 193 194 next = buffer + 1; 195 return retval; 196 } 197 198 static char32_t 199 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 200 { 201 return (c0-192)*64+(c1-128); 202 } 203 static char32_t 204 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 205 { 206 return (c0-224)*4096+(c1-128)*64+(c2-128); 207 } 208 static char32_t 209 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 210 { 211 return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 212 } 213 214 template <> 215 StringPrinterBufferPointer<> 216 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 217 { 218 StringPrinterBufferPointer<> retval {nullptr}; 219 220 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 221 222 if (1+buffer_end-buffer < utf8_encoded_len) 223 { 224 // I don't have enough bytes - print whatever I have left 225 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 226 next = buffer_end+1; 227 return retval; 228 } 229 230 char32_t codepoint = 0; 231 switch (utf8_encoded_len) 232 { 233 case 1: 234 // this is just an ASCII byte - ask ASCII 235 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 236 case 2: 237 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 238 break; 239 case 3: 240 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 241 break; 242 case 4: 243 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 244 break; 245 default: 246 // this is probably some bogus non-character thing 247 // just print it as-is and hope to sync up again soon 248 retval = {buffer,1}; 249 next = buffer+1; 250 return retval; 251 } 252 253 if (codepoint) 254 { 255 switch (codepoint) 256 { 257 case 0: 258 retval = {"\\0",2}; 259 break; 260 case '\a': 261 retval = {"\\a",2}; 262 break; 263 case '\b': 264 retval = {"\\b",2}; 265 break; 266 case '\f': 267 retval = {"\\f",2}; 268 break; 269 case '\n': 270 retval = {"\\n",2}; 271 break; 272 case '\r': 273 retval = {"\\r",2}; 274 break; 275 case '\t': 276 retval = {"\\t",2}; 277 break; 278 case '\v': 279 retval = {"\\v",2}; 280 break; 281 case '\"': 282 retval = {"\\\"",2}; 283 break; 284 case '\\': 285 retval = {"\\\\",2}; 286 break; 287 default: 288 if (isprint(codepoint)) 289 retval = {buffer,utf8_encoded_len}; 290 else 291 { 292 uint8_t* data = new uint8_t[11]; 293 sprintf((char*)data,"\\U%08x",codepoint); 294 retval = { data,10,[] (const uint8_t* c) {delete[] c;} }; 295 break; 296 } 297 } 298 299 next = buffer + utf8_encoded_len; 300 return retval; 301 } 302 303 // this should not happen - but just in case.. try to resync at some point 304 retval = {buffer,1}; 305 next = buffer+1; 306 return retval; 307 } 308 309 // Given a sequence of bytes, this function returns: 310 // a sequence of bytes to actually print out + a length 311 // the following unscanned position of the buffer is in next 312 static StringPrinterBufferPointer<> 313 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 314 { 315 if (!buffer) 316 return {nullptr}; 317 318 switch (type) 319 { 320 case StringElementType::ASCII: 321 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 322 case StringElementType::UTF8: 323 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next); 324 default: 325 return {nullptr}; 326 } 327 } 328 329 // use this call if you already have an LLDB-side buffer for the data 330 template<typename SourceDataType> 331 static bool 332 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 333 const SourceDataType*, 334 UTF8**, 335 UTF8*, 336 ConversionFlags), 337 const ReadBufferAndDumpToStreamOptions& dump_options) 338 { 339 Stream &stream(*dump_options.GetStream()); 340 if (dump_options.GetPrefixToken() != 0) 341 stream.Printf("%c",dump_options.GetPrefixToken()); 342 if (dump_options.GetQuote() != 0) 343 stream.Printf("%c",dump_options.GetQuote()); 344 auto data(dump_options.GetData()); 345 auto source_size(dump_options.GetSourceSize()); 346 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 347 { 348 const int bufferSPSize = data.GetByteSize(); 349 if (dump_options.GetSourceSize() == 0) 350 { 351 const int origin_encoding = 8*sizeof(SourceDataType); 352 source_size = bufferSPSize/(origin_encoding / 4); 353 } 354 355 const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart(); 356 const SourceDataType *data_end_ptr = data_ptr + source_size; 357 358 const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 359 360 if (zero_is_terminator) 361 { 362 while (data_ptr < data_end_ptr) 363 { 364 if (!*data_ptr) 365 { 366 data_end_ptr = data_ptr; 367 break; 368 } 369 data_ptr++; 370 } 371 372 data_ptr = (const SourceDataType*)data.GetDataStart(); 373 } 374 375 lldb::DataBufferSP utf8_data_buffer_sp; 376 UTF8* utf8_data_ptr = nullptr; 377 UTF8* utf8_data_end_ptr = nullptr; 378 379 if (ConvertFunction) 380 { 381 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 382 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 383 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 384 ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 385 if (false == zero_is_terminator) 386 utf8_data_end_ptr = utf8_data_ptr; 387 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 388 } 389 else 390 { 391 // just copy the pointers - the cast is necessary to make the compiler happy 392 // but this should only happen if we are reading UTF8 data 393 utf8_data_ptr = (UTF8*)data_ptr; 394 utf8_data_end_ptr = (UTF8*)data_end_ptr; 395 } 396 397 const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 398 399 // since we tend to accept partial data (and even partially malformed data) 400 // we might end up with no NULL terminator before the end_ptr 401 // hence we need to take a slower route and ensure we stay within boundaries 402 for (;utf8_data_ptr < utf8_data_end_ptr;) 403 { 404 if (zero_is_terminator && !*utf8_data_ptr) 405 break; 406 407 if (escape_non_printables) 408 { 409 uint8_t* next_data = nullptr; 410 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data); 411 auto printable_bytes = printable.GetBytes(); 412 auto printable_size = printable.GetSize(); 413 if (!printable_bytes || !next_data) 414 { 415 // GetPrintable() failed on us - print one byte in a desperate resync attempt 416 printable_bytes = utf8_data_ptr; 417 printable_size = 1; 418 next_data = utf8_data_ptr+1; 419 } 420 for (unsigned c = 0; c < printable_size; c++) 421 stream.Printf("%c", *(printable_bytes+c)); 422 utf8_data_ptr = (uint8_t*)next_data; 423 } 424 else 425 { 426 stream.Printf("%c",*utf8_data_ptr); 427 utf8_data_ptr++; 428 } 429 } 430 } 431 if (dump_options.GetQuote() != 0) 432 stream.Printf("%c",dump_options.GetQuote()); 433 return true; 434 } 435 436 lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 437 ReadStringAndDumpToStreamOptions() 438 { 439 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 440 } 441 442 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 443 ReadBufferAndDumpToStreamOptions() 444 { 445 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 446 } 447 448 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const lldb_private::formatters::ReadStringAndDumpToStreamOptions& options) : 449 ReadBufferAndDumpToStreamOptions() 450 { 451 SetStream(options.GetStream()); 452 SetPrefixToken(options.GetPrefixToken()); 453 SetQuote(options.GetQuote()); 454 SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 455 SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 456 } 457 458 459 namespace lldb_private 460 { 461 462 namespace formatters 463 { 464 465 template <> 466 bool 467 ReadStringAndDumpToStream<StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options) 468 { 469 assert(options.GetStream() && "need a Stream to print the string to"); 470 Error my_error; 471 472 ProcessSP process_sp(options.GetProcessSP()); 473 474 if (process_sp.get() == nullptr || options.GetLocation() == 0) 475 return false; 476 477 size_t size; 478 479 if (options.GetSourceSize() == 0) 480 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 481 else if (!options.GetIgnoreMaxLength()) 482 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 483 else 484 size = options.GetSourceSize(); 485 486 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 487 488 process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 489 490 if (my_error.Fail()) 491 return false; 492 493 char prefix_token = options.GetPrefixToken(); 494 char quote = options.GetQuote(); 495 496 if (prefix_token != 0) 497 options.GetStream()->Printf("%c%c",prefix_token,quote); 498 else if (quote != 0) 499 options.GetStream()->Printf("%c",quote); 500 501 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 502 503 // since we tend to accept partial data (and even partially malformed data) 504 // we might end up with no NULL terminator before the end_ptr 505 // hence we need to take a slower route and ensure we stay within boundaries 506 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 507 { 508 if (options.GetEscapeNonPrintables()) 509 { 510 uint8_t* next_data = nullptr; 511 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data); 512 auto printable_bytes = printable.GetBytes(); 513 auto printable_size = printable.GetSize(); 514 if (!printable_bytes || !next_data) 515 { 516 // GetPrintable() failed on us - print one byte in a desperate resync attempt 517 printable_bytes = data; 518 printable_size = 1; 519 next_data = data+1; 520 } 521 for (unsigned c = 0; c < printable_size; c++) 522 options.GetStream()->Printf("%c", *(printable_bytes+c)); 523 data = (uint8_t*)next_data; 524 } 525 else 526 { 527 options.GetStream()->Printf("%c",*data); 528 data++; 529 } 530 } 531 532 if (quote != 0) 533 options.GetStream()->Printf("%c",quote); 534 535 return true; 536 } 537 538 template<typename SourceDataType> 539 static bool 540 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options, 541 ConversionResult (*ConvertFunction) (const SourceDataType**, 542 const SourceDataType*, 543 UTF8**, 544 UTF8*, 545 ConversionFlags)) 546 { 547 assert(options.GetStream() && "need a Stream to print the string to"); 548 549 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 550 return false; 551 552 lldb::ProcessSP process_sp(options.GetProcessSP()); 553 554 if (!process_sp) 555 return false; 556 557 const int type_width = sizeof(SourceDataType); 558 const int origin_encoding = 8 * type_width ; 559 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 560 return false; 561 // if not UTF8, I need a conversion function to return proper UTF8 562 if (origin_encoding != 8 && !ConvertFunction) 563 return false; 564 565 if (!options.GetStream()) 566 return false; 567 568 uint32_t sourceSize = options.GetSourceSize(); 569 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 570 571 if (!sourceSize) 572 { 573 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 574 needs_zero_terminator = true; 575 } 576 else if (!options.GetIgnoreMaxLength()) 577 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 578 579 const int bufferSPSize = sourceSize * type_width; 580 581 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 582 583 if (!buffer_sp->GetBytes()) 584 return false; 585 586 Error error; 587 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 588 589 if (needs_zero_terminator) 590 process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 591 else 592 process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 593 594 if (error.Fail()) 595 { 596 options.GetStream()->Printf("unable to read data"); 597 return true; 598 } 599 600 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 601 602 ReadBufferAndDumpToStreamOptions dump_options(options); 603 dump_options.SetData(data); 604 dump_options.SetSourceSize(sourceSize); 605 606 return DumpUTFBufferToStream(ConvertFunction, dump_options); 607 } 608 609 template <> 610 bool 611 ReadStringAndDumpToStream<StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options) 612 { 613 return ReadUTFBufferAndDumpToStream<UTF8>(options, 614 nullptr); 615 } 616 617 template <> 618 bool 619 ReadStringAndDumpToStream<StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options) 620 { 621 return ReadUTFBufferAndDumpToStream<UTF16>(options, 622 ConvertUTF16toUTF8); 623 } 624 625 template <> 626 bool 627 ReadStringAndDumpToStream<StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options) 628 { 629 return ReadUTFBufferAndDumpToStream<UTF32>(options, 630 ConvertUTF32toUTF8); 631 } 632 633 template <> 634 bool 635 ReadBufferAndDumpToStream<StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options) 636 { 637 assert(options.GetStream() && "need a Stream to print the string to"); 638 639 return DumpUTFBufferToStream<UTF8>(nullptr, options); 640 } 641 642 template <> 643 bool 644 ReadBufferAndDumpToStream<StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options) 645 { 646 // treat ASCII the same as UTF8 647 // FIXME: can we optimize ASCII some more? 648 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 649 } 650 651 template <> 652 bool 653 ReadBufferAndDumpToStream<StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options) 654 { 655 assert(options.GetStream() && "need a Stream to print the string to"); 656 657 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options); 658 } 659 660 template <> 661 bool 662 ReadBufferAndDumpToStream<StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options) 663 { 664 assert(options.GetStream() && "need a Stream to print the string to"); 665 666 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options); 667 } 668 669 } // namespace formatters 670 671 } // namespace lldb_private 672