1 //===-- StringPrinter.cpp ----------------------------------------*- C++ 2 //-*-===// 3 // 4 // The LLVM Compiler Infrastructure 5 // 6 // This file is distributed under the University of Illinois Open Source 7 // License. See LICENSE.TXT for details. 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "lldb/DataFormatters/StringPrinter.h" 12 13 #include "lldb/Core/Debugger.h" 14 #include "lldb/Core/Error.h" 15 #include "lldb/Core/ValueObject.h" 16 #include "lldb/Target/Language.h" 17 #include "lldb/Target/Process.h" 18 #include "lldb/Target/Target.h" 19 20 #include "llvm/Support/ConvertUTF.h" 21 22 #include <ctype.h> 23 #include <locale> 24 25 using namespace lldb; 26 using namespace lldb_private; 27 using namespace lldb_private::formatters; 28 29 // we define this for all values of type but only implement it for those we care 30 // about 31 // that's good because we get linker errors for any unsupported type 32 template <lldb_private::formatters::StringPrinter::StringElementType type> 33 static StringPrinter::StringPrinterBufferPointer<> 34 GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next); 35 36 // mimic isprint() for Unicode codepoints 37 static bool isprint(char32_t codepoint) { 38 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 39 { 40 return false; 41 } 42 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 43 { 44 return false; 45 } 46 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 47 { 48 return false; 49 } 50 if (codepoint == 0x200E || codepoint == 0x200F || 51 (codepoint >= 0x202A && 52 codepoint <= 0x202E)) // bidirectional text control 53 { 54 return false; 55 } 56 if (codepoint >= 0xFFF9 && 57 codepoint <= 0xFFFF) // interlinears and generally specials 58 { 59 return false; 60 } 61 return true; 62 } 63 64 template <> 65 StringPrinter::StringPrinterBufferPointer<> 66 GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer, 67 uint8_t *buffer_end, 68 uint8_t *&next) { 69 StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; 70 71 switch (*buffer) { 72 case 0: 73 retval = {"\\0", 2}; 74 break; 75 case '\a': 76 retval = {"\\a", 2}; 77 break; 78 case '\b': 79 retval = {"\\b", 2}; 80 break; 81 case '\f': 82 retval = {"\\f", 2}; 83 break; 84 case '\n': 85 retval = {"\\n", 2}; 86 break; 87 case '\r': 88 retval = {"\\r", 2}; 89 break; 90 case '\t': 91 retval = {"\\t", 2}; 92 break; 93 case '\v': 94 retval = {"\\v", 2}; 95 break; 96 case '\"': 97 retval = {"\\\"", 2}; 98 break; 99 case '\\': 100 retval = {"\\\\", 2}; 101 break; 102 default: 103 if (isprint(*buffer)) 104 retval = {buffer, 1}; 105 else { 106 uint8_t *data = new uint8_t[5]; 107 sprintf((char *)data, "\\x%02x", *buffer); 108 retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; 109 break; 110 } 111 } 112 113 next = buffer + 1; 114 return retval; 115 } 116 117 static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { 118 return (c0 - 192) * 64 + (c1 - 128); 119 } 120 static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 121 unsigned char c2) { 122 return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128); 123 } 124 static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 125 unsigned char c2, unsigned char c3) { 126 return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128); 127 } 128 129 template <> 130 StringPrinter::StringPrinterBufferPointer<> 131 GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer, 132 uint8_t *buffer_end, 133 uint8_t *&next) { 134 StringPrinter::StringPrinterBufferPointer<> retval{nullptr}; 135 136 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 137 138 if (1 + buffer_end - buffer < utf8_encoded_len) { 139 // I don't have enough bytes - print whatever I have left 140 retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)}; 141 next = buffer_end + 1; 142 return retval; 143 } 144 145 char32_t codepoint = 0; 146 switch (utf8_encoded_len) { 147 case 1: 148 // this is just an ASCII byte - ask ASCII 149 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 150 buffer, buffer_end, next); 151 case 2: 152 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 153 (unsigned char)*(buffer + 1)); 154 break; 155 case 3: 156 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 157 (unsigned char)*(buffer + 1), 158 (unsigned char)*(buffer + 2)); 159 break; 160 case 4: 161 codepoint = ConvertUTF8ToCodePoint( 162 (unsigned char)*buffer, (unsigned char)*(buffer + 1), 163 (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); 164 break; 165 default: 166 // this is probably some bogus non-character thing 167 // just print it as-is and hope to sync up again soon 168 retval = {buffer, 1}; 169 next = buffer + 1; 170 return retval; 171 } 172 173 if (codepoint) { 174 switch (codepoint) { 175 case 0: 176 retval = {"\\0", 2}; 177 break; 178 case '\a': 179 retval = {"\\a", 2}; 180 break; 181 case '\b': 182 retval = {"\\b", 2}; 183 break; 184 case '\f': 185 retval = {"\\f", 2}; 186 break; 187 case '\n': 188 retval = {"\\n", 2}; 189 break; 190 case '\r': 191 retval = {"\\r", 2}; 192 break; 193 case '\t': 194 retval = {"\\t", 2}; 195 break; 196 case '\v': 197 retval = {"\\v", 2}; 198 break; 199 case '\"': 200 retval = {"\\\"", 2}; 201 break; 202 case '\\': 203 retval = {"\\\\", 2}; 204 break; 205 default: 206 if (isprint(codepoint)) 207 retval = {buffer, utf8_encoded_len}; 208 else { 209 uint8_t *data = new uint8_t[11]; 210 sprintf((char *)data, "\\U%08x", (unsigned)codepoint); 211 retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; 212 break; 213 } 214 } 215 216 next = buffer + utf8_encoded_len; 217 return retval; 218 } 219 220 // this should not happen - but just in case.. try to resync at some point 221 retval = {buffer, 1}; 222 next = buffer + 1; 223 return retval; 224 } 225 226 // Given a sequence of bytes, this function returns: 227 // a sequence of bytes to actually print out + a length 228 // the following unscanned position of the buffer is in next 229 static StringPrinter::StringPrinterBufferPointer<> 230 GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, 231 uint8_t *buffer_end, uint8_t *&next) { 232 if (!buffer) 233 return {nullptr}; 234 235 switch (type) { 236 case StringPrinter::StringElementType::ASCII: 237 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 238 buffer, buffer_end, next); 239 case StringPrinter::StringElementType::UTF8: 240 return GetPrintableImpl<StringPrinter::StringElementType::UTF8>( 241 buffer, buffer_end, next); 242 default: 243 return {nullptr}; 244 } 245 } 246 247 StringPrinter::EscapingHelper 248 StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { 249 switch (elem_type) { 250 case GetPrintableElementType::UTF8: 251 return [](uint8_t *buffer, uint8_t *buffer_end, 252 uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { 253 return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, 254 buffer_end, next); 255 }; 256 case GetPrintableElementType::ASCII: 257 return [](uint8_t *buffer, uint8_t *buffer_end, 258 uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> { 259 return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, 260 buffer_end, next); 261 }; 262 } 263 llvm_unreachable("bad element type"); 264 } 265 266 // use this call if you already have an LLDB-side buffer for the data 267 template <typename SourceDataType> 268 static bool DumpUTFBufferToStream( 269 ConversionResult (*ConvertFunction)(const SourceDataType **, 270 const SourceDataType *, UTF8 **, UTF8 *, 271 ConversionFlags), 272 const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 273 Stream &stream(*dump_options.GetStream()); 274 if (dump_options.GetPrefixToken() != 0) 275 stream.Printf("%s", dump_options.GetPrefixToken()); 276 if (dump_options.GetQuote() != 0) 277 stream.Printf("%c", dump_options.GetQuote()); 278 auto data(dump_options.GetData()); 279 auto source_size(dump_options.GetSourceSize()); 280 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 281 const int bufferSPSize = data.GetByteSize(); 282 if (dump_options.GetSourceSize() == 0) { 283 const int origin_encoding = 8 * sizeof(SourceDataType); 284 source_size = bufferSPSize / (origin_encoding / 4); 285 } 286 287 const SourceDataType *data_ptr = 288 (const SourceDataType *)data.GetDataStart(); 289 const SourceDataType *data_end_ptr = data_ptr + source_size; 290 291 const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 292 293 if (zero_is_terminator) { 294 while (data_ptr < data_end_ptr) { 295 if (!*data_ptr) { 296 data_end_ptr = data_ptr; 297 break; 298 } 299 data_ptr++; 300 } 301 302 data_ptr = (const SourceDataType *)data.GetDataStart(); 303 } 304 305 lldb::DataBufferSP utf8_data_buffer_sp; 306 UTF8 *utf8_data_ptr = nullptr; 307 UTF8 *utf8_data_end_ptr = nullptr; 308 309 if (ConvertFunction) { 310 utf8_data_buffer_sp.reset(new DataBufferHeap(4 * bufferSPSize, 0)); 311 utf8_data_ptr = (UTF8 *)utf8_data_buffer_sp->GetBytes(); 312 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 313 ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 314 utf8_data_end_ptr, lenientConversion); 315 if (false == zero_is_terminator) 316 utf8_data_end_ptr = utf8_data_ptr; 317 utf8_data_ptr = 318 (UTF8 *)utf8_data_buffer_sp->GetBytes(); // needed because the 319 // ConvertFunction will 320 // change the value of the 321 // data_ptr 322 } else { 323 // just copy the pointers - the cast is necessary to make the compiler 324 // happy 325 // but this should only happen if we are reading UTF8 data 326 utf8_data_ptr = 327 const_cast<UTF8 *>(reinterpret_cast<const UTF8 *>(data_ptr)); 328 utf8_data_end_ptr = 329 const_cast<UTF8 *>(reinterpret_cast<const UTF8 *>(data_end_ptr)); 330 } 331 332 const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 333 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 334 if (escape_non_printables) { 335 if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 336 escaping_callback = language->GetStringPrinterEscapingHelper( 337 lldb_private::formatters::StringPrinter::GetPrintableElementType:: 338 UTF8); 339 else 340 escaping_callback = 341 lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 342 lldb_private::formatters::StringPrinter:: 343 GetPrintableElementType::UTF8); 344 } 345 346 // since we tend to accept partial data (and even partially malformed data) 347 // we might end up with no NULL terminator before the end_ptr 348 // hence we need to take a slower route and ensure we stay within boundaries 349 for (; utf8_data_ptr < utf8_data_end_ptr;) { 350 if (zero_is_terminator && !*utf8_data_ptr) 351 break; 352 353 if (escape_non_printables) { 354 uint8_t *next_data = nullptr; 355 auto printable = 356 escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 357 auto printable_bytes = printable.GetBytes(); 358 auto printable_size = printable.GetSize(); 359 if (!printable_bytes || !next_data) { 360 // GetPrintable() failed on us - print one byte in a desperate resync 361 // attempt 362 printable_bytes = utf8_data_ptr; 363 printable_size = 1; 364 next_data = utf8_data_ptr + 1; 365 } 366 for (unsigned c = 0; c < printable_size; c++) 367 stream.Printf("%c", *(printable_bytes + c)); 368 utf8_data_ptr = (uint8_t *)next_data; 369 } else { 370 stream.Printf("%c", *utf8_data_ptr); 371 utf8_data_ptr++; 372 } 373 } 374 } 375 if (dump_options.GetQuote() != 0) 376 stream.Printf("%c", dump_options.GetQuote()); 377 if (dump_options.GetSuffixToken() != 0) 378 stream.Printf("%s", dump_options.GetSuffixToken()); 379 if (dump_options.GetIsTruncated()) 380 stream.Printf("..."); 381 return true; 382 } 383 384 lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 385 ReadStringAndDumpToStreamOptions(ValueObject &valobj) 386 : ReadStringAndDumpToStreamOptions() { 387 SetEscapeNonPrintables( 388 valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 389 } 390 391 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 392 ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 393 : ReadBufferAndDumpToStreamOptions() { 394 SetEscapeNonPrintables( 395 valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 396 } 397 398 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 399 ReadBufferAndDumpToStreamOptions( 400 const ReadStringAndDumpToStreamOptions &options) 401 : ReadBufferAndDumpToStreamOptions() { 402 SetStream(options.GetStream()); 403 SetPrefixToken(options.GetPrefixToken()); 404 SetSuffixToken(options.GetSuffixToken()); 405 SetQuote(options.GetQuote()); 406 SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 407 SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 408 SetLanguage(options.GetLanguage()); 409 } 410 411 namespace lldb_private { 412 413 namespace formatters { 414 415 template <> 416 bool StringPrinter::ReadStringAndDumpToStream< 417 StringPrinter::StringElementType::ASCII>( 418 const ReadStringAndDumpToStreamOptions &options) { 419 assert(options.GetStream() && "need a Stream to print the string to"); 420 Error my_error; 421 422 ProcessSP process_sp(options.GetProcessSP()); 423 424 if (process_sp.get() == nullptr || options.GetLocation() == 0) 425 return false; 426 427 size_t size; 428 const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 429 bool is_truncated = false; 430 431 if (options.GetSourceSize() == 0) 432 size = max_size; 433 else if (!options.GetIgnoreMaxLength()) { 434 size = options.GetSourceSize(); 435 if (size > max_size) { 436 size = max_size; 437 is_truncated = true; 438 } 439 } else 440 size = options.GetSourceSize(); 441 442 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0)); 443 444 process_sp->ReadCStringFromMemory( 445 options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error); 446 447 if (my_error.Fail()) 448 return false; 449 450 const char *prefix_token = options.GetPrefixToken(); 451 char quote = options.GetQuote(); 452 453 if (prefix_token != 0) 454 options.GetStream()->Printf("%s%c", prefix_token, quote); 455 else if (quote != 0) 456 options.GetStream()->Printf("%c", quote); 457 458 uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); 459 460 const bool escape_non_printables = options.GetEscapeNonPrintables(); 461 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 462 if (escape_non_printables) { 463 if (Language *language = Language::FindPlugin(options.GetLanguage())) 464 escaping_callback = language->GetStringPrinterEscapingHelper( 465 lldb_private::formatters::StringPrinter::GetPrintableElementType:: 466 ASCII); 467 else 468 escaping_callback = 469 lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 470 lldb_private::formatters::StringPrinter::GetPrintableElementType:: 471 ASCII); 472 } 473 474 // since we tend to accept partial data (and even partially malformed data) 475 // we might end up with no NULL terminator before the end_ptr 476 // hence we need to take a slower route and ensure we stay within boundaries 477 for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) { 478 if (escape_non_printables) { 479 uint8_t *next_data = nullptr; 480 auto printable = escaping_callback(data, data_end, next_data); 481 auto printable_bytes = printable.GetBytes(); 482 auto printable_size = printable.GetSize(); 483 if (!printable_bytes || !next_data) { 484 // GetPrintable() failed on us - print one byte in a desperate resync 485 // attempt 486 printable_bytes = data; 487 printable_size = 1; 488 next_data = data + 1; 489 } 490 for (unsigned c = 0; c < printable_size; c++) 491 options.GetStream()->Printf("%c", *(printable_bytes + c)); 492 data = (uint8_t *)next_data; 493 } else { 494 options.GetStream()->Printf("%c", *data); 495 data++; 496 } 497 } 498 499 const char *suffix_token = options.GetSuffixToken(); 500 501 if (suffix_token != 0) 502 options.GetStream()->Printf("%c%s", quote, suffix_token); 503 else if (quote != 0) 504 options.GetStream()->Printf("%c", quote); 505 506 if (is_truncated) 507 options.GetStream()->Printf("..."); 508 509 return true; 510 } 511 512 template <typename SourceDataType> 513 static bool ReadUTFBufferAndDumpToStream( 514 const StringPrinter::ReadStringAndDumpToStreamOptions &options, 515 ConversionResult (*ConvertFunction)(const SourceDataType **, 516 const SourceDataType *, UTF8 **, UTF8 *, 517 ConversionFlags)) { 518 assert(options.GetStream() && "need a Stream to print the string to"); 519 520 if (options.GetLocation() == 0 || 521 options.GetLocation() == LLDB_INVALID_ADDRESS) 522 return false; 523 524 lldb::ProcessSP process_sp(options.GetProcessSP()); 525 526 if (!process_sp) 527 return false; 528 529 const int type_width = sizeof(SourceDataType); 530 const int origin_encoding = 8 * type_width; 531 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 532 return false; 533 // if not UTF8, I need a conversion function to return proper UTF8 534 if (origin_encoding != 8 && !ConvertFunction) 535 return false; 536 537 if (!options.GetStream()) 538 return false; 539 540 uint32_t sourceSize = options.GetSourceSize(); 541 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 542 543 bool is_truncated = false; 544 const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 545 546 if (!sourceSize) { 547 sourceSize = max_size; 548 needs_zero_terminator = true; 549 } else if (!options.GetIgnoreMaxLength()) { 550 if (sourceSize > max_size) { 551 sourceSize = max_size; 552 is_truncated = true; 553 } 554 } 555 556 const int bufferSPSize = sourceSize * type_width; 557 558 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 559 560 if (!buffer_sp->GetBytes()) 561 return false; 562 563 Error error; 564 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 565 566 if (needs_zero_terminator) 567 process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 568 bufferSPSize, error, type_width); 569 else 570 process_sp->ReadMemoryFromInferior(options.GetLocation(), 571 (char *)buffer_sp->GetBytes(), 572 bufferSPSize, error); 573 574 if (error.Fail()) { 575 options.GetStream()->Printf("unable to read data"); 576 return true; 577 } 578 579 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 580 process_sp->GetAddressByteSize()); 581 582 StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 583 dump_options.SetData(data); 584 dump_options.SetSourceSize(sourceSize); 585 dump_options.SetIsTruncated(is_truncated); 586 587 return DumpUTFBufferToStream(ConvertFunction, dump_options); 588 } 589 590 template <> 591 bool StringPrinter::ReadStringAndDumpToStream< 592 StringPrinter::StringElementType::UTF8>( 593 const ReadStringAndDumpToStreamOptions &options) { 594 return ReadUTFBufferAndDumpToStream<UTF8>(options, nullptr); 595 } 596 597 template <> 598 bool StringPrinter::ReadStringAndDumpToStream< 599 StringPrinter::StringElementType::UTF16>( 600 const ReadStringAndDumpToStreamOptions &options) { 601 return ReadUTFBufferAndDumpToStream<UTF16>(options, ConvertUTF16toUTF8); 602 } 603 604 template <> 605 bool StringPrinter::ReadStringAndDumpToStream< 606 StringPrinter::StringElementType::UTF32>( 607 const ReadStringAndDumpToStreamOptions &options) { 608 return ReadUTFBufferAndDumpToStream<UTF32>(options, ConvertUTF32toUTF8); 609 } 610 611 template <> 612 bool StringPrinter::ReadBufferAndDumpToStream< 613 StringPrinter::StringElementType::UTF8>( 614 const ReadBufferAndDumpToStreamOptions &options) { 615 assert(options.GetStream() && "need a Stream to print the string to"); 616 617 return DumpUTFBufferToStream<UTF8>(nullptr, options); 618 } 619 620 template <> 621 bool StringPrinter::ReadBufferAndDumpToStream< 622 StringPrinter::StringElementType::ASCII>( 623 const ReadBufferAndDumpToStreamOptions &options) { 624 // treat ASCII the same as UTF8 625 // FIXME: can we optimize ASCII some more? 626 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 627 } 628 629 template <> 630 bool StringPrinter::ReadBufferAndDumpToStream< 631 StringPrinter::StringElementType::UTF16>( 632 const ReadBufferAndDumpToStreamOptions &options) { 633 assert(options.GetStream() && "need a Stream to print the string to"); 634 635 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options); 636 } 637 638 template <> 639 bool StringPrinter::ReadBufferAndDumpToStream< 640 StringPrinter::StringElementType::UTF32>( 641 const ReadBufferAndDumpToStreamOptions &options) { 642 assert(options.GetStream() && "need a Stream to print the string to"); 643 644 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options); 645 } 646 647 } // namespace formatters 648 649 } // namespace lldb_private 650