1 //===-- StringPrinter.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/DataFormatters/StringPrinter.h" 10 11 #include "lldb/Core/Debugger.h" 12 #include "lldb/Core/ValueObject.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Target/Process.h" 15 #include "lldb/Target/Target.h" 16 #include "lldb/Utility/Status.h" 17 18 #include "llvm/Support/ConvertUTF.h" 19 20 #include <ctype.h> 21 #include <locale> 22 #include <memory> 23 24 using namespace lldb; 25 using namespace lldb_private; 26 using namespace lldb_private::formatters; 27 28 // we define this for all values of type but only implement it for those we 29 // care about that's good because we get linker errors for any unsupported type 30 template <lldb_private::formatters::StringPrinter::StringElementType type> 31 static StringPrinter::StringPrinterBufferPointer 32 GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next); 33 34 // mimic isprint() for Unicode codepoints 35 static bool isprint(char32_t codepoint) { 36 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 37 { 38 return false; 39 } 40 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 41 { 42 return false; 43 } 44 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 45 { 46 return false; 47 } 48 if (codepoint == 0x200E || codepoint == 0x200F || 49 (codepoint >= 0x202A && 50 codepoint <= 0x202E)) // bidirectional text control 51 { 52 return false; 53 } 54 if (codepoint >= 0xFFF9 && 55 codepoint <= 0xFFFF) // interlinears and generally specials 56 { 57 return false; 58 } 59 return true; 60 } 61 62 template <> 63 StringPrinter::StringPrinterBufferPointer 64 GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer, 65 uint8_t *buffer_end, 66 uint8_t *&next) { 67 StringPrinter::StringPrinterBufferPointer retval = {nullptr}; 68 69 switch (*buffer) { 70 case 0: 71 retval = {"\\0", 2}; 72 break; 73 case '\a': 74 retval = {"\\a", 2}; 75 break; 76 case '\b': 77 retval = {"\\b", 2}; 78 break; 79 case '\f': 80 retval = {"\\f", 2}; 81 break; 82 case '\n': 83 retval = {"\\n", 2}; 84 break; 85 case '\r': 86 retval = {"\\r", 2}; 87 break; 88 case '\t': 89 retval = {"\\t", 2}; 90 break; 91 case '\v': 92 retval = {"\\v", 2}; 93 break; 94 case '\"': 95 retval = {"\\\"", 2}; 96 break; 97 case '\\': 98 retval = {"\\\\", 2}; 99 break; 100 default: 101 if (isprint(*buffer)) 102 retval = {buffer, 1}; 103 else { 104 uint8_t *data = new uint8_t[5]; 105 sprintf((char *)data, "\\x%02x", *buffer); 106 retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; 107 break; 108 } 109 } 110 111 next = buffer + 1; 112 return retval; 113 } 114 115 static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { 116 return (c0 - 192) * 64 + (c1 - 128); 117 } 118 static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 119 unsigned char c2) { 120 return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128); 121 } 122 static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, 123 unsigned char c2, unsigned char c3) { 124 return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128); 125 } 126 127 template <> 128 StringPrinter::StringPrinterBufferPointer 129 GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer, 130 uint8_t *buffer_end, 131 uint8_t *&next) { 132 StringPrinter::StringPrinterBufferPointer retval{nullptr}; 133 134 const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); 135 136 // If the utf8 encoded length is invalid, or if there aren't enough bytes to 137 // print, this is some kind of corrupted string. 138 if (utf8_encoded_len == 0 || utf8_encoded_len > 4) 139 return retval; 140 if ((buffer_end - buffer) < utf8_encoded_len) 141 // There's no room in the buffer for the utf8 sequence. 142 return retval; 143 144 char32_t codepoint = 0; 145 switch (utf8_encoded_len) { 146 case 1: 147 // this is just an ASCII byte - ask ASCII 148 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 149 buffer, buffer_end, next); 150 case 2: 151 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 152 (unsigned char)*(buffer + 1)); 153 break; 154 case 3: 155 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, 156 (unsigned char)*(buffer + 1), 157 (unsigned char)*(buffer + 2)); 158 break; 159 case 4: 160 codepoint = ConvertUTF8ToCodePoint( 161 (unsigned char)*buffer, (unsigned char)*(buffer + 1), 162 (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); 163 break; 164 } 165 166 if (codepoint) { 167 switch (codepoint) { 168 case 0: 169 retval = {"\\0", 2}; 170 break; 171 case '\a': 172 retval = {"\\a", 2}; 173 break; 174 case '\b': 175 retval = {"\\b", 2}; 176 break; 177 case '\f': 178 retval = {"\\f", 2}; 179 break; 180 case '\n': 181 retval = {"\\n", 2}; 182 break; 183 case '\r': 184 retval = {"\\r", 2}; 185 break; 186 case '\t': 187 retval = {"\\t", 2}; 188 break; 189 case '\v': 190 retval = {"\\v", 2}; 191 break; 192 case '\"': 193 retval = {"\\\"", 2}; 194 break; 195 case '\\': 196 retval = {"\\\\", 2}; 197 break; 198 default: 199 if (isprint(codepoint)) 200 retval = {buffer, utf8_encoded_len}; 201 else { 202 uint8_t *data = new uint8_t[11]; 203 sprintf((char *)data, "\\U%08x", (unsigned)codepoint); 204 retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; 205 break; 206 } 207 } 208 209 next = buffer + utf8_encoded_len; 210 return retval; 211 } 212 213 // We couldn't figure out how to print this string. 214 return retval; 215 } 216 217 // Given a sequence of bytes, this function returns: a sequence of bytes to 218 // actually print out + a length the following unscanned position of the buffer 219 // is in next 220 static StringPrinter::StringPrinterBufferPointer 221 GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, 222 uint8_t *buffer_end, uint8_t *&next) { 223 if (!buffer || buffer >= buffer_end) 224 return {nullptr}; 225 226 switch (type) { 227 case StringPrinter::StringElementType::ASCII: 228 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>( 229 buffer, buffer_end, next); 230 case StringPrinter::StringElementType::UTF8: 231 return GetPrintableImpl<StringPrinter::StringElementType::UTF8>( 232 buffer, buffer_end, next); 233 default: 234 return {nullptr}; 235 } 236 } 237 238 StringPrinter::EscapingHelper 239 StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { 240 switch (elem_type) { 241 case GetPrintableElementType::UTF8: 242 return [](uint8_t *buffer, uint8_t *buffer_end, 243 uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { 244 return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, 245 buffer_end, next); 246 }; 247 case GetPrintableElementType::ASCII: 248 return [](uint8_t *buffer, uint8_t *buffer_end, 249 uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { 250 return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, 251 buffer_end, next); 252 }; 253 } 254 llvm_unreachable("bad element type"); 255 } 256 257 // use this call if you already have an LLDB-side buffer for the data 258 template <typename SourceDataType> 259 static bool DumpUTFBufferToStream( 260 llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 261 const SourceDataType *, 262 llvm::UTF8 **, llvm::UTF8 *, 263 llvm::ConversionFlags), 264 const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { 265 Stream &stream(*dump_options.GetStream()); 266 if (dump_options.GetPrefixToken() != nullptr) 267 stream.Printf("%s", dump_options.GetPrefixToken()); 268 if (dump_options.GetQuote() != 0) 269 stream.Printf("%c", dump_options.GetQuote()); 270 auto data(dump_options.GetData()); 271 auto source_size(dump_options.GetSourceSize()); 272 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) { 273 const int bufferSPSize = data.GetByteSize(); 274 if (dump_options.GetSourceSize() == 0) { 275 const int origin_encoding = 8 * sizeof(SourceDataType); 276 source_size = bufferSPSize / (origin_encoding / 4); 277 } 278 279 const SourceDataType *data_ptr = 280 (const SourceDataType *)data.GetDataStart(); 281 const SourceDataType *data_end_ptr = data_ptr + source_size; 282 283 const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 284 285 if (zero_is_terminator) { 286 while (data_ptr < data_end_ptr) { 287 if (!*data_ptr) { 288 data_end_ptr = data_ptr; 289 break; 290 } 291 data_ptr++; 292 } 293 294 data_ptr = (const SourceDataType *)data.GetDataStart(); 295 } 296 297 lldb::DataBufferSP utf8_data_buffer_sp; 298 llvm::UTF8 *utf8_data_ptr = nullptr; 299 llvm::UTF8 *utf8_data_end_ptr = nullptr; 300 301 if (ConvertFunction) { 302 utf8_data_buffer_sp = 303 std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0); 304 utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 305 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 306 ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr, 307 utf8_data_end_ptr, llvm::lenientConversion); 308 if (!zero_is_terminator) 309 utf8_data_end_ptr = utf8_data_ptr; 310 // needed because the ConvertFunction will change the value of the 311 // data_ptr. 312 utf8_data_ptr = 313 (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes(); 314 } else { 315 // just copy the pointers - the cast is necessary to make the compiler 316 // happy but this should only happen if we are reading UTF8 data 317 utf8_data_ptr = const_cast<llvm::UTF8 *>( 318 reinterpret_cast<const llvm::UTF8 *>(data_ptr)); 319 utf8_data_end_ptr = const_cast<llvm::UTF8 *>( 320 reinterpret_cast<const llvm::UTF8 *>(data_end_ptr)); 321 } 322 323 const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 324 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 325 if (escape_non_printables) { 326 if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 327 escaping_callback = language->GetStringPrinterEscapingHelper( 328 lldb_private::formatters::StringPrinter::GetPrintableElementType:: 329 UTF8); 330 else 331 escaping_callback = 332 lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 333 lldb_private::formatters::StringPrinter:: 334 GetPrintableElementType::UTF8); 335 } 336 337 // since we tend to accept partial data (and even partially malformed data) 338 // we might end up with no NULL terminator before the end_ptr hence we need 339 // to take a slower route and ensure we stay within boundaries 340 for (; utf8_data_ptr < utf8_data_end_ptr;) { 341 if (zero_is_terminator && !*utf8_data_ptr) 342 break; 343 344 if (escape_non_printables) { 345 uint8_t *next_data = nullptr; 346 auto printable = 347 escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 348 auto printable_bytes = printable.GetBytes(); 349 auto printable_size = printable.GetSize(); 350 351 // We failed to figure out how to print this string. 352 if (!printable_bytes || !next_data) 353 return false; 354 355 for (unsigned c = 0; c < printable_size; c++) 356 stream.Printf("%c", *(printable_bytes + c)); 357 utf8_data_ptr = (uint8_t *)next_data; 358 } else { 359 stream.Printf("%c", *utf8_data_ptr); 360 utf8_data_ptr++; 361 } 362 } 363 } 364 if (dump_options.GetQuote() != 0) 365 stream.Printf("%c", dump_options.GetQuote()); 366 if (dump_options.GetSuffixToken() != nullptr) 367 stream.Printf("%s", dump_options.GetSuffixToken()); 368 if (dump_options.GetIsTruncated()) 369 stream.Printf("..."); 370 return true; 371 } 372 373 lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions:: 374 ReadStringAndDumpToStreamOptions(ValueObject &valobj) 375 : ReadStringAndDumpToStreamOptions() { 376 SetEscapeNonPrintables( 377 valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 378 } 379 380 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 381 ReadBufferAndDumpToStreamOptions(ValueObject &valobj) 382 : ReadBufferAndDumpToStreamOptions() { 383 SetEscapeNonPrintables( 384 valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 385 } 386 387 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: 388 ReadBufferAndDumpToStreamOptions( 389 const ReadStringAndDumpToStreamOptions &options) 390 : ReadBufferAndDumpToStreamOptions() { 391 SetStream(options.GetStream()); 392 SetPrefixToken(options.GetPrefixToken()); 393 SetSuffixToken(options.GetSuffixToken()); 394 SetQuote(options.GetQuote()); 395 SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 396 SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 397 SetLanguage(options.GetLanguage()); 398 } 399 400 namespace lldb_private { 401 402 namespace formatters { 403 404 template <> 405 bool StringPrinter::ReadStringAndDumpToStream< 406 StringPrinter::StringElementType::ASCII>( 407 const ReadStringAndDumpToStreamOptions &options) { 408 assert(options.GetStream() && "need a Stream to print the string to"); 409 Status my_error; 410 411 ProcessSP process_sp(options.GetProcessSP()); 412 413 if (process_sp.get() == nullptr || options.GetLocation() == 0) 414 return false; 415 416 size_t size; 417 const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 418 bool is_truncated = false; 419 420 if (options.GetSourceSize() == 0) 421 size = max_size; 422 else if (!options.GetIgnoreMaxLength()) { 423 size = options.GetSourceSize(); 424 if (size > max_size) { 425 size = max_size; 426 is_truncated = true; 427 } 428 } else 429 size = options.GetSourceSize(); 430 431 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0)); 432 433 process_sp->ReadCStringFromMemory( 434 options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error); 435 436 if (my_error.Fail()) 437 return false; 438 439 const char *prefix_token = options.GetPrefixToken(); 440 char quote = options.GetQuote(); 441 442 if (prefix_token != nullptr) 443 options.GetStream()->Printf("%s%c", prefix_token, quote); 444 else if (quote != 0) 445 options.GetStream()->Printf("%c", quote); 446 447 uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); 448 449 const bool escape_non_printables = options.GetEscapeNonPrintables(); 450 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 451 if (escape_non_printables) { 452 if (Language *language = Language::FindPlugin(options.GetLanguage())) 453 escaping_callback = language->GetStringPrinterEscapingHelper( 454 lldb_private::formatters::StringPrinter::GetPrintableElementType:: 455 ASCII); 456 else 457 escaping_callback = 458 lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( 459 lldb_private::formatters::StringPrinter::GetPrintableElementType:: 460 ASCII); 461 } 462 463 // since we tend to accept partial data (and even partially malformed data) 464 // we might end up with no NULL terminator before the end_ptr hence we need 465 // to take a slower route and ensure we stay within boundaries 466 for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) { 467 if (escape_non_printables) { 468 uint8_t *next_data = nullptr; 469 auto printable = escaping_callback(data, data_end, next_data); 470 auto printable_bytes = printable.GetBytes(); 471 auto printable_size = printable.GetSize(); 472 473 // We failed to figure out how to print this string. 474 if (!printable_bytes || !next_data) 475 return false; 476 477 for (unsigned c = 0; c < printable_size; c++) 478 options.GetStream()->Printf("%c", *(printable_bytes + c)); 479 data = (uint8_t *)next_data; 480 } else { 481 options.GetStream()->Printf("%c", *data); 482 data++; 483 } 484 } 485 486 const char *suffix_token = options.GetSuffixToken(); 487 488 if (suffix_token != nullptr) 489 options.GetStream()->Printf("%c%s", quote, suffix_token); 490 else if (quote != 0) 491 options.GetStream()->Printf("%c", quote); 492 493 if (is_truncated) 494 options.GetStream()->Printf("..."); 495 496 return true; 497 } 498 499 template <typename SourceDataType> 500 static bool ReadUTFBufferAndDumpToStream( 501 const StringPrinter::ReadStringAndDumpToStreamOptions &options, 502 llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, 503 const SourceDataType *, 504 llvm::UTF8 **, llvm::UTF8 *, 505 llvm::ConversionFlags)) { 506 assert(options.GetStream() && "need a Stream to print the string to"); 507 508 if (options.GetLocation() == 0 || 509 options.GetLocation() == LLDB_INVALID_ADDRESS) 510 return false; 511 512 lldb::ProcessSP process_sp(options.GetProcessSP()); 513 514 if (!process_sp) 515 return false; 516 517 const int type_width = sizeof(SourceDataType); 518 const int origin_encoding = 8 * type_width; 519 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 520 return false; 521 // if not UTF8, I need a conversion function to return proper UTF8 522 if (origin_encoding != 8 && !ConvertFunction) 523 return false; 524 525 if (!options.GetStream()) 526 return false; 527 528 uint32_t sourceSize = options.GetSourceSize(); 529 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 530 531 bool is_truncated = false; 532 const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 533 534 if (!sourceSize) { 535 sourceSize = max_size; 536 needs_zero_terminator = true; 537 } else if (!options.GetIgnoreMaxLength()) { 538 if (sourceSize > max_size) { 539 sourceSize = max_size; 540 is_truncated = true; 541 } 542 } 543 544 const int bufferSPSize = sourceSize * type_width; 545 546 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); 547 548 if (!buffer_sp->GetBytes()) 549 return false; 550 551 Status error; 552 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 553 554 if (needs_zero_terminator) 555 process_sp->ReadStringFromMemory(options.GetLocation(), buffer, 556 bufferSPSize, error, type_width); 557 else 558 process_sp->ReadMemoryFromInferior(options.GetLocation(), 559 (char *)buffer_sp->GetBytes(), 560 bufferSPSize, error); 561 562 if (error.Fail()) { 563 options.GetStream()->Printf("unable to read data"); 564 return true; 565 } 566 567 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), 568 process_sp->GetAddressByteSize()); 569 570 StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 571 dump_options.SetData(data); 572 dump_options.SetSourceSize(sourceSize); 573 dump_options.SetIsTruncated(is_truncated); 574 575 return DumpUTFBufferToStream(ConvertFunction, dump_options); 576 } 577 578 template <> 579 bool StringPrinter::ReadStringAndDumpToStream< 580 StringPrinter::StringElementType::UTF8>( 581 const ReadStringAndDumpToStreamOptions &options) { 582 return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr); 583 } 584 585 template <> 586 bool StringPrinter::ReadStringAndDumpToStream< 587 StringPrinter::StringElementType::UTF16>( 588 const ReadStringAndDumpToStreamOptions &options) { 589 return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options, 590 llvm::ConvertUTF16toUTF8); 591 } 592 593 template <> 594 bool StringPrinter::ReadStringAndDumpToStream< 595 StringPrinter::StringElementType::UTF32>( 596 const ReadStringAndDumpToStreamOptions &options) { 597 return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options, 598 llvm::ConvertUTF32toUTF8); 599 } 600 601 template <> 602 bool StringPrinter::ReadBufferAndDumpToStream< 603 StringPrinter::StringElementType::UTF8>( 604 const ReadBufferAndDumpToStreamOptions &options) { 605 assert(options.GetStream() && "need a Stream to print the string to"); 606 607 return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options); 608 } 609 610 template <> 611 bool StringPrinter::ReadBufferAndDumpToStream< 612 StringPrinter::StringElementType::ASCII>( 613 const ReadBufferAndDumpToStreamOptions &options) { 614 // treat ASCII the same as UTF8 615 // FIXME: can we optimize ASCII some more? 616 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 617 } 618 619 template <> 620 bool StringPrinter::ReadBufferAndDumpToStream< 621 StringPrinter::StringElementType::UTF16>( 622 const ReadBufferAndDumpToStreamOptions &options) { 623 assert(options.GetStream() && "need a Stream to print the string to"); 624 625 return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options); 626 } 627 628 template <> 629 bool StringPrinter::ReadBufferAndDumpToStream< 630 StringPrinter::StringElementType::UTF32>( 631 const ReadBufferAndDumpToStreamOptions &options) { 632 assert(options.GetStream() && "need a Stream to print the string to"); 633 634 return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options); 635 } 636 637 } // namespace formatters 638 639 } // namespace lldb_private 640