1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/DataFormatters/StringPrinter.h" 11 12 #include "lldb/Core/Debugger.h" 13 #include "lldb/Core/Error.h" 14 #include "lldb/Core/ValueObject.h" 15 #include "lldb/Target/Language.h" 16 #include "lldb/Target/Process.h" 17 #include "lldb/Target/Target.h" 18 19 #include "llvm/Support/ConvertUTF.h" 20 21 #include <ctype.h> 22 #include <locale> 23 24 using namespace lldb; 25 using namespace lldb_private; 26 using namespace lldb_private::formatters; 27 28 // we define this for all values of type but only implement it for those we care about 29 // that's good because we get linker errors for any unsupported type 30 template <lldb_private::formatters::StringPrinter::StringElementType type> 31 static StringPrinter::StringPrinterBufferPointer<> 32 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 33 34 // mimic isprint() for Unicode codepoints 35 static bool 36 isprint(char32_t codepoint) 37 { 38 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 39 { 40 return false; 41 } 42 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 43 { 44 return false; 45 } 46 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 47 { 48 return false; 49 } 50 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 51 { 52 return false; 53 } 54 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 55 { 56 return false; 57 } 58 return true; 59 } 60 61 template <> 62 StringPrinter::StringPrinterBufferPointer<> 63 GetPrintableImpl<StringPrinter::StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 64 { 65 StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; 66 67 switch (*buffer) 68 { 69 case 0: 70 retval = {"\\0",2}; 71 break; 72 case '\a': 73 retval = {"\\a",2}; 74 break; 75 case '\b': 76 retval = {"\\b",2}; 77 break; 78 case '\f': 79 retval = {"\\f",2}; 80 break; 81 case '\n': 82 retval = {"\\n",2}; 83 break; 84 case '\r': 85 retval = {"\\r",2}; 86 break; 87 case '\t': 88 retval = {"\\t",2}; 89 break; 90 case '\v': 91 retval = {"\\v",2}; 92 break; 93 case '\"': 94 retval = {"\\\"",2}; 95 break; 96 case '\\': 97 retval = {"\\\\",2}; 98 break; 99 default: 100 if (isprint(*buffer)) 101 retval = {buffer,1}; 102 else 103 { 104 uint8_t* data = new uint8_t[5]; 105 sprintf((char*)data,"\\x%02x",*buffer); 106 retval = {data, 4, [] (const uint8_t* c) {delete[] c;} }; 107 break; 108 } 109 } 110 111 next = buffer + 1; 112 return retval; 113 } 114 115 static char32_t 116 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 117 { 118 return (c0-192)*64+(c1-128); 119 } 120 static char32_t 121 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 122 { 123 return (c0-224)*4096+(c1-128)*64+(c2-128); 124 } 125 static char32_t 126 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 127 { 128 return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 129 } 130 131 template <> 132 StringPrinter::StringPrinterBufferPointer<> 133 GetPrintableImpl<StringPrinter::StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 134 { 135 StringPrinter::StringPrinterBufferPointer<> retval {nullptr}; 136 137 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 138 139 if (1+buffer_end-buffer < utf8_encoded_len) 140 { 141 // I don't have enough bytes - print whatever I have left 142 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 143 next = buffer_end+1; 144 return retval; 145 } 146 147 char32_t codepoint = 0; 148 switch (utf8_encoded_len) 149 { 150 case 1: 151 // this is just an ASCII byte - ask ASCII 152 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next); 153 case 2: 154 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 155 break; 156 case 3: 157 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 158 break; 159 case 4: 160 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 161 break; 162 default: 163 // this is probably some bogus non-character thing 164 // just print it as-is and hope to sync up again soon 165 retval = {buffer,1}; 166 next = buffer+1; 167 return retval; 168 } 169 170 if (codepoint) 171 { 172 switch (codepoint) 173 { 174 case 0: 175 retval = {"\\0",2}; 176 break; 177 case '\a': 178 retval = {"\\a",2}; 179 break; 180 case '\b': 181 retval = {"\\b",2}; 182 break; 183 case '\f': 184 retval = {"\\f",2}; 185 break; 186 case '\n': 187 retval = {"\\n",2}; 188 break; 189 case '\r': 190 retval = {"\\r",2}; 191 break; 192 case '\t': 193 retval = {"\\t",2}; 194 break; 195 case '\v': 196 retval = {"\\v",2}; 197 break; 198 case '\"': 199 retval = {"\\\"",2}; 200 break; 201 case '\\': 202 retval = {"\\\\",2}; 203 break; 204 default: 205 if (isprint(codepoint)) 206 retval = {buffer,utf8_encoded_len}; 207 else 208 { 209 uint8_t* data = new uint8_t[11]; 210 sprintf((char *)data, "\\U%08x", (unsigned)codepoint); 211 retval = { data,10,[] (const uint8_t* c) {delete[] c;} }; 212 break; 213 } 214 } 215 216 next = buffer + utf8_encoded_len; 217 return retval; 218 } 219 220 // this should not happen - but just in case.. try to resync at some point 221 retval = {buffer,1}; 222 next = buffer+1; 223 return retval; 224 } 225 226 // Given a sequence of bytes, this function returns: 227 // a sequence of bytes to actually print out + a length 228 // the following unscanned position of the buffer is in next 229 static StringPrinter::StringPrinterBufferPointer<> 230 GetPrintable(StringPrinter::StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 231 { 232 if (!buffer) 233 return {nullptr}; 234 235 switch (type) 236 { 237 case StringPrinter::StringElementType::ASCII: 238 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next); 239 case StringPrinter::StringElementType::UTF8: 240 return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(buffer, buffer_end, next); 241 default: 242 return {nullptr}; 243 } 244 } 245 246 StringPrinter::EscapingHelper 247 StringPrinter::GetDefaultEscapingHelper (GetPrintableElementType elem_type) 248 { 249 switch (elem_type) 250 { 251 case GetPrintableElementType::UTF8: 252 return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> { 253 return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, buffer_end, next); 254 }; 255 case GetPrintableElementType::ASCII: 256 return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> { 257 return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, buffer_end, next); 258 }; 259 } 260 llvm_unreachable("bad element type"); 261 } 262 263 // use this call if you already have an LLDB-side buffer for the data 264 template<typename SourceDataType> 265 static bool 266 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 267 const SourceDataType*, 268 UTF8**, 269 UTF8*, 270 ConversionFlags), 271 const StringPrinter::ReadBufferAndDumpToStreamOptions& dump_options) 272 { 273 Stream &stream(*dump_options.GetStream()); 274 if (dump_options.GetPrefixToken() != 0) 275 stream.Printf("%s",dump_options.GetPrefixToken()); 276 if (dump_options.GetQuote() != 0) 277 stream.Printf("%c",dump_options.GetQuote()); 278 auto data(dump_options.GetData()); 279 auto source_size(dump_options.GetSourceSize()); 280 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 281 { 282 const int bufferSPSize = data.GetByteSize(); 283 if (dump_options.GetSourceSize() == 0) 284 { 285 const int origin_encoding = 8*sizeof(SourceDataType); 286 source_size = bufferSPSize/(origin_encoding / 4); 287 } 288 289 const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart(); 290 const SourceDataType *data_end_ptr = data_ptr + source_size; 291 292 const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 293 294 if (zero_is_terminator) 295 { 296 while (data_ptr < data_end_ptr) 297 { 298 if (!*data_ptr) 299 { 300 data_end_ptr = data_ptr; 301 break; 302 } 303 data_ptr++; 304 } 305 306 data_ptr = (const SourceDataType*)data.GetDataStart(); 307 } 308 309 lldb::DataBufferSP utf8_data_buffer_sp; 310 UTF8* utf8_data_ptr = nullptr; 311 UTF8* utf8_data_end_ptr = nullptr; 312 313 if (ConvertFunction) 314 { 315 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 316 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 317 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 318 ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 319 if (false == zero_is_terminator) 320 utf8_data_end_ptr = utf8_data_ptr; 321 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 322 } 323 else 324 { 325 // just copy the pointers - the cast is necessary to make the compiler happy 326 // but this should only happen if we are reading UTF8 data 327 utf8_data_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_ptr)); 328 utf8_data_end_ptr = const_cast<UTF8 *>(reinterpret_cast<const UTF8*>(data_end_ptr)); 329 } 330 331 const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 332 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 333 if (escape_non_printables) 334 { 335 if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 336 escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8); 337 else 338 escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8); 339 } 340 341 // since we tend to accept partial data (and even partially malformed data) 342 // we might end up with no NULL terminator before the end_ptr 343 // hence we need to take a slower route and ensure we stay within boundaries 344 for (;utf8_data_ptr < utf8_data_end_ptr;) 345 { 346 if (zero_is_terminator && !*utf8_data_ptr) 347 break; 348 349 if (escape_non_printables) 350 { 351 uint8_t* next_data = nullptr; 352 auto printable = escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 353 auto printable_bytes = printable.GetBytes(); 354 auto printable_size = printable.GetSize(); 355 if (!printable_bytes || !next_data) 356 { 357 // GetPrintable() failed on us - print one byte in a desperate resync attempt 358 printable_bytes = utf8_data_ptr; 359 printable_size = 1; 360 next_data = utf8_data_ptr+1; 361 } 362 for (unsigned c = 0; c < printable_size; c++) 363 stream.Printf("%c", *(printable_bytes+c)); 364 utf8_data_ptr = (uint8_t*)next_data; 365 } 366 else 367 { 368 stream.Printf("%c",*utf8_data_ptr); 369 utf8_data_ptr++; 370 } 371 } 372 } 373 if (dump_options.GetQuote() != 0) 374 stream.Printf("%c",dump_options.GetQuote()); 375 if (dump_options.GetSuffixToken() != 0) 376 stream.Printf("%s",dump_options.GetSuffixToken()); 377 if (dump_options.GetIsTruncated()) 378 stream.Printf("..."); 379 return true; 380 } 381 382 lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 383 ReadStringAndDumpToStreamOptions() 384 { 385 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 386 } 387 388 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 389 ReadBufferAndDumpToStreamOptions() 390 { 391 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 392 } 393 394 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const ReadStringAndDumpToStreamOptions& options) : 395 ReadBufferAndDumpToStreamOptions() 396 { 397 SetStream(options.GetStream()); 398 SetPrefixToken(options.GetPrefixToken()); 399 SetSuffixToken(options.GetSuffixToken()); 400 SetQuote(options.GetQuote()); 401 SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 402 SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 403 SetLanguage(options.GetLanguage()); 404 } 405 406 407 namespace lldb_private 408 { 409 410 namespace formatters 411 { 412 413 template <> 414 bool 415 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options) 416 { 417 assert(options.GetStream() && "need a Stream to print the string to"); 418 Error my_error; 419 420 ProcessSP process_sp(options.GetProcessSP()); 421 422 if (process_sp.get() == nullptr || options.GetLocation() == 0) 423 return false; 424 425 size_t size; 426 const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 427 bool is_truncated = false; 428 429 if (options.GetSourceSize() == 0) 430 size = max_size; 431 else if (!options.GetIgnoreMaxLength()) 432 { 433 size = options.GetSourceSize(); 434 if (size > max_size) 435 { 436 size = max_size; 437 is_truncated = true; 438 } 439 } 440 else 441 size = options.GetSourceSize(); 442 443 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 444 445 process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 446 447 if (my_error.Fail()) 448 return false; 449 450 const char* prefix_token = options.GetPrefixToken(); 451 char quote = options.GetQuote(); 452 453 if (prefix_token != 0) 454 options.GetStream()->Printf("%s%c",prefix_token,quote); 455 else if (quote != 0) 456 options.GetStream()->Printf("%c",quote); 457 458 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 459 460 const bool escape_non_printables = options.GetEscapeNonPrintables(); 461 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 462 if (escape_non_printables) 463 { 464 if (Language *language = Language::FindPlugin(options.GetLanguage())) 465 escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII); 466 else 467 escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII); 468 } 469 470 // since we tend to accept partial data (and even partially malformed data) 471 // we might end up with no NULL terminator before the end_ptr 472 // hence we need to take a slower route and ensure we stay within boundaries 473 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 474 { 475 if (escape_non_printables) 476 { 477 uint8_t* next_data = nullptr; 478 auto printable = escaping_callback(data, data_end, next_data); 479 auto printable_bytes = printable.GetBytes(); 480 auto printable_size = printable.GetSize(); 481 if (!printable_bytes || !next_data) 482 { 483 // GetPrintable() failed on us - print one byte in a desperate resync attempt 484 printable_bytes = data; 485 printable_size = 1; 486 next_data = data+1; 487 } 488 for (unsigned c = 0; c < printable_size; c++) 489 options.GetStream()->Printf("%c", *(printable_bytes+c)); 490 data = (uint8_t*)next_data; 491 } 492 else 493 { 494 options.GetStream()->Printf("%c",*data); 495 data++; 496 } 497 } 498 499 const char* suffix_token = options.GetSuffixToken(); 500 501 if (suffix_token != 0) 502 options.GetStream()->Printf("%c%s",quote, suffix_token); 503 else if (quote != 0) 504 options.GetStream()->Printf("%c",quote); 505 506 if (is_truncated) 507 options.GetStream()->Printf("..."); 508 509 return true; 510 } 511 512 template<typename SourceDataType> 513 static bool 514 ReadUTFBufferAndDumpToStream (const StringPrinter::ReadStringAndDumpToStreamOptions& options, 515 ConversionResult (*ConvertFunction) (const SourceDataType**, 516 const SourceDataType*, 517 UTF8**, 518 UTF8*, 519 ConversionFlags)) 520 { 521 assert(options.GetStream() && "need a Stream to print the string to"); 522 523 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 524 return false; 525 526 lldb::ProcessSP process_sp(options.GetProcessSP()); 527 528 if (!process_sp) 529 return false; 530 531 const int type_width = sizeof(SourceDataType); 532 const int origin_encoding = 8 * type_width ; 533 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 534 return false; 535 // if not UTF8, I need a conversion function to return proper UTF8 536 if (origin_encoding != 8 && !ConvertFunction) 537 return false; 538 539 if (!options.GetStream()) 540 return false; 541 542 uint32_t sourceSize = options.GetSourceSize(); 543 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 544 545 bool is_truncated = false; 546 const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 547 548 if (!sourceSize) 549 { 550 sourceSize = max_size; 551 needs_zero_terminator = true; 552 } 553 else if (!options.GetIgnoreMaxLength()) 554 { 555 if (sourceSize > max_size) 556 { 557 sourceSize = max_size; 558 is_truncated = true; 559 } 560 } 561 562 const int bufferSPSize = sourceSize * type_width; 563 564 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 565 566 if (!buffer_sp->GetBytes()) 567 return false; 568 569 Error error; 570 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 571 572 if (needs_zero_terminator) 573 process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 574 else 575 process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 576 577 if (error.Fail()) 578 { 579 options.GetStream()->Printf("unable to read data"); 580 return true; 581 } 582 583 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 584 585 StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 586 dump_options.SetData(data); 587 dump_options.SetSourceSize(sourceSize); 588 dump_options.SetIsTruncated(is_truncated); 589 590 return DumpUTFBufferToStream(ConvertFunction, dump_options); 591 } 592 593 template <> 594 bool 595 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options) 596 { 597 return ReadUTFBufferAndDumpToStream<UTF8>(options, 598 nullptr); 599 } 600 601 template <> 602 bool 603 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options) 604 { 605 return ReadUTFBufferAndDumpToStream<UTF16>(options, 606 ConvertUTF16toUTF8); 607 } 608 609 template <> 610 bool 611 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options) 612 { 613 return ReadUTFBufferAndDumpToStream<UTF32>(options, 614 ConvertUTF32toUTF8); 615 } 616 617 template <> 618 bool 619 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options) 620 { 621 assert(options.GetStream() && "need a Stream to print the string to"); 622 623 return DumpUTFBufferToStream<UTF8>(nullptr, options); 624 } 625 626 template <> 627 bool 628 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options) 629 { 630 // treat ASCII the same as UTF8 631 // FIXME: can we optimize ASCII some more? 632 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 633 } 634 635 template <> 636 bool 637 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options) 638 { 639 assert(options.GetStream() && "need a Stream to print the string to"); 640 641 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options); 642 } 643 644 template <> 645 bool 646 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options) 647 { 648 assert(options.GetStream() && "need a Stream to print the string to"); 649 650 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options); 651 } 652 653 } // namespace formatters 654 655 } // namespace lldb_private 656