1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/DataFormatters/StringPrinter.h" 11 12 #include "lldb/Core/DataExtractor.h" 13 #include "lldb/Core/Debugger.h" 14 #include "lldb/Core/Error.h" 15 #include "lldb/Core/ValueObject.h" 16 #include "lldb/Target/Process.h" 17 #include "lldb/Target/Target.h" 18 19 #include "llvm/Support/ConvertUTF.h" 20 21 #include <ctype.h> 22 #include <functional> 23 #include <locale> 24 25 using namespace lldb; 26 using namespace lldb_private; 27 using namespace lldb_private::formatters; 28 29 // I can't use a std::unique_ptr for this because the Deleter is a template argument there 30 // and I want the same type to represent both pointers I want to free and pointers I don't need 31 // to free - which is what this class essentially is 32 // It's very specialized to the needs of this file, and not suggested for general use 33 template <typename T = uint8_t, typename U = char, typename S = size_t> 34 struct StringPrinterBufferPointer 35 { 36 public: 37 38 typedef std::function<void(const T*)> Deleter; 39 40 StringPrinterBufferPointer (std::nullptr_t ptr) : 41 m_data(nullptr), 42 m_size(0), 43 m_deleter() 44 {} 45 46 StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) : 47 m_data(bytes), 48 m_size(size), 49 m_deleter(deleter) 50 {} 51 52 StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) : 53 m_data((T*)bytes), 54 m_size(size), 55 m_deleter(deleter) 56 {} 57 58 StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) : 59 m_data(rhs.m_data), 60 m_size(rhs.m_size), 61 m_deleter(rhs.m_deleter) 62 { 63 rhs.m_data = nullptr; 64 } 65 66 StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) : 67 m_data(rhs.m_data), 68 m_size(rhs.m_size), 69 m_deleter(rhs.m_deleter) 70 { 71 rhs.m_data = nullptr; // this is why m_data has to be mutable 72 } 73 74 const T* 75 GetBytes () const 76 { 77 return m_data; 78 } 79 80 const S 81 GetSize () const 82 { 83 return m_size; 84 } 85 86 ~StringPrinterBufferPointer () 87 { 88 if (m_data && m_deleter) 89 m_deleter(m_data); 90 m_data = nullptr; 91 } 92 93 StringPrinterBufferPointer& 94 operator = (const StringPrinterBufferPointer& rhs) 95 { 96 if (m_data && m_deleter) 97 m_deleter(m_data); 98 m_data = rhs.m_data; 99 m_size = rhs.m_size; 100 m_deleter = rhs.m_deleter; 101 rhs.m_data = nullptr; 102 return *this; 103 } 104 105 private: 106 mutable const T* m_data; 107 size_t m_size; 108 Deleter m_deleter; 109 }; 110 111 // we define this for all values of type but only implement it for those we care about 112 // that's good because we get linker errors for any unsupported type 113 template <StringElementType type> 114 static StringPrinterBufferPointer<> 115 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 116 117 // mimic isprint() for Unicode codepoints 118 static bool 119 isprint(char32_t codepoint) 120 { 121 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 122 { 123 return false; 124 } 125 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 126 { 127 return false; 128 } 129 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 130 { 131 return false; 132 } 133 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 134 { 135 return false; 136 } 137 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 138 { 139 return false; 140 } 141 return true; 142 } 143 144 template <> 145 StringPrinterBufferPointer<> 146 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 147 { 148 StringPrinterBufferPointer<> retval = {nullptr}; 149 150 switch (*buffer) 151 { 152 case 0: 153 retval = {"\\0",2}; 154 break; 155 case '\a': 156 retval = {"\\a",2}; 157 break; 158 case '\b': 159 retval = {"\\b",2}; 160 break; 161 case '\f': 162 retval = {"\\f",2}; 163 break; 164 case '\n': 165 retval = {"\\n",2}; 166 break; 167 case '\r': 168 retval = {"\\r",2}; 169 break; 170 case '\t': 171 retval = {"\\t",2}; 172 break; 173 case '\v': 174 retval = {"\\v",2}; 175 break; 176 case '\"': 177 retval = {"\\\"",2}; 178 break; 179 case '\\': 180 retval = {"\\\\",2}; 181 break; 182 default: 183 if (isprint(*buffer)) 184 retval = {buffer,1}; 185 else 186 { 187 retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} }; 188 sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer); 189 break; 190 } 191 } 192 193 next = buffer + 1; 194 return retval; 195 } 196 197 static char32_t 198 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 199 { 200 return (c0-192)*64+(c1-128); 201 } 202 static char32_t 203 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 204 { 205 return (c0-224)*4096+(c1-128)*64+(c2-128); 206 } 207 static char32_t 208 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 209 { 210 return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 211 } 212 213 template <> 214 StringPrinterBufferPointer<> 215 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 216 { 217 StringPrinterBufferPointer<> retval {nullptr}; 218 219 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 220 221 if (1+buffer_end-buffer < utf8_encoded_len) 222 { 223 // I don't have enough bytes - print whatever I have left 224 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 225 next = buffer_end+1; 226 return retval; 227 } 228 229 char32_t codepoint = 0; 230 switch (utf8_encoded_len) 231 { 232 case 1: 233 // this is just an ASCII byte - ask ASCII 234 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 235 case 2: 236 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 237 break; 238 case 3: 239 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 240 break; 241 case 4: 242 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 243 break; 244 default: 245 // this is probably some bogus non-character thing 246 // just print it as-is and hope to sync up again soon 247 retval = {buffer,1}; 248 next = buffer+1; 249 return retval; 250 } 251 252 if (codepoint) 253 { 254 switch (codepoint) 255 { 256 case 0: 257 retval = {"\\0",2}; 258 break; 259 case '\a': 260 retval = {"\\a",2}; 261 break; 262 case '\b': 263 retval = {"\\b",2}; 264 break; 265 case '\f': 266 retval = {"\\f",2}; 267 break; 268 case '\n': 269 retval = {"\\n",2}; 270 break; 271 case '\r': 272 retval = {"\\r",2}; 273 break; 274 case '\t': 275 retval = {"\\t",2}; 276 break; 277 case '\v': 278 retval = {"\\v",2}; 279 break; 280 case '\"': 281 retval = {"\\\"",2}; 282 break; 283 case '\\': 284 retval = {"\\\\",2}; 285 break; 286 default: 287 if (isprint(codepoint)) 288 retval = {buffer,utf8_encoded_len}; 289 else 290 { 291 retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} }; 292 sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint); 293 break; 294 } 295 } 296 297 next = buffer + utf8_encoded_len; 298 return retval; 299 } 300 301 // this should not happen - but just in case.. try to resync at some point 302 retval = {buffer,1}; 303 next = buffer+1; 304 return retval; 305 } 306 307 // Given a sequence of bytes, this function returns: 308 // a sequence of bytes to actually print out + a length 309 // the following unscanned position of the buffer is in next 310 static StringPrinterBufferPointer<> 311 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 312 { 313 if (!buffer) 314 return {nullptr}; 315 316 switch (type) 317 { 318 case StringElementType::ASCII: 319 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 320 case StringElementType::UTF8: 321 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next); 322 default: 323 return {nullptr}; 324 } 325 } 326 327 // use this call if you already have an LLDB-side buffer for the data 328 template<typename SourceDataType> 329 static bool 330 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 331 const SourceDataType*, 332 UTF8**, 333 UTF8*, 334 ConversionFlags), 335 const DataExtractor& data, 336 Stream& stream, 337 char prefix_token, 338 char quote, 339 uint32_t sourceSize, 340 bool escapeNonPrintables) 341 { 342 if (prefix_token != 0) 343 stream.Printf("%c",prefix_token); 344 if (quote != 0) 345 stream.Printf("%c",quote); 346 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 347 { 348 const int bufferSPSize = data.GetByteSize(); 349 if (sourceSize == 0) 350 { 351 const int origin_encoding = 8*sizeof(SourceDataType); 352 sourceSize = bufferSPSize/(origin_encoding / 4); 353 } 354 355 SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart(); 356 SourceDataType *data_end_ptr = data_ptr + sourceSize; 357 358 while (data_ptr < data_end_ptr) 359 { 360 if (!*data_ptr) 361 { 362 data_end_ptr = data_ptr; 363 break; 364 } 365 data_ptr++; 366 } 367 368 data_ptr = (SourceDataType*)data.GetDataStart(); 369 370 lldb::DataBufferSP utf8_data_buffer_sp; 371 UTF8* utf8_data_ptr = nullptr; 372 UTF8* utf8_data_end_ptr = nullptr; 373 374 if (ConvertFunction) 375 { 376 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 377 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 378 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 379 ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 380 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 381 } 382 else 383 { 384 // just copy the pointers - the cast is necessary to make the compiler happy 385 // but this should only happen if we are reading UTF8 data 386 utf8_data_ptr = (UTF8*)data_ptr; 387 utf8_data_end_ptr = (UTF8*)data_end_ptr; 388 } 389 390 // since we tend to accept partial data (and even partially malformed data) 391 // we might end up with no NULL terminator before the end_ptr 392 // hence we need to take a slower route and ensure we stay within boundaries 393 for (;utf8_data_ptr < utf8_data_end_ptr;) 394 { 395 if (!*utf8_data_ptr) 396 break; 397 398 if (escapeNonPrintables) 399 { 400 uint8_t* next_data = nullptr; 401 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data); 402 auto printable_bytes = printable.GetBytes(); 403 auto printable_size = printable.GetSize(); 404 if (!printable_bytes || !next_data) 405 { 406 // GetPrintable() failed on us - print one byte in a desperate resync attempt 407 printable_bytes = utf8_data_ptr; 408 printable_size = 1; 409 next_data = utf8_data_ptr+1; 410 } 411 for (unsigned c = 0; c < printable_size; c++) 412 stream.Printf("%c", *(printable_bytes+c)); 413 utf8_data_ptr = (uint8_t*)next_data; 414 } 415 else 416 { 417 stream.Printf("%c",*utf8_data_ptr); 418 utf8_data_ptr++; 419 } 420 } 421 } 422 if (quote != 0) 423 stream.Printf("%c",quote); 424 return true; 425 } 426 427 lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 428 ReadStringAndDumpToStreamOptions() 429 { 430 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 431 } 432 433 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 434 ReadBufferAndDumpToStreamOptions() 435 { 436 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 437 } 438 439 440 namespace lldb_private 441 { 442 443 namespace formatters 444 { 445 446 template <> 447 bool 448 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options) 449 { 450 assert(options.GetStream() && "need a Stream to print the string to"); 451 Error my_error; 452 size_t my_data_read; 453 454 ProcessSP process_sp(options.GetProcessSP()); 455 456 if (process_sp.get() == nullptr || options.GetLocation() == 0) 457 return false; 458 459 size_t size; 460 461 if (options.GetSourceSize() == 0) 462 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 463 else if (!options.GetIgnoreMaxLength()) 464 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 465 else 466 size = options.GetSourceSize(); 467 468 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 469 470 my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 471 472 if (my_error.Fail()) 473 return false; 474 475 char prefix_token = options.GetPrefixToken(); 476 char quote = options.GetQuote(); 477 478 if (prefix_token != 0) 479 options.GetStream()->Printf("%c%c",prefix_token,quote); 480 else if (quote != 0) 481 options.GetStream()->Printf("%c",quote); 482 483 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 484 485 // since we tend to accept partial data (and even partially malformed data) 486 // we might end up with no NULL terminator before the end_ptr 487 // hence we need to take a slower route and ensure we stay within boundaries 488 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 489 { 490 if (options.GetEscapeNonPrintables()) 491 { 492 uint8_t* next_data = nullptr; 493 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data); 494 auto printable_bytes = printable.GetBytes(); 495 auto printable_size = printable.GetSize(); 496 if (!printable_bytes || !next_data) 497 { 498 // GetPrintable() failed on us - print one byte in a desperate resync attempt 499 printable_bytes = data; 500 printable_size = 1; 501 next_data = data+1; 502 } 503 for (unsigned c = 0; c < printable_size; c++) 504 options.GetStream()->Printf("%c", *(printable_bytes+c)); 505 data = (uint8_t*)next_data; 506 } 507 else 508 { 509 options.GetStream()->Printf("%c",*data); 510 data++; 511 } 512 } 513 514 if (quote != 0) 515 options.GetStream()->Printf("%c",quote); 516 517 return true; 518 } 519 520 template<typename SourceDataType> 521 static bool 522 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options, 523 ConversionResult (*ConvertFunction) (const SourceDataType**, 524 const SourceDataType*, 525 UTF8**, 526 UTF8*, 527 ConversionFlags)) 528 { 529 assert(options.GetStream() && "need a Stream to print the string to"); 530 531 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 532 return false; 533 534 lldb::ProcessSP process_sp(options.GetProcessSP()); 535 536 if (!process_sp) 537 return false; 538 539 const int type_width = sizeof(SourceDataType); 540 const int origin_encoding = 8 * type_width ; 541 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 542 return false; 543 // if not UTF8, I need a conversion function to return proper UTF8 544 if (origin_encoding != 8 && !ConvertFunction) 545 return false; 546 547 if (!options.GetStream()) 548 return false; 549 550 uint32_t sourceSize = options.GetSourceSize(); 551 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 552 553 if (!sourceSize) 554 { 555 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 556 needs_zero_terminator = true; 557 } 558 else 559 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 560 561 const int bufferSPSize = sourceSize * type_width; 562 563 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 564 565 if (!buffer_sp->GetBytes()) 566 return false; 567 568 Error error; 569 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 570 571 size_t data_read = 0; 572 if (needs_zero_terminator) 573 data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 574 else 575 data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 576 577 if (error.Fail()) 578 { 579 options.GetStream()->Printf("unable to read data"); 580 return true; 581 } 582 583 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 584 585 return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables()); 586 } 587 588 template <> 589 bool 590 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options) 591 { 592 return ReadUTFBufferAndDumpToStream<UTF8>(options, 593 nullptr); 594 } 595 596 template <> 597 bool 598 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options) 599 { 600 return ReadUTFBufferAndDumpToStream<UTF16>(options, 601 ConvertUTF16toUTF8); 602 } 603 604 template <> 605 bool 606 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options) 607 { 608 return ReadUTFBufferAndDumpToStream<UTF32>(options, 609 ConvertUTF32toUTF8); 610 } 611 612 template <> 613 bool 614 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options) 615 { 616 assert(options.GetStream() && "need a Stream to print the string to"); 617 618 return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 619 } 620 621 template <> 622 bool 623 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options) 624 { 625 // treat ASCII the same as UTF8 626 // FIXME: can we optimize ASCII some more? 627 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 628 } 629 630 template <> 631 bool 632 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options) 633 { 634 assert(options.GetStream() && "need a Stream to print the string to"); 635 636 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 637 } 638 639 template <> 640 bool 641 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options) 642 { 643 assert(options.GetStream() && "need a Stream to print the string to"); 644 645 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 646 } 647 648 } // namespace formatters 649 650 } // namespace lldb_private 651