1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/DataFormatters/StringPrinter.h" 11 12 #include "lldb/Core/DataExtractor.h" 13 #include "lldb/Core/Debugger.h" 14 #include "lldb/Core/Error.h" 15 #include "lldb/Core/ValueObject.h" 16 #include "lldb/Target/Process.h" 17 #include "lldb/Target/Target.h" 18 19 #include "llvm/Support/ConvertUTF.h" 20 21 #include <ctype.h> 22 #include <functional> 23 #include <locale> 24 25 using namespace lldb; 26 using namespace lldb_private; 27 using namespace lldb_private::formatters; 28 29 // I can't use a std::unique_ptr for this because the Deleter is a template argument there 30 // and I want the same type to represent both pointers I want to free and pointers I don't need 31 // to free - which is what this class essentially is 32 // It's very specialized to the needs of this file, and not suggested for general use 33 template <typename T = uint8_t, typename U = char, typename S = size_t> 34 struct StringPrinterBufferPointer 35 { 36 public: 37 38 typedef std::function<void(const T*)> Deleter; 39 40 StringPrinterBufferPointer (std::nullptr_t ptr) : 41 m_data(nullptr), 42 m_size(0), 43 m_deleter() 44 {} 45 46 StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) : 47 m_data(bytes), 48 m_size(size), 49 m_deleter(deleter) 50 {} 51 52 StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) : 53 m_data((T*)bytes), 54 m_size(size), 55 m_deleter(deleter) 56 {} 57 58 StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) : 59 m_data(rhs.m_data), 60 m_size(rhs.m_size), 61 m_deleter(rhs.m_deleter) 62 { 63 rhs.m_data = nullptr; 64 } 65 66 StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) : 67 m_data(rhs.m_data), 68 m_size(rhs.m_size), 69 m_deleter(rhs.m_deleter) 70 { 71 rhs.m_data = nullptr; // this is why m_data has to be mutable 72 } 73 74 const T* 75 GetBytes () const 76 { 77 return m_data; 78 } 79 80 const S 81 GetSize () const 82 { 83 return m_size; 84 } 85 86 ~StringPrinterBufferPointer () 87 { 88 if (m_data && m_deleter) 89 m_deleter(m_data); 90 m_data = nullptr; 91 } 92 93 StringPrinterBufferPointer& 94 operator = (const StringPrinterBufferPointer& rhs) 95 { 96 if (m_data && m_deleter) 97 m_deleter(m_data); 98 m_data = rhs.m_data; 99 m_size = rhs.m_size; 100 m_deleter = rhs.m_deleter; 101 rhs.m_data = nullptr; 102 return *this; 103 } 104 105 private: 106 mutable const T* m_data; 107 size_t m_size; 108 Deleter m_deleter; 109 }; 110 111 // we define this for all values of type but only implement it for those we care about 112 // that's good because we get linker errors for any unsupported type 113 template <StringElementType type> 114 static StringPrinterBufferPointer<> 115 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 116 117 // mimic isprint() for Unicode codepoints 118 static bool 119 isprint(char32_t codepoint) 120 { 121 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 122 { 123 return false; 124 } 125 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 126 { 127 return false; 128 } 129 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 130 { 131 return false; 132 } 133 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 134 { 135 return false; 136 } 137 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 138 { 139 return false; 140 } 141 return true; 142 } 143 144 template <> 145 StringPrinterBufferPointer<> 146 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 147 { 148 StringPrinterBufferPointer<> retval = {nullptr}; 149 150 switch (*buffer) 151 { 152 case '\a': 153 retval = {"\\a",2}; 154 break; 155 case '\b': 156 retval = {"\\b",2}; 157 break; 158 case '\f': 159 retval = {"\\f",2}; 160 break; 161 case '\n': 162 retval = {"\\n",2}; 163 break; 164 case '\r': 165 retval = {"\\r",2}; 166 break; 167 case '\t': 168 retval = {"\\t",2}; 169 break; 170 case '\v': 171 retval = {"\\v",2}; 172 break; 173 case '\"': 174 retval = {"\\\"",2}; 175 break; 176 case '\\': 177 retval = {"\\\\",2}; 178 break; 179 default: 180 if (isprint(*buffer)) 181 retval = {buffer,1}; 182 else 183 { 184 retval = { new uint8_t[5],4,[] (const uint8_t* c) {delete[] c;} }; 185 sprintf((char*)retval.GetBytes(),"\\x%02x",*buffer); 186 break; 187 } 188 } 189 190 next = buffer + 1; 191 return retval; 192 } 193 194 static char32_t 195 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 196 { 197 return (c0-192)*64+(c1-128); 198 } 199 static char32_t 200 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 201 { 202 return (c0-224)*4096+(c1-128)*64+(c2-128); 203 } 204 static char32_t 205 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 206 { 207 return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 208 } 209 210 template <> 211 StringPrinterBufferPointer<> 212 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 213 { 214 StringPrinterBufferPointer<> retval {nullptr}; 215 216 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 217 218 if (1+buffer_end-buffer < utf8_encoded_len) 219 { 220 // I don't have enough bytes - print whatever I have left 221 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 222 next = buffer_end+1; 223 return retval; 224 } 225 226 char32_t codepoint = 0; 227 switch (utf8_encoded_len) 228 { 229 case 1: 230 // this is just an ASCII byte - ask ASCII 231 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 232 case 2: 233 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 234 break; 235 case 3: 236 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 237 break; 238 case 4: 239 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 240 break; 241 default: 242 // this is probably some bogus non-character thing 243 // just print it as-is and hope to sync up again soon 244 retval = {buffer,1}; 245 next = buffer+1; 246 return retval; 247 } 248 249 if (codepoint) 250 { 251 switch (codepoint) 252 { 253 case '\a': 254 retval = {"\\a",2}; 255 break; 256 case '\b': 257 retval = {"\\b",2}; 258 break; 259 case '\f': 260 retval = {"\\f",2}; 261 break; 262 case '\n': 263 retval = {"\\n",2}; 264 break; 265 case '\r': 266 retval = {"\\r",2}; 267 break; 268 case '\t': 269 retval = {"\\t",2}; 270 break; 271 case '\v': 272 retval = {"\\v",2}; 273 break; 274 case '\"': 275 retval = {"\\\"",2}; 276 break; 277 case '\\': 278 retval = {"\\\\",2}; 279 break; 280 default: 281 if (isprint(codepoint)) 282 retval = {buffer,utf8_encoded_len}; 283 else 284 { 285 retval = { new uint8_t[11],10,[] (const uint8_t* c) {delete[] c;} }; 286 sprintf((char*)retval.GetBytes(),"\\U%08x",codepoint); 287 break; 288 } 289 } 290 291 next = buffer + utf8_encoded_len; 292 return retval; 293 } 294 295 // this should not happen - but just in case.. try to resync at some point 296 retval = {buffer,1}; 297 next = buffer+1; 298 return retval; 299 } 300 301 // Given a sequence of bytes, this function returns: 302 // a sequence of bytes to actually print out + a length 303 // the following unscanned position of the buffer is in next 304 static StringPrinterBufferPointer<> 305 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 306 { 307 if (!buffer) 308 return {nullptr}; 309 310 switch (type) 311 { 312 case StringElementType::ASCII: 313 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 314 case StringElementType::UTF8: 315 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next); 316 default: 317 return {nullptr}; 318 } 319 } 320 321 // use this call if you already have an LLDB-side buffer for the data 322 template<typename SourceDataType> 323 static bool 324 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 325 const SourceDataType*, 326 UTF8**, 327 UTF8*, 328 ConversionFlags), 329 const DataExtractor& data, 330 Stream& stream, 331 char prefix_token, 332 char quote, 333 uint32_t sourceSize, 334 bool escapeNonPrintables) 335 { 336 if (prefix_token != 0) 337 stream.Printf("%c",prefix_token); 338 if (quote != 0) 339 stream.Printf("%c",quote); 340 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 341 { 342 const int bufferSPSize = data.GetByteSize(); 343 if (sourceSize == 0) 344 { 345 const int origin_encoding = 8*sizeof(SourceDataType); 346 sourceSize = bufferSPSize/(origin_encoding / 4); 347 } 348 349 SourceDataType *data_ptr = (SourceDataType*)data.GetDataStart(); 350 SourceDataType *data_end_ptr = data_ptr + sourceSize; 351 352 while (data_ptr < data_end_ptr) 353 { 354 if (!*data_ptr) 355 { 356 data_end_ptr = data_ptr; 357 break; 358 } 359 data_ptr++; 360 } 361 362 data_ptr = (SourceDataType*)data.GetDataStart(); 363 364 lldb::DataBufferSP utf8_data_buffer_sp; 365 UTF8* utf8_data_ptr = nullptr; 366 UTF8* utf8_data_end_ptr = nullptr; 367 368 if (ConvertFunction) 369 { 370 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 371 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 372 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 373 ConvertFunction ( (const SourceDataType**)&data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 374 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 375 } 376 else 377 { 378 // just copy the pointers - the cast is necessary to make the compiler happy 379 // but this should only happen if we are reading UTF8 data 380 utf8_data_ptr = (UTF8*)data_ptr; 381 utf8_data_end_ptr = (UTF8*)data_end_ptr; 382 } 383 384 // since we tend to accept partial data (and even partially malformed data) 385 // we might end up with no NULL terminator before the end_ptr 386 // hence we need to take a slower route and ensure we stay within boundaries 387 for (;utf8_data_ptr < utf8_data_end_ptr;) 388 { 389 if (!*utf8_data_ptr) 390 break; 391 392 if (escapeNonPrintables) 393 { 394 uint8_t* next_data = nullptr; 395 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data); 396 auto printable_bytes = printable.GetBytes(); 397 auto printable_size = printable.GetSize(); 398 if (!printable_bytes || !next_data) 399 { 400 // GetPrintable() failed on us - print one byte in a desperate resync attempt 401 printable_bytes = utf8_data_ptr; 402 printable_size = 1; 403 next_data = utf8_data_ptr+1; 404 } 405 for (int c = 0; c < printable_size; c++) 406 stream.Printf("%c", *(printable_bytes+c)); 407 utf8_data_ptr = (uint8_t*)next_data; 408 } 409 else 410 { 411 stream.Printf("%c",*utf8_data_ptr); 412 utf8_data_ptr++; 413 } 414 } 415 } 416 if (quote != 0) 417 stream.Printf("%c",quote); 418 return true; 419 } 420 421 lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 422 ReadStringAndDumpToStreamOptions() 423 { 424 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 425 } 426 427 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 428 ReadBufferAndDumpToStreamOptions() 429 { 430 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 431 } 432 433 434 namespace lldb_private 435 { 436 437 namespace formatters 438 { 439 440 template <> 441 bool 442 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options) 443 { 444 assert(options.GetStream() && "need a Stream to print the string to"); 445 Error my_error; 446 size_t my_data_read; 447 448 ProcessSP process_sp(options.GetProcessSP()); 449 450 if (process_sp.get() == nullptr || options.GetLocation() == 0) 451 return false; 452 453 size_t size; 454 455 if (options.GetSourceSize() == 0) 456 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 457 else if (!options.GetIgnoreMaxLength()) 458 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 459 else 460 size = options.GetSourceSize(); 461 462 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 463 464 my_data_read = process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 465 466 if (my_error.Fail()) 467 return false; 468 469 char prefix_token = options.GetPrefixToken(); 470 char quote = options.GetQuote(); 471 472 if (prefix_token != 0) 473 options.GetStream()->Printf("%c%c",prefix_token,quote); 474 else if (quote != 0) 475 options.GetStream()->Printf("%c",quote); 476 477 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 478 479 // since we tend to accept partial data (and even partially malformed data) 480 // we might end up with no NULL terminator before the end_ptr 481 // hence we need to take a slower route and ensure we stay within boundaries 482 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 483 { 484 if (options.GetEscapeNonPrintables()) 485 { 486 uint8_t* next_data = nullptr; 487 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data); 488 auto printable_bytes = printable.GetBytes(); 489 auto printable_size = printable.GetSize(); 490 if (!printable_bytes || !next_data) 491 { 492 // GetPrintable() failed on us - print one byte in a desperate resync attempt 493 printable_bytes = data; 494 printable_size = 1; 495 next_data = data+1; 496 } 497 for (int c = 0; c < printable_size; c++) 498 options.GetStream()->Printf("%c", *(printable_bytes+c)); 499 data = (uint8_t*)next_data; 500 } 501 else 502 { 503 options.GetStream()->Printf("%c",*data); 504 data++; 505 } 506 } 507 508 if (quote != 0) 509 options.GetStream()->Printf("%c",quote); 510 511 return true; 512 } 513 514 template<typename SourceDataType> 515 static bool 516 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options, 517 ConversionResult (*ConvertFunction) (const SourceDataType**, 518 const SourceDataType*, 519 UTF8**, 520 UTF8*, 521 ConversionFlags)) 522 { 523 assert(options.GetStream() && "need a Stream to print the string to"); 524 525 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 526 return false; 527 528 lldb::ProcessSP process_sp(options.GetProcessSP()); 529 530 if (!process_sp) 531 return false; 532 533 const int type_width = sizeof(SourceDataType); 534 const int origin_encoding = 8 * type_width ; 535 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 536 return false; 537 // if not UTF8, I need a conversion function to return proper UTF8 538 if (origin_encoding != 8 && !ConvertFunction) 539 return false; 540 541 if (!options.GetStream()) 542 return false; 543 544 uint32_t sourceSize = options.GetSourceSize(); 545 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 546 547 if (!sourceSize) 548 { 549 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 550 needs_zero_terminator = true; 551 } 552 else 553 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 554 555 const int bufferSPSize = sourceSize * type_width; 556 557 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 558 559 if (!buffer_sp->GetBytes()) 560 return false; 561 562 Error error; 563 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 564 565 size_t data_read = 0; 566 if (needs_zero_terminator) 567 data_read = process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 568 else 569 data_read = process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 570 571 if (error.Fail()) 572 { 573 options.GetStream()->Printf("unable to read data"); 574 return true; 575 } 576 577 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 578 579 return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables()); 580 } 581 582 template <> 583 bool 584 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options) 585 { 586 return ReadUTFBufferAndDumpToStream<UTF8>(options, 587 nullptr); 588 } 589 590 template <> 591 bool 592 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options) 593 { 594 return ReadUTFBufferAndDumpToStream<UTF16>(options, 595 ConvertUTF16toUTF8); 596 } 597 598 template <> 599 bool 600 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options) 601 { 602 return ReadUTFBufferAndDumpToStream<UTF32>(options, 603 ConvertUTF32toUTF8); 604 } 605 606 template <> 607 bool 608 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options) 609 { 610 assert(options.GetStream() && "need a Stream to print the string to"); 611 612 return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 613 } 614 615 template <> 616 bool 617 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options) 618 { 619 // treat ASCII the same as UTF8 620 // FIXME: can we optimize ASCII some more? 621 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 622 } 623 624 template <> 625 bool 626 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options) 627 { 628 assert(options.GetStream() && "need a Stream to print the string to"); 629 630 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 631 } 632 633 template <> 634 bool 635 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options) 636 { 637 assert(options.GetStream() && "need a Stream to print the string to"); 638 639 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 640 } 641 642 } // namespace formatters 643 644 } // namespace lldb_private 645