1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/DataFormatters/StringPrinter.h" 11 12 #include "lldb/Core/DataExtractor.h" 13 #include "lldb/Core/Debugger.h" 14 #include "lldb/Core/Error.h" 15 #include "lldb/Core/ValueObject.h" 16 #include "lldb/Target/Process.h" 17 #include "lldb/Target/Target.h" 18 19 #include "llvm/Support/ConvertUTF.h" 20 21 #include <ctype.h> 22 #include <functional> 23 #include <locale> 24 25 using namespace lldb; 26 using namespace lldb_private; 27 using namespace lldb_private::formatters; 28 29 // I can't use a std::unique_ptr for this because the Deleter is a template argument there 30 // and I want the same type to represent both pointers I want to free and pointers I don't need 31 // to free - which is what this class essentially is 32 // It's very specialized to the needs of this file, and not suggested for general use 33 template <typename T = uint8_t, typename U = char, typename S = size_t> 34 struct StringPrinterBufferPointer 35 { 36 public: 37 38 typedef std::function<void(const T*)> Deleter; 39 40 StringPrinterBufferPointer (std::nullptr_t ptr) : 41 m_data(nullptr), 42 m_size(0), 43 m_deleter() 44 {} 45 46 StringPrinterBufferPointer(const T* bytes, S size, Deleter deleter = nullptr) : 47 m_data(bytes), 48 m_size(size), 49 m_deleter(deleter) 50 {} 51 52 StringPrinterBufferPointer(const U* bytes, S size, Deleter deleter = nullptr) : 53 m_data((T*)bytes), 54 m_size(size), 55 m_deleter(deleter) 56 {} 57 58 StringPrinterBufferPointer(StringPrinterBufferPointer&& rhs) : 59 m_data(rhs.m_data), 60 m_size(rhs.m_size), 61 m_deleter(rhs.m_deleter) 62 { 63 rhs.m_data = nullptr; 64 } 65 66 StringPrinterBufferPointer(const StringPrinterBufferPointer& rhs) : 67 m_data(rhs.m_data), 68 m_size(rhs.m_size), 69 m_deleter(rhs.m_deleter) 70 { 71 rhs.m_data = nullptr; // this is why m_data has to be mutable 72 } 73 74 const T* 75 GetBytes () const 76 { 77 return m_data; 78 } 79 80 const S 81 GetSize () const 82 { 83 return m_size; 84 } 85 86 ~StringPrinterBufferPointer () 87 { 88 if (m_data && m_deleter) 89 m_deleter(m_data); 90 m_data = nullptr; 91 } 92 93 StringPrinterBufferPointer& 94 operator = (const StringPrinterBufferPointer& rhs) 95 { 96 if (m_data && m_deleter) 97 m_deleter(m_data); 98 m_data = rhs.m_data; 99 m_size = rhs.m_size; 100 m_deleter = rhs.m_deleter; 101 rhs.m_data = nullptr; 102 return *this; 103 } 104 105 private: 106 mutable const T* m_data; 107 size_t m_size; 108 Deleter m_deleter; 109 }; 110 111 // we define this for all values of type but only implement it for those we care about 112 // that's good because we get linker errors for any unsupported type 113 template <StringElementType type> 114 static StringPrinterBufferPointer<> 115 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 116 117 // mimic isprint() for Unicode codepoints 118 static bool 119 isprint(char32_t codepoint) 120 { 121 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 122 { 123 return false; 124 } 125 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 126 { 127 return false; 128 } 129 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 130 { 131 return false; 132 } 133 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 134 { 135 return false; 136 } 137 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 138 { 139 return false; 140 } 141 return true; 142 } 143 144 template <> 145 StringPrinterBufferPointer<> 146 GetPrintableImpl<StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 147 { 148 StringPrinterBufferPointer<> retval = {nullptr}; 149 150 switch (*buffer) 151 { 152 case 0: 153 retval = {"\\0",2}; 154 break; 155 case '\a': 156 retval = {"\\a",2}; 157 break; 158 case '\b': 159 retval = {"\\b",2}; 160 break; 161 case '\f': 162 retval = {"\\f",2}; 163 break; 164 case '\n': 165 retval = {"\\n",2}; 166 break; 167 case '\r': 168 retval = {"\\r",2}; 169 break; 170 case '\t': 171 retval = {"\\t",2}; 172 break; 173 case '\v': 174 retval = {"\\v",2}; 175 break; 176 case '\"': 177 retval = {"\\\"",2}; 178 break; 179 case '\\': 180 retval = {"\\\\",2}; 181 break; 182 default: 183 if (isprint(*buffer)) 184 retval = {buffer,1}; 185 else 186 { 187 uint8_t* data = new uint8_t[5]; 188 sprintf((char*)data,"\\x%02x",*buffer); 189 retval = {data, 4, [] (const uint8_t* c) {delete[] c;} }; 190 break; 191 } 192 } 193 194 next = buffer + 1; 195 return retval; 196 } 197 198 static char32_t 199 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 200 { 201 return (c0-192)*64+(c1-128); 202 } 203 static char32_t 204 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 205 { 206 return (c0-224)*4096+(c1-128)*64+(c2-128); 207 } 208 static char32_t 209 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 210 { 211 return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 212 } 213 214 template <> 215 StringPrinterBufferPointer<> 216 GetPrintableImpl<StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 217 { 218 StringPrinterBufferPointer<> retval {nullptr}; 219 220 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 221 222 if (1+buffer_end-buffer < utf8_encoded_len) 223 { 224 // I don't have enough bytes - print whatever I have left 225 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 226 next = buffer_end+1; 227 return retval; 228 } 229 230 char32_t codepoint = 0; 231 switch (utf8_encoded_len) 232 { 233 case 1: 234 // this is just an ASCII byte - ask ASCII 235 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 236 case 2: 237 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 238 break; 239 case 3: 240 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 241 break; 242 case 4: 243 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 244 break; 245 default: 246 // this is probably some bogus non-character thing 247 // just print it as-is and hope to sync up again soon 248 retval = {buffer,1}; 249 next = buffer+1; 250 return retval; 251 } 252 253 if (codepoint) 254 { 255 switch (codepoint) 256 { 257 case 0: 258 retval = {"\\0",2}; 259 break; 260 case '\a': 261 retval = {"\\a",2}; 262 break; 263 case '\b': 264 retval = {"\\b",2}; 265 break; 266 case '\f': 267 retval = {"\\f",2}; 268 break; 269 case '\n': 270 retval = {"\\n",2}; 271 break; 272 case '\r': 273 retval = {"\\r",2}; 274 break; 275 case '\t': 276 retval = {"\\t",2}; 277 break; 278 case '\v': 279 retval = {"\\v",2}; 280 break; 281 case '\"': 282 retval = {"\\\"",2}; 283 break; 284 case '\\': 285 retval = {"\\\\",2}; 286 break; 287 default: 288 if (isprint(codepoint)) 289 retval = {buffer,utf8_encoded_len}; 290 else 291 { 292 uint8_t* data = new uint8_t[11]; 293 sprintf((char*)data,"\\U%08x",codepoint); 294 retval = { data,10,[] (const uint8_t* c) {delete[] c;} }; 295 break; 296 } 297 } 298 299 next = buffer + utf8_encoded_len; 300 return retval; 301 } 302 303 // this should not happen - but just in case.. try to resync at some point 304 retval = {buffer,1}; 305 next = buffer+1; 306 return retval; 307 } 308 309 // Given a sequence of bytes, this function returns: 310 // a sequence of bytes to actually print out + a length 311 // the following unscanned position of the buffer is in next 312 static StringPrinterBufferPointer<> 313 GetPrintable(StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 314 { 315 if (!buffer) 316 return {nullptr}; 317 318 switch (type) 319 { 320 case StringElementType::ASCII: 321 return GetPrintableImpl<StringElementType::ASCII>(buffer, buffer_end, next); 322 case StringElementType::UTF8: 323 return GetPrintableImpl<StringElementType::UTF8>(buffer, buffer_end, next); 324 default: 325 return {nullptr}; 326 } 327 } 328 329 // use this call if you already have an LLDB-side buffer for the data 330 template<typename SourceDataType> 331 static bool 332 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 333 const SourceDataType*, 334 UTF8**, 335 UTF8*, 336 ConversionFlags), 337 const DataExtractor& data, 338 Stream& stream, 339 char prefix_token, 340 char quote, 341 uint32_t sourceSize, 342 bool escapeNonPrintables) 343 { 344 if (prefix_token != 0) 345 stream.Printf("%c",prefix_token); 346 if (quote != 0) 347 stream.Printf("%c",quote); 348 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 349 { 350 const int bufferSPSize = data.GetByteSize(); 351 if (sourceSize == 0) 352 { 353 const int origin_encoding = 8*sizeof(SourceDataType); 354 sourceSize = bufferSPSize/(origin_encoding / 4); 355 } 356 357 const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart(); 358 const SourceDataType *data_end_ptr = data_ptr + sourceSize; 359 360 while (data_ptr < data_end_ptr) 361 { 362 if (!*data_ptr) 363 { 364 data_end_ptr = data_ptr; 365 break; 366 } 367 data_ptr++; 368 } 369 370 data_ptr = (const SourceDataType*)data.GetDataStart(); 371 372 lldb::DataBufferSP utf8_data_buffer_sp; 373 UTF8* utf8_data_ptr = nullptr; 374 UTF8* utf8_data_end_ptr = nullptr; 375 376 if (ConvertFunction) 377 { 378 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 379 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 380 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 381 ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 382 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 383 } 384 else 385 { 386 // just copy the pointers - the cast is necessary to make the compiler happy 387 // but this should only happen if we are reading UTF8 data 388 utf8_data_ptr = (UTF8*)data_ptr; 389 utf8_data_end_ptr = (UTF8*)data_end_ptr; 390 } 391 392 // since we tend to accept partial data (and even partially malformed data) 393 // we might end up with no NULL terminator before the end_ptr 394 // hence we need to take a slower route and ensure we stay within boundaries 395 for (;utf8_data_ptr < utf8_data_end_ptr;) 396 { 397 if (!*utf8_data_ptr) 398 break; 399 400 if (escapeNonPrintables) 401 { 402 uint8_t* next_data = nullptr; 403 auto printable = GetPrintable(StringElementType::UTF8, utf8_data_ptr, utf8_data_end_ptr, next_data); 404 auto printable_bytes = printable.GetBytes(); 405 auto printable_size = printable.GetSize(); 406 if (!printable_bytes || !next_data) 407 { 408 // GetPrintable() failed on us - print one byte in a desperate resync attempt 409 printable_bytes = utf8_data_ptr; 410 printable_size = 1; 411 next_data = utf8_data_ptr+1; 412 } 413 for (unsigned c = 0; c < printable_size; c++) 414 stream.Printf("%c", *(printable_bytes+c)); 415 utf8_data_ptr = (uint8_t*)next_data; 416 } 417 else 418 { 419 stream.Printf("%c",*utf8_data_ptr); 420 utf8_data_ptr++; 421 } 422 } 423 } 424 if (quote != 0) 425 stream.Printf("%c",quote); 426 return true; 427 } 428 429 lldb_private::formatters::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 430 ReadStringAndDumpToStreamOptions() 431 { 432 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 433 } 434 435 lldb_private::formatters::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 436 ReadBufferAndDumpToStreamOptions() 437 { 438 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 439 } 440 441 442 namespace lldb_private 443 { 444 445 namespace formatters 446 { 447 448 template <> 449 bool 450 ReadStringAndDumpToStream<StringElementType::ASCII> (ReadStringAndDumpToStreamOptions options) 451 { 452 assert(options.GetStream() && "need a Stream to print the string to"); 453 Error my_error; 454 455 ProcessSP process_sp(options.GetProcessSP()); 456 457 if (process_sp.get() == nullptr || options.GetLocation() == 0) 458 return false; 459 460 size_t size; 461 462 if (options.GetSourceSize() == 0) 463 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 464 else if (!options.GetIgnoreMaxLength()) 465 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 466 else 467 size = options.GetSourceSize(); 468 469 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 470 471 process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 472 473 if (my_error.Fail()) 474 return false; 475 476 char prefix_token = options.GetPrefixToken(); 477 char quote = options.GetQuote(); 478 479 if (prefix_token != 0) 480 options.GetStream()->Printf("%c%c",prefix_token,quote); 481 else if (quote != 0) 482 options.GetStream()->Printf("%c",quote); 483 484 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 485 486 // since we tend to accept partial data (and even partially malformed data) 487 // we might end up with no NULL terminator before the end_ptr 488 // hence we need to take a slower route and ensure we stay within boundaries 489 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 490 { 491 if (options.GetEscapeNonPrintables()) 492 { 493 uint8_t* next_data = nullptr; 494 auto printable = GetPrintable(StringElementType::ASCII, data, data_end, next_data); 495 auto printable_bytes = printable.GetBytes(); 496 auto printable_size = printable.GetSize(); 497 if (!printable_bytes || !next_data) 498 { 499 // GetPrintable() failed on us - print one byte in a desperate resync attempt 500 printable_bytes = data; 501 printable_size = 1; 502 next_data = data+1; 503 } 504 for (unsigned c = 0; c < printable_size; c++) 505 options.GetStream()->Printf("%c", *(printable_bytes+c)); 506 data = (uint8_t*)next_data; 507 } 508 else 509 { 510 options.GetStream()->Printf("%c",*data); 511 data++; 512 } 513 } 514 515 if (quote != 0) 516 options.GetStream()->Printf("%c",quote); 517 518 return true; 519 } 520 521 template<typename SourceDataType> 522 static bool 523 ReadUTFBufferAndDumpToStream (const ReadStringAndDumpToStreamOptions& options, 524 ConversionResult (*ConvertFunction) (const SourceDataType**, 525 const SourceDataType*, 526 UTF8**, 527 UTF8*, 528 ConversionFlags)) 529 { 530 assert(options.GetStream() && "need a Stream to print the string to"); 531 532 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 533 return false; 534 535 lldb::ProcessSP process_sp(options.GetProcessSP()); 536 537 if (!process_sp) 538 return false; 539 540 const int type_width = sizeof(SourceDataType); 541 const int origin_encoding = 8 * type_width ; 542 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 543 return false; 544 // if not UTF8, I need a conversion function to return proper UTF8 545 if (origin_encoding != 8 && !ConvertFunction) 546 return false; 547 548 if (!options.GetStream()) 549 return false; 550 551 uint32_t sourceSize = options.GetSourceSize(); 552 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 553 554 if (!sourceSize) 555 { 556 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 557 needs_zero_terminator = true; 558 } 559 else if (!options.GetIgnoreMaxLength()) 560 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 561 562 const int bufferSPSize = sourceSize * type_width; 563 564 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 565 566 if (!buffer_sp->GetBytes()) 567 return false; 568 569 Error error; 570 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 571 572 if (needs_zero_terminator) 573 process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 574 else 575 process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 576 577 if (error.Fail()) 578 { 579 options.GetStream()->Printf("unable to read data"); 580 return true; 581 } 582 583 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 584 585 return DumpUTFBufferToStream(ConvertFunction, data, *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), sourceSize, options.GetEscapeNonPrintables()); 586 } 587 588 template <> 589 bool 590 ReadStringAndDumpToStream<StringElementType::UTF8> (ReadStringAndDumpToStreamOptions options) 591 { 592 return ReadUTFBufferAndDumpToStream<UTF8>(options, 593 nullptr); 594 } 595 596 template <> 597 bool 598 ReadStringAndDumpToStream<StringElementType::UTF16> (ReadStringAndDumpToStreamOptions options) 599 { 600 return ReadUTFBufferAndDumpToStream<UTF16>(options, 601 ConvertUTF16toUTF8); 602 } 603 604 template <> 605 bool 606 ReadStringAndDumpToStream<StringElementType::UTF32> (ReadStringAndDumpToStreamOptions options) 607 { 608 return ReadUTFBufferAndDumpToStream<UTF32>(options, 609 ConvertUTF32toUTF8); 610 } 611 612 template <> 613 bool 614 ReadBufferAndDumpToStream<StringElementType::UTF8> (ReadBufferAndDumpToStreamOptions options) 615 { 616 assert(options.GetStream() && "need a Stream to print the string to"); 617 618 return DumpUTFBufferToStream<UTF8>(nullptr, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 619 } 620 621 template <> 622 bool 623 ReadBufferAndDumpToStream<StringElementType::ASCII> (ReadBufferAndDumpToStreamOptions options) 624 { 625 // treat ASCII the same as UTF8 626 // FIXME: can we optimize ASCII some more? 627 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 628 } 629 630 template <> 631 bool 632 ReadBufferAndDumpToStream<StringElementType::UTF16> (ReadBufferAndDumpToStreamOptions options) 633 { 634 assert(options.GetStream() && "need a Stream to print the string to"); 635 636 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 637 } 638 639 template <> 640 bool 641 ReadBufferAndDumpToStream<StringElementType::UTF32> (ReadBufferAndDumpToStreamOptions options) 642 { 643 assert(options.GetStream() && "need a Stream to print the string to"); 644 645 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options.GetData(), *options.GetStream(), options.GetPrefixToken(), options.GetQuote(), options.GetSourceSize(), options.GetEscapeNonPrintables()); 646 } 647 648 } // namespace formatters 649 650 } // namespace lldb_private 651