1 //===-- StringPrinter.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/DataFormatters/StringPrinter.h" 11 12 #include "lldb/Core/DataExtractor.h" 13 #include "lldb/Core/Debugger.h" 14 #include "lldb/Core/Error.h" 15 #include "lldb/Core/ValueObject.h" 16 #include "lldb/Target/Language.h" 17 #include "lldb/Target/Process.h" 18 #include "lldb/Target/Target.h" 19 20 #include "llvm/Support/ConvertUTF.h" 21 22 #include <ctype.h> 23 #include <locale> 24 25 using namespace lldb; 26 using namespace lldb_private; 27 using namespace lldb_private::formatters; 28 29 // we define this for all values of type but only implement it for those we care about 30 // that's good because we get linker errors for any unsupported type 31 template <lldb_private::formatters::StringPrinter::StringElementType type> 32 static StringPrinter::StringPrinterBufferPointer<> 33 GetPrintableImpl(uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next); 34 35 // mimic isprint() for Unicode codepoints 36 static bool 37 isprint(char32_t codepoint) 38 { 39 if (codepoint <= 0x1F || codepoint == 0x7F) // C0 40 { 41 return false; 42 } 43 if (codepoint >= 0x80 && codepoint <= 0x9F) // C1 44 { 45 return false; 46 } 47 if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators 48 { 49 return false; 50 } 51 if (codepoint == 0x200E || codepoint == 0x200F || (codepoint >= 0x202A && codepoint <= 0x202E)) // bidirectional text control 52 { 53 return false; 54 } 55 if (codepoint >= 0xFFF9 && codepoint <= 0xFFFF) // interlinears and generally specials 56 { 57 return false; 58 } 59 return true; 60 } 61 62 template <> 63 StringPrinter::StringPrinterBufferPointer<> 64 GetPrintableImpl<StringPrinter::StringElementType::ASCII> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 65 { 66 StringPrinter::StringPrinterBufferPointer<> retval = {nullptr}; 67 68 switch (*buffer) 69 { 70 case 0: 71 retval = {"\\0",2}; 72 break; 73 case '\a': 74 retval = {"\\a",2}; 75 break; 76 case '\b': 77 retval = {"\\b",2}; 78 break; 79 case '\f': 80 retval = {"\\f",2}; 81 break; 82 case '\n': 83 retval = {"\\n",2}; 84 break; 85 case '\r': 86 retval = {"\\r",2}; 87 break; 88 case '\t': 89 retval = {"\\t",2}; 90 break; 91 case '\v': 92 retval = {"\\v",2}; 93 break; 94 case '\"': 95 retval = {"\\\"",2}; 96 break; 97 case '\\': 98 retval = {"\\\\",2}; 99 break; 100 default: 101 if (isprint(*buffer)) 102 retval = {buffer,1}; 103 else 104 { 105 uint8_t* data = new uint8_t[5]; 106 sprintf((char*)data,"\\x%02x",*buffer); 107 retval = {data, 4, [] (const uint8_t* c) {delete[] c;} }; 108 break; 109 } 110 } 111 112 next = buffer + 1; 113 return retval; 114 } 115 116 static char32_t 117 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1) 118 { 119 return (c0-192)*64+(c1-128); 120 } 121 static char32_t 122 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2) 123 { 124 return (c0-224)*4096+(c1-128)*64+(c2-128); 125 } 126 static char32_t 127 ConvertUTF8ToCodePoint (unsigned char c0, unsigned char c1, unsigned char c2, unsigned char c3) 128 { 129 return (c0-240)*262144+(c2-128)*4096+(c2-128)*64+(c3-128); 130 } 131 132 template <> 133 StringPrinter::StringPrinterBufferPointer<> 134 GetPrintableImpl<StringPrinter::StringElementType::UTF8> (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 135 { 136 StringPrinter::StringPrinterBufferPointer<> retval {nullptr}; 137 138 unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer); 139 140 if (1+buffer_end-buffer < utf8_encoded_len) 141 { 142 // I don't have enough bytes - print whatever I have left 143 retval = {buffer,static_cast<size_t>(1+buffer_end-buffer)}; 144 next = buffer_end+1; 145 return retval; 146 } 147 148 char32_t codepoint = 0; 149 switch (utf8_encoded_len) 150 { 151 case 1: 152 // this is just an ASCII byte - ask ASCII 153 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next); 154 case 2: 155 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1)); 156 break; 157 case 3: 158 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2)); 159 break; 160 case 4: 161 codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer+1), (unsigned char)*(buffer+2), (unsigned char)*(buffer+3)); 162 break; 163 default: 164 // this is probably some bogus non-character thing 165 // just print it as-is and hope to sync up again soon 166 retval = {buffer,1}; 167 next = buffer+1; 168 return retval; 169 } 170 171 if (codepoint) 172 { 173 switch (codepoint) 174 { 175 case 0: 176 retval = {"\\0",2}; 177 break; 178 case '\a': 179 retval = {"\\a",2}; 180 break; 181 case '\b': 182 retval = {"\\b",2}; 183 break; 184 case '\f': 185 retval = {"\\f",2}; 186 break; 187 case '\n': 188 retval = {"\\n",2}; 189 break; 190 case '\r': 191 retval = {"\\r",2}; 192 break; 193 case '\t': 194 retval = {"\\t",2}; 195 break; 196 case '\v': 197 retval = {"\\v",2}; 198 break; 199 case '\"': 200 retval = {"\\\"",2}; 201 break; 202 case '\\': 203 retval = {"\\\\",2}; 204 break; 205 default: 206 if (isprint(codepoint)) 207 retval = {buffer,utf8_encoded_len}; 208 else 209 { 210 uint8_t* data = new uint8_t[11]; 211 sprintf((char*)data,"\\U%08x",codepoint); 212 retval = { data,10,[] (const uint8_t* c) {delete[] c;} }; 213 break; 214 } 215 } 216 217 next = buffer + utf8_encoded_len; 218 return retval; 219 } 220 221 // this should not happen - but just in case.. try to resync at some point 222 retval = {buffer,1}; 223 next = buffer+1; 224 return retval; 225 } 226 227 // Given a sequence of bytes, this function returns: 228 // a sequence of bytes to actually print out + a length 229 // the following unscanned position of the buffer is in next 230 static StringPrinter::StringPrinterBufferPointer<> 231 GetPrintable(StringPrinter::StringElementType type, uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) 232 { 233 if (!buffer) 234 return {nullptr}; 235 236 switch (type) 237 { 238 case StringPrinter::StringElementType::ASCII: 239 return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(buffer, buffer_end, next); 240 case StringPrinter::StringElementType::UTF8: 241 return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(buffer, buffer_end, next); 242 default: 243 return {nullptr}; 244 } 245 } 246 247 StringPrinter::EscapingHelper 248 StringPrinter::GetDefaultEscapingHelper (GetPrintableElementType elem_type) 249 { 250 switch (elem_type) 251 { 252 case GetPrintableElementType::UTF8: 253 return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> { 254 return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, buffer_end, next); 255 }; 256 case GetPrintableElementType::ASCII: 257 return [] (uint8_t* buffer, uint8_t* buffer_end, uint8_t*& next) -> StringPrinter::StringPrinterBufferPointer<> { 258 return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, buffer_end, next); 259 }; 260 } 261 } 262 263 // use this call if you already have an LLDB-side buffer for the data 264 template<typename SourceDataType> 265 static bool 266 DumpUTFBufferToStream (ConversionResult (*ConvertFunction) (const SourceDataType**, 267 const SourceDataType*, 268 UTF8**, 269 UTF8*, 270 ConversionFlags), 271 const StringPrinter::ReadBufferAndDumpToStreamOptions& dump_options) 272 { 273 Stream &stream(*dump_options.GetStream()); 274 if (dump_options.GetPrefixToken() != 0) 275 stream.Printf("%c",dump_options.GetPrefixToken()); 276 if (dump_options.GetQuote() != 0) 277 stream.Printf("%c",dump_options.GetQuote()); 278 auto data(dump_options.GetData()); 279 auto source_size(dump_options.GetSourceSize()); 280 if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) 281 { 282 const int bufferSPSize = data.GetByteSize(); 283 if (dump_options.GetSourceSize() == 0) 284 { 285 const int origin_encoding = 8*sizeof(SourceDataType); 286 source_size = bufferSPSize/(origin_encoding / 4); 287 } 288 289 const SourceDataType *data_ptr = (const SourceDataType*)data.GetDataStart(); 290 const SourceDataType *data_end_ptr = data_ptr + source_size; 291 292 const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator(); 293 294 if (zero_is_terminator) 295 { 296 while (data_ptr < data_end_ptr) 297 { 298 if (!*data_ptr) 299 { 300 data_end_ptr = data_ptr; 301 break; 302 } 303 data_ptr++; 304 } 305 306 data_ptr = (const SourceDataType*)data.GetDataStart(); 307 } 308 309 lldb::DataBufferSP utf8_data_buffer_sp; 310 UTF8* utf8_data_ptr = nullptr; 311 UTF8* utf8_data_end_ptr = nullptr; 312 313 if (ConvertFunction) 314 { 315 utf8_data_buffer_sp.reset(new DataBufferHeap(4*bufferSPSize,0)); 316 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); 317 utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize(); 318 ConvertFunction ( &data_ptr, data_end_ptr, &utf8_data_ptr, utf8_data_end_ptr, lenientConversion ); 319 if (false == zero_is_terminator) 320 utf8_data_end_ptr = utf8_data_ptr; 321 utf8_data_ptr = (UTF8*)utf8_data_buffer_sp->GetBytes(); // needed because the ConvertFunction will change the value of the data_ptr 322 } 323 else 324 { 325 // just copy the pointers - the cast is necessary to make the compiler happy 326 // but this should only happen if we are reading UTF8 data 327 utf8_data_ptr = (UTF8*)data_ptr; 328 utf8_data_end_ptr = (UTF8*)data_end_ptr; 329 } 330 331 const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); 332 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 333 if (escape_non_printables) 334 { 335 if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) 336 escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8); 337 else 338 escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::UTF8); 339 } 340 341 // since we tend to accept partial data (and even partially malformed data) 342 // we might end up with no NULL terminator before the end_ptr 343 // hence we need to take a slower route and ensure we stay within boundaries 344 for (;utf8_data_ptr < utf8_data_end_ptr;) 345 { 346 if (zero_is_terminator && !*utf8_data_ptr) 347 break; 348 349 if (escape_non_printables) 350 { 351 uint8_t* next_data = nullptr; 352 auto printable = escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); 353 auto printable_bytes = printable.GetBytes(); 354 auto printable_size = printable.GetSize(); 355 if (!printable_bytes || !next_data) 356 { 357 // GetPrintable() failed on us - print one byte in a desperate resync attempt 358 printable_bytes = utf8_data_ptr; 359 printable_size = 1; 360 next_data = utf8_data_ptr+1; 361 } 362 for (unsigned c = 0; c < printable_size; c++) 363 stream.Printf("%c", *(printable_bytes+c)); 364 utf8_data_ptr = (uint8_t*)next_data; 365 } 366 else 367 { 368 stream.Printf("%c",*utf8_data_ptr); 369 utf8_data_ptr++; 370 } 371 } 372 } 373 if (dump_options.GetQuote() != 0) 374 stream.Printf("%c",dump_options.GetQuote()); 375 return true; 376 } 377 378 lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::ReadStringAndDumpToStreamOptions (ValueObject& valobj) : 379 ReadStringAndDumpToStreamOptions() 380 { 381 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 382 } 383 384 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (ValueObject& valobj) : 385 ReadBufferAndDumpToStreamOptions() 386 { 387 SetEscapeNonPrintables(valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables()); 388 } 389 390 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::ReadBufferAndDumpToStreamOptions (const ReadStringAndDumpToStreamOptions& options) : 391 ReadBufferAndDumpToStreamOptions() 392 { 393 SetStream(options.GetStream()); 394 SetPrefixToken(options.GetPrefixToken()); 395 SetQuote(options.GetQuote()); 396 SetEscapeNonPrintables(options.GetEscapeNonPrintables()); 397 SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); 398 SetLanguage(options.GetLanguage()); 399 } 400 401 402 namespace lldb_private 403 { 404 405 namespace formatters 406 { 407 408 template <> 409 bool 410 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadStringAndDumpToStreamOptions& options) 411 { 412 assert(options.GetStream() && "need a Stream to print the string to"); 413 Error my_error; 414 415 ProcessSP process_sp(options.GetProcessSP()); 416 417 if (process_sp.get() == nullptr || options.GetLocation() == 0) 418 return false; 419 420 size_t size; 421 422 if (options.GetSourceSize() == 0) 423 size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 424 else if (!options.GetIgnoreMaxLength()) 425 size = std::min(options.GetSourceSize(),process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 426 else 427 size = options.GetSourceSize(); 428 429 lldb::DataBufferSP buffer_sp(new DataBufferHeap(size,0)); 430 431 process_sp->ReadCStringFromMemory(options.GetLocation(), (char*)buffer_sp->GetBytes(), size, my_error); 432 433 if (my_error.Fail()) 434 return false; 435 436 char prefix_token = options.GetPrefixToken(); 437 char quote = options.GetQuote(); 438 439 if (prefix_token != 0) 440 options.GetStream()->Printf("%c%c",prefix_token,quote); 441 else if (quote != 0) 442 options.GetStream()->Printf("%c",quote); 443 444 uint8_t* data_end = buffer_sp->GetBytes()+buffer_sp->GetByteSize(); 445 446 const bool escape_non_printables = options.GetEscapeNonPrintables(); 447 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; 448 if (escape_non_printables) 449 { 450 if (Language *language = Language::FindPlugin(options.GetLanguage())) 451 escaping_callback = language->GetStringPrinterEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII); 452 else 453 escaping_callback = lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(lldb_private::formatters::StringPrinter::GetPrintableElementType::ASCII); 454 } 455 456 // since we tend to accept partial data (and even partially malformed data) 457 // we might end up with no NULL terminator before the end_ptr 458 // hence we need to take a slower route and ensure we stay within boundaries 459 for (uint8_t* data = buffer_sp->GetBytes(); *data && (data < data_end);) 460 { 461 if (escape_non_printables) 462 { 463 uint8_t* next_data = nullptr; 464 auto printable = escaping_callback(data, data_end, next_data); 465 auto printable_bytes = printable.GetBytes(); 466 auto printable_size = printable.GetSize(); 467 if (!printable_bytes || !next_data) 468 { 469 // GetPrintable() failed on us - print one byte in a desperate resync attempt 470 printable_bytes = data; 471 printable_size = 1; 472 next_data = data+1; 473 } 474 for (unsigned c = 0; c < printable_size; c++) 475 options.GetStream()->Printf("%c", *(printable_bytes+c)); 476 data = (uint8_t*)next_data; 477 } 478 else 479 { 480 options.GetStream()->Printf("%c",*data); 481 data++; 482 } 483 } 484 485 if (quote != 0) 486 options.GetStream()->Printf("%c",quote); 487 488 return true; 489 } 490 491 template<typename SourceDataType> 492 static bool 493 ReadUTFBufferAndDumpToStream (const StringPrinter::ReadStringAndDumpToStreamOptions& options, 494 ConversionResult (*ConvertFunction) (const SourceDataType**, 495 const SourceDataType*, 496 UTF8**, 497 UTF8*, 498 ConversionFlags)) 499 { 500 assert(options.GetStream() && "need a Stream to print the string to"); 501 502 if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) 503 return false; 504 505 lldb::ProcessSP process_sp(options.GetProcessSP()); 506 507 if (!process_sp) 508 return false; 509 510 const int type_width = sizeof(SourceDataType); 511 const int origin_encoding = 8 * type_width ; 512 if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) 513 return false; 514 // if not UTF8, I need a conversion function to return proper UTF8 515 if (origin_encoding != 8 && !ConvertFunction) 516 return false; 517 518 if (!options.GetStream()) 519 return false; 520 521 uint32_t sourceSize = options.GetSourceSize(); 522 bool needs_zero_terminator = options.GetNeedsZeroTermination(); 523 524 if (!sourceSize) 525 { 526 sourceSize = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); 527 needs_zero_terminator = true; 528 } 529 else if (!options.GetIgnoreMaxLength()) 530 sourceSize = std::min(sourceSize,process_sp->GetTarget().GetMaximumSizeOfStringSummary()); 531 532 const int bufferSPSize = sourceSize * type_width; 533 534 lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize,0)); 535 536 if (!buffer_sp->GetBytes()) 537 return false; 538 539 Error error; 540 char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes()); 541 542 if (needs_zero_terminator) 543 process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); 544 else 545 process_sp->ReadMemoryFromInferior(options.GetLocation(), (char*)buffer_sp->GetBytes(), bufferSPSize, error); 546 547 if (error.Fail()) 548 { 549 options.GetStream()->Printf("unable to read data"); 550 return true; 551 } 552 553 DataExtractor data(buffer_sp, process_sp->GetByteOrder(), process_sp->GetAddressByteSize()); 554 555 StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options); 556 dump_options.SetData(data); 557 dump_options.SetSourceSize(sourceSize); 558 559 return DumpUTFBufferToStream(ConvertFunction, dump_options); 560 } 561 562 template <> 563 bool 564 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadStringAndDumpToStreamOptions& options) 565 { 566 return ReadUTFBufferAndDumpToStream<UTF8>(options, 567 nullptr); 568 } 569 570 template <> 571 bool 572 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadStringAndDumpToStreamOptions& options) 573 { 574 return ReadUTFBufferAndDumpToStream<UTF16>(options, 575 ConvertUTF16toUTF8); 576 } 577 578 template <> 579 bool 580 StringPrinter::ReadStringAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadStringAndDumpToStreamOptions& options) 581 { 582 return ReadUTFBufferAndDumpToStream<UTF32>(options, 583 ConvertUTF32toUTF8); 584 } 585 586 template <> 587 bool 588 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF8> (const ReadBufferAndDumpToStreamOptions& options) 589 { 590 assert(options.GetStream() && "need a Stream to print the string to"); 591 592 return DumpUTFBufferToStream<UTF8>(nullptr, options); 593 } 594 595 template <> 596 bool 597 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::ASCII> (const ReadBufferAndDumpToStreamOptions& options) 598 { 599 // treat ASCII the same as UTF8 600 // FIXME: can we optimize ASCII some more? 601 return ReadBufferAndDumpToStream<StringElementType::UTF8>(options); 602 } 603 604 template <> 605 bool 606 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF16> (const ReadBufferAndDumpToStreamOptions& options) 607 { 608 assert(options.GetStream() && "need a Stream to print the string to"); 609 610 return DumpUTFBufferToStream(ConvertUTF16toUTF8, options); 611 } 612 613 template <> 614 bool 615 StringPrinter::ReadBufferAndDumpToStream<StringPrinter::StringElementType::UTF32> (const ReadBufferAndDumpToStreamOptions& options) 616 { 617 assert(options.GetStream() && "need a Stream to print the string to"); 618 619 return DumpUTFBufferToStream(ConvertUTF32toUTF8, options); 620 } 621 622 } // namespace formatters 623 624 } // namespace lldb_private 625