1 //===-- PythonDataObjects.cpp ------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifdef LLDB_DISABLE_PYTHON 11 12 // Python is disabled in this build 13 14 #else 15 16 #include "lldb-python.h" 17 #include "PythonDataObjects.h" 18 #include "ScriptInterpreterPython.h" 19 20 #include "lldb/Core/Stream.h" 21 #include "lldb/Host/File.h" 22 #include "lldb/Interpreter/ScriptInterpreter.h" 23 24 #include <stdio.h> 25 26 using namespace lldb_private; 27 using namespace lldb; 28 29 void 30 StructuredPythonObject::Dump(Stream &s) const 31 { 32 s << "Python Obj: 0x" << GetValue(); 33 } 34 35 //---------------------------------------------------------------------- 36 // PythonObject 37 //---------------------------------------------------------------------- 38 39 void 40 PythonObject::Dump(Stream &strm) const 41 { 42 if (m_py_obj) 43 { 44 FILE *file = ::tmpfile(); 45 if (file) 46 { 47 ::PyObject_Print (m_py_obj, file, 0); 48 const long length = ftell (file); 49 if (length) 50 { 51 ::rewind(file); 52 std::vector<char> file_contents (length,'\0'); 53 const size_t length_read = ::fread (file_contents.data(), 1, file_contents.size(), file); 54 if (length_read > 0) 55 strm.Write (file_contents.data(), length_read); 56 } 57 ::fclose (file); 58 } 59 } 60 else 61 strm.PutCString ("NULL"); 62 } 63 64 PyObjectType 65 PythonObject::GetObjectType() const 66 { 67 if (!IsAllocated()) 68 return PyObjectType::None; 69 70 if (PythonModule::Check(m_py_obj)) 71 return PyObjectType::Module; 72 if (PythonList::Check(m_py_obj)) 73 return PyObjectType::List; 74 if (PythonDictionary::Check(m_py_obj)) 75 return PyObjectType::Dictionary; 76 if (PythonString::Check(m_py_obj)) 77 return PyObjectType::String; 78 if (PythonInteger::Check(m_py_obj)) 79 return PyObjectType::Integer; 80 if (PythonFile::Check(m_py_obj)) 81 return PyObjectType::File; 82 return PyObjectType::Unknown; 83 } 84 85 PythonString 86 PythonObject::Repr() const 87 { 88 if (!m_py_obj) 89 return PythonString(); 90 PyObject *repr = PyObject_Repr(m_py_obj); 91 if (!repr) 92 return PythonString(); 93 return PythonString(PyRefType::Owned, repr); 94 } 95 96 PythonString 97 PythonObject::Str() const 98 { 99 if (!m_py_obj) 100 return PythonString(); 101 PyObject *str = PyObject_Str(m_py_obj); 102 if (!str) 103 return PythonString(); 104 return PythonString(PyRefType::Owned, str); 105 } 106 107 PythonObject 108 PythonObject::ResolveNameGlobal(llvm::StringRef name) 109 { 110 return PythonModule::MainModule().ResolveName(name); 111 } 112 113 PythonObject 114 PythonObject::ResolveName(llvm::StringRef name) const 115 { 116 // Resolve the name in the context of the specified object. If, 117 // for example, `this` refers to a PyModule, then this will look for 118 // `name` in this module. If `this` refers to a PyType, then it will 119 // resolve `name` as an attribute of that type. If `this` refers to 120 // an instance of an object, then it will resolve `name` as the value 121 // of the specified field. 122 // 123 // This function handles dotted names so that, for example, if `m_py_obj` 124 // refers to the `sys` module, and `name` == "path.append", then it 125 // will find the function `sys.path.append`. 126 127 size_t dot_pos = name.find_first_of('.'); 128 if (dot_pos == llvm::StringRef::npos) 129 { 130 // No dots in the name, we should be able to find the value immediately 131 // as an attribute of `use_object`. 132 return GetAttributeValue(name); 133 } 134 135 // Look up the first piece of the name, and resolve the rest as a child of that. 136 PythonObject parent = ResolveName(name.substr(0, dot_pos)); 137 if (!parent.IsAllocated()) 138 return PythonObject(); 139 140 // Tail recursion.. should be optimized by the compiler 141 return parent.ResolveName(name.substr(dot_pos + 1)); 142 } 143 144 bool 145 PythonObject::HasAttribute(llvm::StringRef attr) const 146 { 147 if (!IsValid()) 148 return false; 149 PythonString py_attr(attr); 150 return !!PyObject_HasAttr(m_py_obj, py_attr.get()); 151 } 152 153 PythonObject 154 PythonObject::GetAttributeValue(llvm::StringRef attr) const 155 { 156 if (!IsValid()) 157 return PythonObject(); 158 159 PythonString py_attr(attr); 160 if (!PyObject_HasAttr(m_py_obj, py_attr.get())) 161 return PythonObject(); 162 163 return PythonObject(PyRefType::Owned, 164 PyObject_GetAttr(m_py_obj, py_attr.get())); 165 } 166 167 bool 168 PythonObject::IsNone() const 169 { 170 return m_py_obj == Py_None; 171 } 172 173 bool 174 PythonObject::IsValid() const 175 { 176 return m_py_obj != nullptr; 177 } 178 179 bool 180 PythonObject::IsAllocated() const 181 { 182 return IsValid() && !IsNone(); 183 } 184 185 StructuredData::ObjectSP 186 PythonObject::CreateStructuredObject() const 187 { 188 switch (GetObjectType()) 189 { 190 case PyObjectType::Dictionary: 191 return PythonDictionary(PyRefType::Borrowed, m_py_obj).CreateStructuredDictionary(); 192 case PyObjectType::Integer: 193 return PythonInteger(PyRefType::Borrowed, m_py_obj).CreateStructuredInteger(); 194 case PyObjectType::List: 195 return PythonList(PyRefType::Borrowed, m_py_obj).CreateStructuredArray(); 196 case PyObjectType::String: 197 return PythonString(PyRefType::Borrowed, m_py_obj).CreateStructuredString(); 198 case PyObjectType::None: 199 return StructuredData::ObjectSP(); 200 default: 201 return StructuredData::ObjectSP(new StructuredPythonObject(m_py_obj)); 202 } 203 } 204 205 //---------------------------------------------------------------------- 206 // PythonString 207 //---------------------------------------------------------------------- 208 209 PythonString::PythonString(PyRefType type, PyObject *py_obj) 210 : PythonObject() 211 { 212 Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a string 213 } 214 215 PythonString::PythonString(const PythonString &object) 216 : PythonObject(object) 217 { 218 } 219 220 PythonString::PythonString(llvm::StringRef string) 221 : PythonObject() 222 { 223 SetString(string); 224 } 225 226 PythonString::PythonString(const char *string) 227 : PythonObject() 228 { 229 SetString(llvm::StringRef(string)); 230 } 231 232 PythonString::PythonString() 233 : PythonObject() 234 { 235 } 236 237 PythonString::~PythonString () 238 { 239 } 240 241 bool 242 PythonString::Check(PyObject *py_obj) 243 { 244 if (!py_obj) 245 return false; 246 247 if (PyUnicode_Check(py_obj)) 248 return true; 249 #if PY_MAJOR_VERSION < 3 250 if (PyString_Check(py_obj)) 251 return true; 252 #endif 253 return false; 254 } 255 256 void 257 PythonString::Reset(PyRefType type, PyObject *py_obj) 258 { 259 // Grab the desired reference type so that if we end up rejecting 260 // `py_obj` it still gets decremented if necessary. 261 PythonObject result(type, py_obj); 262 263 if (!PythonString::Check(py_obj)) 264 { 265 PythonObject::Reset(); 266 return; 267 } 268 #if PY_MAJOR_VERSION < 3 269 // In Python 2, Don't store PyUnicode objects directly, because we need 270 // access to their underlying character buffers which Python 2 doesn't 271 // provide. 272 if (PyUnicode_Check(py_obj)) 273 result.Reset(PyRefType::Owned, PyUnicode_AsUTF8String(result.get())); 274 #endif 275 // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls 276 // back into the virtual implementation. 277 PythonObject::Reset(PyRefType::Borrowed, result.get()); 278 } 279 280 llvm::StringRef 281 PythonString::GetString() const 282 { 283 if (!IsValid()) 284 return llvm::StringRef(); 285 286 Py_ssize_t size; 287 char *c; 288 289 #if PY_MAJOR_VERSION >= 3 290 c = PyUnicode_AsUTF8AndSize(m_py_obj, &size); 291 #else 292 PyString_AsStringAndSize(m_py_obj, &c, &size); 293 #endif 294 return llvm::StringRef(c, size); 295 } 296 297 size_t 298 PythonString::GetSize() const 299 { 300 if (IsValid()) 301 { 302 #if PY_MAJOR_VERSION >= 3 303 return PyUnicode_GetSize(m_py_obj); 304 #else 305 return PyString_Size(m_py_obj); 306 #endif 307 } 308 return 0; 309 } 310 311 void 312 PythonString::SetString (llvm::StringRef string) 313 { 314 #if PY_MAJOR_VERSION >= 3 315 PyObject *unicode = PyUnicode_FromStringAndSize(string.data(), string.size()); 316 PythonObject::Reset(PyRefType::Owned, unicode); 317 #else 318 PyObject *str = PyString_FromStringAndSize(string.data(), string.size()); 319 PythonObject::Reset(PyRefType::Owned, str); 320 #endif 321 } 322 323 StructuredData::StringSP 324 PythonString::CreateStructuredString() const 325 { 326 StructuredData::StringSP result(new StructuredData::String); 327 result->SetValue(GetString()); 328 return result; 329 } 330 331 //---------------------------------------------------------------------- 332 // PythonInteger 333 //---------------------------------------------------------------------- 334 335 PythonInteger::PythonInteger() 336 : PythonObject() 337 { 338 339 } 340 341 PythonInteger::PythonInteger(PyRefType type, PyObject *py_obj) 342 : PythonObject() 343 { 344 Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a integer type 345 } 346 347 PythonInteger::PythonInteger(const PythonInteger &object) 348 : PythonObject(object) 349 { 350 } 351 352 PythonInteger::PythonInteger(int64_t value) 353 : PythonObject() 354 { 355 SetInteger(value); 356 } 357 358 359 PythonInteger::~PythonInteger () 360 { 361 } 362 363 bool 364 PythonInteger::Check(PyObject *py_obj) 365 { 366 if (!py_obj) 367 return false; 368 369 #if PY_MAJOR_VERSION >= 3 370 // Python 3 does not have PyInt_Check. There is only one type of 371 // integral value, long. 372 return PyLong_Check(py_obj); 373 #else 374 return PyLong_Check(py_obj) || PyInt_Check(py_obj); 375 #endif 376 } 377 378 void 379 PythonInteger::Reset(PyRefType type, PyObject *py_obj) 380 { 381 // Grab the desired reference type so that if we end up rejecting 382 // `py_obj` it still gets decremented if necessary. 383 PythonObject result(type, py_obj); 384 385 if (!PythonInteger::Check(py_obj)) 386 { 387 PythonObject::Reset(); 388 return; 389 } 390 391 #if PY_MAJOR_VERSION < 3 392 // Always store this as a PyLong, which makes interoperability between 393 // Python 2.x and Python 3.x easier. This is only necessary in 2.x, 394 // since 3.x doesn't even have a PyInt. 395 if (PyInt_Check(py_obj)) 396 { 397 // Since we converted the original object to a different type, the new 398 // object is an owned object regardless of the ownership semantics requested 399 // by the user. 400 result.Reset(PyRefType::Owned, PyLong_FromLongLong(PyInt_AsLong(py_obj))); 401 } 402 #endif 403 404 assert(PyLong_Check(result.get()) && "Couldn't get a PyLong from this PyObject"); 405 406 // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls 407 // back into the virtual implementation. 408 PythonObject::Reset(PyRefType::Borrowed, result.get()); 409 } 410 411 int64_t 412 PythonInteger::GetInteger() const 413 { 414 if (m_py_obj) 415 { 416 assert(PyLong_Check(m_py_obj) && "PythonInteger::GetInteger has a PyObject that isn't a PyLong"); 417 418 return PyLong_AsLongLong(m_py_obj); 419 } 420 return UINT64_MAX; 421 } 422 423 void 424 PythonInteger::SetInteger(int64_t value) 425 { 426 PythonObject::Reset(PyRefType::Owned, PyLong_FromLongLong(value)); 427 } 428 429 StructuredData::IntegerSP 430 PythonInteger::CreateStructuredInteger() const 431 { 432 StructuredData::IntegerSP result(new StructuredData::Integer); 433 result->SetValue(GetInteger()); 434 return result; 435 } 436 437 //---------------------------------------------------------------------- 438 // PythonList 439 //---------------------------------------------------------------------- 440 441 PythonList::PythonList(PyInitialValue value) 442 : PythonObject() 443 { 444 if (value == PyInitialValue::Empty) 445 Reset(PyRefType::Owned, PyList_New(0)); 446 } 447 448 PythonList::PythonList(int list_size) 449 : PythonObject() 450 { 451 Reset(PyRefType::Owned, PyList_New(list_size)); 452 } 453 454 PythonList::PythonList(PyRefType type, PyObject *py_obj) 455 : PythonObject() 456 { 457 Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a list 458 } 459 460 PythonList::PythonList(const PythonList &list) 461 : PythonObject(list) 462 { 463 } 464 465 PythonList::~PythonList () 466 { 467 } 468 469 bool 470 PythonList::Check(PyObject *py_obj) 471 { 472 if (!py_obj) 473 return false; 474 return PyList_Check(py_obj); 475 } 476 477 void 478 PythonList::Reset(PyRefType type, PyObject *py_obj) 479 { 480 // Grab the desired reference type so that if we end up rejecting 481 // `py_obj` it still gets decremented if necessary. 482 PythonObject result(type, py_obj); 483 484 if (!PythonList::Check(py_obj)) 485 { 486 PythonObject::Reset(); 487 return; 488 } 489 490 // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls 491 // back into the virtual implementation. 492 PythonObject::Reset(PyRefType::Borrowed, result.get()); 493 } 494 495 uint32_t 496 PythonList::GetSize() const 497 { 498 if (IsValid()) 499 return PyList_GET_SIZE(m_py_obj); 500 return 0; 501 } 502 503 PythonObject 504 PythonList::GetItemAtIndex(uint32_t index) const 505 { 506 if (IsValid()) 507 return PythonObject(PyRefType::Borrowed, PyList_GetItem(m_py_obj, index)); 508 return PythonObject(); 509 } 510 511 void 512 PythonList::SetItemAtIndex(uint32_t index, const PythonObject &object) 513 { 514 if (IsAllocated() && object.IsValid()) 515 { 516 // PyList_SetItem is documented to "steal" a reference, so we need to 517 // convert it to an owned reference by incrementing it. 518 Py_INCREF(object.get()); 519 PyList_SetItem(m_py_obj, index, object.get()); 520 } 521 } 522 523 void 524 PythonList::AppendItem(const PythonObject &object) 525 { 526 if (IsAllocated() && object.IsValid()) 527 { 528 // `PyList_Append` does *not* steal a reference, so do not call `Py_INCREF` 529 // here like we do with `PyList_SetItem`. 530 PyList_Append(m_py_obj, object.get()); 531 } 532 } 533 534 StructuredData::ArraySP 535 PythonList::CreateStructuredArray() const 536 { 537 StructuredData::ArraySP result(new StructuredData::Array); 538 uint32_t count = GetSize(); 539 for (uint32_t i = 0; i < count; ++i) 540 { 541 PythonObject obj = GetItemAtIndex(i); 542 result->AddItem(obj.CreateStructuredObject()); 543 } 544 return result; 545 } 546 547 //---------------------------------------------------------------------- 548 // PythonDictionary 549 //---------------------------------------------------------------------- 550 551 PythonDictionary::PythonDictionary(PyInitialValue value) 552 : PythonObject() 553 { 554 if (value == PyInitialValue::Empty) 555 Reset(PyRefType::Owned, PyDict_New()); 556 } 557 558 PythonDictionary::PythonDictionary(PyRefType type, PyObject *py_obj) 559 : PythonObject() 560 { 561 Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a dictionary 562 } 563 564 PythonDictionary::PythonDictionary(const PythonDictionary &object) 565 : PythonObject(object) 566 { 567 } 568 569 PythonDictionary::~PythonDictionary () 570 { 571 } 572 573 bool 574 PythonDictionary::Check(PyObject *py_obj) 575 { 576 if (!py_obj) 577 return false; 578 579 return PyDict_Check(py_obj); 580 } 581 582 void 583 PythonDictionary::Reset(PyRefType type, PyObject *py_obj) 584 { 585 // Grab the desired reference type so that if we end up rejecting 586 // `py_obj` it still gets decremented if necessary. 587 PythonObject result(type, py_obj); 588 589 if (!PythonDictionary::Check(py_obj)) 590 { 591 PythonObject::Reset(); 592 return; 593 } 594 595 // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls 596 // back into the virtual implementation. 597 PythonObject::Reset(PyRefType::Borrowed, result.get()); 598 } 599 600 uint32_t 601 PythonDictionary::GetSize() const 602 { 603 if (IsValid()) 604 return PyDict_Size(m_py_obj); 605 return 0; 606 } 607 608 PythonList 609 PythonDictionary::GetKeys() const 610 { 611 if (IsValid()) 612 return PythonList(PyRefType::Owned, PyDict_Keys(m_py_obj)); 613 return PythonList(PyInitialValue::Invalid); 614 } 615 616 PythonObject 617 PythonDictionary::GetItemForKey(const PythonObject &key) const 618 { 619 if (IsAllocated() && key.IsValid()) 620 return PythonObject(PyRefType::Borrowed, PyDict_GetItem(m_py_obj, key.get())); 621 return PythonObject(); 622 } 623 624 void 625 PythonDictionary::SetItemForKey(const PythonObject &key, const PythonObject &value) 626 { 627 if (IsAllocated() && key.IsValid() && value.IsValid()) 628 PyDict_SetItem(m_py_obj, key.get(), value.get()); 629 } 630 631 StructuredData::DictionarySP 632 PythonDictionary::CreateStructuredDictionary() const 633 { 634 StructuredData::DictionarySP result(new StructuredData::Dictionary); 635 PythonList keys(GetKeys()); 636 uint32_t num_keys = keys.GetSize(); 637 for (uint32_t i = 0; i < num_keys; ++i) 638 { 639 PythonObject key = keys.GetItemAtIndex(i); 640 PythonObject value = GetItemForKey(key); 641 StructuredData::ObjectSP structured_value = value.CreateStructuredObject(); 642 result->AddItem(key.Str().GetString(), structured_value); 643 } 644 return result; 645 } 646 647 PythonModule::PythonModule() : PythonObject() 648 { 649 } 650 651 PythonModule::PythonModule(PyRefType type, PyObject *py_obj) 652 { 653 Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a module 654 } 655 656 PythonModule::PythonModule(const PythonModule &dict) : PythonObject(dict) 657 { 658 } 659 660 PythonModule::~PythonModule() 661 { 662 } 663 664 PythonModule 665 PythonModule::MainModule() 666 { 667 return PythonModule(PyRefType::Borrowed, PyImport_AddModule("__main__")); 668 } 669 670 bool 671 PythonModule::Check(PyObject *py_obj) 672 { 673 if (!py_obj) 674 return false; 675 676 return PyModule_Check(py_obj); 677 } 678 679 void 680 PythonModule::Reset(PyRefType type, PyObject *py_obj) 681 { 682 // Grab the desired reference type so that if we end up rejecting 683 // `py_obj` it still gets decremented if necessary. 684 PythonObject result(type, py_obj); 685 686 if (!PythonModule::Check(py_obj)) 687 { 688 PythonObject::Reset(); 689 return; 690 } 691 692 // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls 693 // back into the virtual implementation. 694 PythonObject::Reset(PyRefType::Borrowed, result.get()); 695 } 696 697 PythonDictionary 698 PythonModule::GetDictionary() const 699 { 700 return PythonDictionary(PyRefType::Borrowed, PyModule_GetDict(m_py_obj)); 701 } 702 703 PythonFile::PythonFile() 704 : PythonObject() 705 { 706 } 707 708 PythonFile::PythonFile(File &file, const char *mode) 709 { 710 Reset(file, mode); 711 } 712 713 PythonFile::PythonFile(const char *path, const char *mode) 714 { 715 FILE *fp = nullptr; 716 fp = fopen(path, mode); 717 lldb_private::File file(fp, true); 718 Reset(file, mode); 719 } 720 721 PythonFile::PythonFile(PyRefType type, PyObject *o) 722 { 723 Reset(type, o); 724 } 725 726 PythonFile::~PythonFile() 727 { 728 } 729 730 bool 731 PythonFile::Check(PyObject *py_obj) 732 { 733 #if PY_MAJOR_VERSION < 3 734 return PyFile_Check(py_obj); 735 #else 736 // In Python 3, there is no `PyFile_Check`, and in fact PyFile is not even a 737 // first-class object type anymore. `PyFile_FromFd` is just a thin wrapper 738 // over `io.open()`, which returns some object derived from `io.IOBase`. 739 // As a result, the only way to detect a file in Python 3 is to check whether 740 // it inherits from `io.IOBase`. Since it is possible for non-files to also 741 // inherit from `io.IOBase`, we additionally verify that it has the `fileno` 742 // attribute, which should guarantee that it is backed by the file system. 743 PythonObject io_module(PyRefType::Owned, PyImport_ImportModule("io")); 744 PythonDictionary io_dict(PyRefType::Borrowed, PyModule_GetDict(io_module.get())); 745 PythonObject io_base_class = io_dict.GetItemForKey(PythonString("IOBase")); 746 747 PythonObject object_type(PyRefType::Owned, PyObject_Type(py_obj)); 748 749 if (1 != PyObject_IsSubclass(object_type.get(), io_base_class.get())) 750 return false; 751 if (!object_type.HasAttribute("fileno")) 752 return false; 753 754 return true; 755 #endif 756 } 757 758 void 759 PythonFile::Reset(PyRefType type, PyObject *py_obj) 760 { 761 // Grab the desired reference type so that if we end up rejecting 762 // `py_obj` it still gets decremented if necessary. 763 PythonObject result(type, py_obj); 764 765 if (!PythonFile::Check(py_obj)) 766 { 767 PythonObject::Reset(); 768 return; 769 } 770 771 // Calling PythonObject::Reset(const PythonObject&) will lead to stack 772 // overflow since it calls back into the virtual implementation. 773 PythonObject::Reset(PyRefType::Borrowed, result.get()); 774 } 775 776 void 777 PythonFile::Reset(File &file, const char *mode) 778 { 779 char *cmode = const_cast<char *>(mode); 780 #if PY_MAJOR_VERSION >= 3 781 Reset(PyRefType::Owned, 782 PyFile_FromFd(file.GetDescriptor(), nullptr, cmode, -1, nullptr, "ignore", nullptr, 0)); 783 #else 784 // Read through the Python source, doesn't seem to modify these strings 785 Reset(PyRefType::Owned, 786 PyFile_FromFile(file.GetStream(), const_cast<char *>(""), cmode, nullptr)); 787 #endif 788 } 789 790 bool 791 PythonFile::GetUnderlyingFile(File &file) const 792 { 793 if (!IsValid()) 794 return false; 795 796 file.Close(); 797 // We don't own the file descriptor returned by this function, make sure the 798 // File object knows about that. 799 file.SetDescriptor(PyObject_AsFileDescriptor(m_py_obj), false); 800 return file.IsValid(); 801 } 802 803 804 #endif 805