1 //===-- PythonDataObjects.cpp ------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifdef LLDB_DISABLE_PYTHON
11 
12 // Python is disabled in this build
13 
14 #else
15 
16 #include "lldb-python.h"
17 #include "PythonDataObjects.h"
18 #include "ScriptInterpreterPython.h"
19 
20 #include "lldb/Core/Stream.h"
21 #include "lldb/Host/File.h"
22 #include "lldb/Interpreter/ScriptInterpreter.h"
23 
24 #include <stdio.h>
25 
26 using namespace lldb_private;
27 using namespace lldb;
28 
29 void
30 StructuredPythonObject::Dump(Stream &s) const
31 {
32     s << "Python Obj: 0x" << GetValue();
33 }
34 
35 //----------------------------------------------------------------------
36 // PythonObject
37 //----------------------------------------------------------------------
38 
39 void
40 PythonObject::Dump(Stream &strm) const
41 {
42     if (m_py_obj)
43     {
44         FILE *file = ::tmpfile();
45         if (file)
46         {
47             ::PyObject_Print (m_py_obj, file, 0);
48             const long length = ftell (file);
49             if (length)
50             {
51                 ::rewind(file);
52                 std::vector<char> file_contents (length,'\0');
53                 const size_t length_read = ::fread (file_contents.data(), 1, file_contents.size(), file);
54                 if (length_read > 0)
55                     strm.Write (file_contents.data(), length_read);
56             }
57             ::fclose (file);
58         }
59     }
60     else
61         strm.PutCString ("NULL");
62 }
63 
64 PyObjectType
65 PythonObject::GetObjectType() const
66 {
67     if (!IsAllocated())
68         return PyObjectType::None;
69 
70     if (PythonModule::Check(m_py_obj))
71         return PyObjectType::Module;
72     if (PythonList::Check(m_py_obj))
73         return PyObjectType::List;
74     if (PythonDictionary::Check(m_py_obj))
75         return PyObjectType::Dictionary;
76     if (PythonString::Check(m_py_obj))
77         return PyObjectType::String;
78     if (PythonInteger::Check(m_py_obj))
79         return PyObjectType::Integer;
80     if (PythonFile::Check(m_py_obj))
81         return PyObjectType::File;
82     return PyObjectType::Unknown;
83 }
84 
85 PythonString
86 PythonObject::Repr() const
87 {
88     if (!m_py_obj)
89         return PythonString();
90     PyObject *repr = PyObject_Repr(m_py_obj);
91     if (!repr)
92         return PythonString();
93     return PythonString(PyRefType::Owned, repr);
94 }
95 
96 PythonString
97 PythonObject::Str() const
98 {
99     if (!m_py_obj)
100         return PythonString();
101     PyObject *str = PyObject_Str(m_py_obj);
102     if (!str)
103         return PythonString();
104     return PythonString(PyRefType::Owned, str);
105 }
106 
107 PythonObject
108 PythonObject::ResolveNameGlobal(llvm::StringRef name)
109 {
110     return PythonModule::MainModule().ResolveName(name);
111 }
112 
113 PythonObject
114 PythonObject::ResolveName(llvm::StringRef name) const
115 {
116     // Resolve the name in the context of the specified object.  If,
117     // for example, `this` refers to a PyModule, then this will look for
118     // `name` in this module.  If `this` refers to a PyType, then it will
119     // resolve `name` as an attribute of that type.  If `this` refers to
120     // an instance of an object, then it will resolve `name` as the value
121     // of the specified field.
122     //
123     // This function handles dotted names so that, for example, if `m_py_obj`
124     // refers to the `sys` module, and `name` == "path.append", then it
125     // will find the function `sys.path.append`.
126 
127     size_t dot_pos = name.find_first_of('.');
128     if (dot_pos == llvm::StringRef::npos)
129     {
130         // No dots in the name, we should be able to find the value immediately
131         // as an attribute of `use_object`.
132         return GetAttributeValue(name);
133     }
134 
135     // Look up the first piece of the name, and resolve the rest as a child of that.
136     PythonObject parent = ResolveName(name.substr(0, dot_pos));
137     if (!parent.IsAllocated())
138         return PythonObject();
139 
140     // Tail recursion.. should be optimized by the compiler
141     return parent.ResolveName(name.substr(dot_pos + 1));
142 }
143 
144 bool
145 PythonObject::HasAttribute(llvm::StringRef attr) const
146 {
147     if (!IsValid())
148         return false;
149     PythonString py_attr(attr);
150     return !!PyObject_HasAttr(m_py_obj, py_attr.get());
151 }
152 
153 PythonObject
154 PythonObject::GetAttributeValue(llvm::StringRef attr) const
155 {
156     if (!IsValid())
157         return PythonObject();
158 
159     PythonString py_attr(attr);
160     if (!PyObject_HasAttr(m_py_obj, py_attr.get()))
161         return PythonObject();
162 
163     return PythonObject(PyRefType::Owned,
164         PyObject_GetAttr(m_py_obj, py_attr.get()));
165 }
166 
167 bool
168 PythonObject::IsNone() const
169 {
170     return m_py_obj == Py_None;
171 }
172 
173 bool
174 PythonObject::IsValid() const
175 {
176     return m_py_obj != nullptr;
177 }
178 
179 bool
180 PythonObject::IsAllocated() const
181 {
182     return IsValid() && !IsNone();
183 }
184 
185 StructuredData::ObjectSP
186 PythonObject::CreateStructuredObject() const
187 {
188     switch (GetObjectType())
189     {
190         case PyObjectType::Dictionary:
191             return PythonDictionary(PyRefType::Borrowed, m_py_obj).CreateStructuredDictionary();
192         case PyObjectType::Integer:
193             return PythonInteger(PyRefType::Borrowed, m_py_obj).CreateStructuredInteger();
194         case PyObjectType::List:
195             return PythonList(PyRefType::Borrowed, m_py_obj).CreateStructuredArray();
196         case PyObjectType::String:
197             return PythonString(PyRefType::Borrowed, m_py_obj).CreateStructuredString();
198         case PyObjectType::None:
199             return StructuredData::ObjectSP();
200         default:
201             return StructuredData::ObjectSP(new StructuredPythonObject(m_py_obj));
202     }
203 }
204 
205 //----------------------------------------------------------------------
206 // PythonString
207 //----------------------------------------------------------------------
208 
209 PythonString::PythonString(PyRefType type, PyObject *py_obj)
210     : PythonObject()
211 {
212     Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a string
213 }
214 
215 PythonString::PythonString(const PythonString &object)
216     : PythonObject(object)
217 {
218 }
219 
220 PythonString::PythonString(llvm::StringRef string)
221     : PythonObject()
222 {
223     SetString(string);
224 }
225 
226 PythonString::PythonString(const char *string)
227     : PythonObject()
228 {
229     SetString(llvm::StringRef(string));
230 }
231 
232 PythonString::PythonString()
233     : PythonObject()
234 {
235 }
236 
237 PythonString::~PythonString ()
238 {
239 }
240 
241 bool
242 PythonString::Check(PyObject *py_obj)
243 {
244     if (!py_obj)
245         return false;
246 
247     if (PyUnicode_Check(py_obj))
248         return true;
249 #if PY_MAJOR_VERSION < 3
250     if (PyString_Check(py_obj))
251         return true;
252 #endif
253     return false;
254 }
255 
256 void
257 PythonString::Reset(PyRefType type, PyObject *py_obj)
258 {
259     // Grab the desired reference type so that if we end up rejecting
260     // `py_obj` it still gets decremented if necessary.
261     PythonObject result(type, py_obj);
262 
263     if (!PythonString::Check(py_obj))
264     {
265         PythonObject::Reset();
266         return;
267     }
268 #if PY_MAJOR_VERSION < 3
269     // In Python 2, Don't store PyUnicode objects directly, because we need
270     // access to their underlying character buffers which Python 2 doesn't
271     // provide.
272     if (PyUnicode_Check(py_obj))
273         result.Reset(PyRefType::Owned, PyUnicode_AsUTF8String(result.get()));
274 #endif
275     // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls
276     // back into the virtual implementation.
277     PythonObject::Reset(PyRefType::Borrowed, result.get());
278 }
279 
280 llvm::StringRef
281 PythonString::GetString() const
282 {
283     if (!IsValid())
284         return llvm::StringRef();
285 
286     Py_ssize_t size;
287     char *c;
288 
289 #if PY_MAJOR_VERSION >= 3
290     c = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
291 #else
292     PyString_AsStringAndSize(m_py_obj, &c, &size);
293 #endif
294     return llvm::StringRef(c, size);
295 }
296 
297 size_t
298 PythonString::GetSize() const
299 {
300     if (IsValid())
301     {
302 #if PY_MAJOR_VERSION >= 3
303         return PyUnicode_GetSize(m_py_obj);
304 #else
305         return PyString_Size(m_py_obj);
306 #endif
307     }
308     return 0;
309 }
310 
311 void
312 PythonString::SetString (llvm::StringRef string)
313 {
314 #if PY_MAJOR_VERSION >= 3
315     PyObject *unicode = PyUnicode_FromStringAndSize(string.data(), string.size());
316     PythonObject::Reset(PyRefType::Owned, unicode);
317 #else
318     PyObject *str = PyString_FromStringAndSize(string.data(), string.size());
319     PythonObject::Reset(PyRefType::Owned, str);
320 #endif
321 }
322 
323 StructuredData::StringSP
324 PythonString::CreateStructuredString() const
325 {
326     StructuredData::StringSP result(new StructuredData::String);
327     result->SetValue(GetString());
328     return result;
329 }
330 
331 //----------------------------------------------------------------------
332 // PythonInteger
333 //----------------------------------------------------------------------
334 
335 PythonInteger::PythonInteger()
336     : PythonObject()
337 {
338 
339 }
340 
341 PythonInteger::PythonInteger(PyRefType type, PyObject *py_obj)
342     : PythonObject()
343 {
344     Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a integer type
345 }
346 
347 PythonInteger::PythonInteger(const PythonInteger &object)
348     : PythonObject(object)
349 {
350 }
351 
352 PythonInteger::PythonInteger(int64_t value)
353     : PythonObject()
354 {
355     SetInteger(value);
356 }
357 
358 
359 PythonInteger::~PythonInteger ()
360 {
361 }
362 
363 bool
364 PythonInteger::Check(PyObject *py_obj)
365 {
366     if (!py_obj)
367         return false;
368 
369 #if PY_MAJOR_VERSION >= 3
370     // Python 3 does not have PyInt_Check.  There is only one type of
371     // integral value, long.
372     return PyLong_Check(py_obj);
373 #else
374     return PyLong_Check(py_obj) || PyInt_Check(py_obj);
375 #endif
376 }
377 
378 void
379 PythonInteger::Reset(PyRefType type, PyObject *py_obj)
380 {
381     // Grab the desired reference type so that if we end up rejecting
382     // `py_obj` it still gets decremented if necessary.
383     PythonObject result(type, py_obj);
384 
385     if (!PythonInteger::Check(py_obj))
386     {
387         PythonObject::Reset();
388         return;
389     }
390 
391 #if PY_MAJOR_VERSION < 3
392     // Always store this as a PyLong, which makes interoperability between
393     // Python 2.x and Python 3.x easier.  This is only necessary in 2.x,
394     // since 3.x doesn't even have a PyInt.
395     if (PyInt_Check(py_obj))
396     {
397         // Since we converted the original object to a different type, the new
398         // object is an owned object regardless of the ownership semantics requested
399         // by the user.
400         result.Reset(PyRefType::Owned, PyLong_FromLongLong(PyInt_AsLong(py_obj)));
401     }
402 #endif
403 
404     assert(PyLong_Check(result.get()) && "Couldn't get a PyLong from this PyObject");
405 
406     // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls
407     // back into the virtual implementation.
408     PythonObject::Reset(PyRefType::Borrowed, result.get());
409 }
410 
411 int64_t
412 PythonInteger::GetInteger() const
413 {
414     if (m_py_obj)
415     {
416         assert(PyLong_Check(m_py_obj) && "PythonInteger::GetInteger has a PyObject that isn't a PyLong");
417 
418         return PyLong_AsLongLong(m_py_obj);
419     }
420     return UINT64_MAX;
421 }
422 
423 void
424 PythonInteger::SetInteger(int64_t value)
425 {
426     PythonObject::Reset(PyRefType::Owned, PyLong_FromLongLong(value));
427 }
428 
429 StructuredData::IntegerSP
430 PythonInteger::CreateStructuredInteger() const
431 {
432     StructuredData::IntegerSP result(new StructuredData::Integer);
433     result->SetValue(GetInteger());
434     return result;
435 }
436 
437 //----------------------------------------------------------------------
438 // PythonList
439 //----------------------------------------------------------------------
440 
441 PythonList::PythonList(PyInitialValue value)
442     : PythonObject()
443 {
444     if (value == PyInitialValue::Empty)
445         Reset(PyRefType::Owned, PyList_New(0));
446 }
447 
448 PythonList::PythonList(int list_size)
449     : PythonObject()
450 {
451     Reset(PyRefType::Owned, PyList_New(list_size));
452 }
453 
454 PythonList::PythonList(PyRefType type, PyObject *py_obj)
455     : PythonObject()
456 {
457     Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a list
458 }
459 
460 PythonList::PythonList(const PythonList &list)
461     : PythonObject(list)
462 {
463 }
464 
465 PythonList::~PythonList ()
466 {
467 }
468 
469 bool
470 PythonList::Check(PyObject *py_obj)
471 {
472     if (!py_obj)
473         return false;
474     return PyList_Check(py_obj);
475 }
476 
477 void
478 PythonList::Reset(PyRefType type, PyObject *py_obj)
479 {
480     // Grab the desired reference type so that if we end up rejecting
481     // `py_obj` it still gets decremented if necessary.
482     PythonObject result(type, py_obj);
483 
484     if (!PythonList::Check(py_obj))
485     {
486         PythonObject::Reset();
487         return;
488     }
489 
490     // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls
491     // back into the virtual implementation.
492     PythonObject::Reset(PyRefType::Borrowed, result.get());
493 }
494 
495 uint32_t
496 PythonList::GetSize() const
497 {
498     if (IsValid())
499         return PyList_GET_SIZE(m_py_obj);
500     return 0;
501 }
502 
503 PythonObject
504 PythonList::GetItemAtIndex(uint32_t index) const
505 {
506     if (IsValid())
507         return PythonObject(PyRefType::Borrowed, PyList_GetItem(m_py_obj, index));
508     return PythonObject();
509 }
510 
511 void
512 PythonList::SetItemAtIndex(uint32_t index, const PythonObject &object)
513 {
514     if (IsAllocated() && object.IsValid())
515     {
516         // PyList_SetItem is documented to "steal" a reference, so we need to
517         // convert it to an owned reference by incrementing it.
518         Py_INCREF(object.get());
519         PyList_SetItem(m_py_obj, index, object.get());
520     }
521 }
522 
523 void
524 PythonList::AppendItem(const PythonObject &object)
525 {
526     if (IsAllocated() && object.IsValid())
527     {
528         // `PyList_Append` does *not* steal a reference, so do not call `Py_INCREF`
529         // here like we do with `PyList_SetItem`.
530         PyList_Append(m_py_obj, object.get());
531     }
532 }
533 
534 StructuredData::ArraySP
535 PythonList::CreateStructuredArray() const
536 {
537     StructuredData::ArraySP result(new StructuredData::Array);
538     uint32_t count = GetSize();
539     for (uint32_t i = 0; i < count; ++i)
540     {
541         PythonObject obj = GetItemAtIndex(i);
542         result->AddItem(obj.CreateStructuredObject());
543     }
544     return result;
545 }
546 
547 //----------------------------------------------------------------------
548 // PythonDictionary
549 //----------------------------------------------------------------------
550 
551 PythonDictionary::PythonDictionary(PyInitialValue value)
552     : PythonObject()
553 {
554     if (value == PyInitialValue::Empty)
555         Reset(PyRefType::Owned, PyDict_New());
556 }
557 
558 PythonDictionary::PythonDictionary(PyRefType type, PyObject *py_obj)
559     : PythonObject()
560 {
561     Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a dictionary
562 }
563 
564 PythonDictionary::PythonDictionary(const PythonDictionary &object)
565     : PythonObject(object)
566 {
567 }
568 
569 PythonDictionary::~PythonDictionary ()
570 {
571 }
572 
573 bool
574 PythonDictionary::Check(PyObject *py_obj)
575 {
576     if (!py_obj)
577         return false;
578 
579     return PyDict_Check(py_obj);
580 }
581 
582 void
583 PythonDictionary::Reset(PyRefType type, PyObject *py_obj)
584 {
585     // Grab the desired reference type so that if we end up rejecting
586     // `py_obj` it still gets decremented if necessary.
587     PythonObject result(type, py_obj);
588 
589     if (!PythonDictionary::Check(py_obj))
590     {
591         PythonObject::Reset();
592         return;
593     }
594 
595     // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls
596     // back into the virtual implementation.
597     PythonObject::Reset(PyRefType::Borrowed, result.get());
598 }
599 
600 uint32_t
601 PythonDictionary::GetSize() const
602 {
603     if (IsValid())
604         return PyDict_Size(m_py_obj);
605     return 0;
606 }
607 
608 PythonList
609 PythonDictionary::GetKeys() const
610 {
611     if (IsValid())
612         return PythonList(PyRefType::Owned, PyDict_Keys(m_py_obj));
613     return PythonList(PyInitialValue::Invalid);
614 }
615 
616 PythonObject
617 PythonDictionary::GetItemForKey(const PythonObject &key) const
618 {
619     if (IsAllocated() && key.IsValid())
620         return PythonObject(PyRefType::Borrowed, PyDict_GetItem(m_py_obj, key.get()));
621     return PythonObject();
622 }
623 
624 void
625 PythonDictionary::SetItemForKey(const PythonObject &key, const PythonObject &value)
626 {
627     if (IsAllocated() && key.IsValid() && value.IsValid())
628         PyDict_SetItem(m_py_obj, key.get(), value.get());
629 }
630 
631 StructuredData::DictionarySP
632 PythonDictionary::CreateStructuredDictionary() const
633 {
634     StructuredData::DictionarySP result(new StructuredData::Dictionary);
635     PythonList keys(GetKeys());
636     uint32_t num_keys = keys.GetSize();
637     for (uint32_t i = 0; i < num_keys; ++i)
638     {
639         PythonObject key = keys.GetItemAtIndex(i);
640         PythonObject value = GetItemForKey(key);
641         StructuredData::ObjectSP structured_value = value.CreateStructuredObject();
642         result->AddItem(key.Str().GetString(), structured_value);
643     }
644     return result;
645 }
646 
647 PythonModule::PythonModule() : PythonObject()
648 {
649 }
650 
651 PythonModule::PythonModule(PyRefType type, PyObject *py_obj)
652 {
653     Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a module
654 }
655 
656 PythonModule::PythonModule(const PythonModule &dict) : PythonObject(dict)
657 {
658 }
659 
660 PythonModule::~PythonModule()
661 {
662 }
663 
664 PythonModule
665 PythonModule::MainModule()
666 {
667     return PythonModule(PyRefType::Borrowed, PyImport_AddModule("__main__"));
668 }
669 
670 bool
671 PythonModule::Check(PyObject *py_obj)
672 {
673     if (!py_obj)
674         return false;
675 
676     return PyModule_Check(py_obj);
677 }
678 
679 void
680 PythonModule::Reset(PyRefType type, PyObject *py_obj)
681 {
682     // Grab the desired reference type so that if we end up rejecting
683     // `py_obj` it still gets decremented if necessary.
684     PythonObject result(type, py_obj);
685 
686     if (!PythonModule::Check(py_obj))
687     {
688         PythonObject::Reset();
689         return;
690     }
691 
692     // Calling PythonObject::Reset(const PythonObject&) will lead to stack overflow since it calls
693     // back into the virtual implementation.
694     PythonObject::Reset(PyRefType::Borrowed, result.get());
695 }
696 
697 PythonDictionary
698 PythonModule::GetDictionary() const
699 {
700     return PythonDictionary(PyRefType::Borrowed, PyModule_GetDict(m_py_obj));
701 }
702 
703 PythonFile::PythonFile()
704     : PythonObject()
705 {
706 }
707 
708 PythonFile::PythonFile(File &file, const char *mode)
709 {
710     Reset(file, mode);
711 }
712 
713 PythonFile::PythonFile(const char *path, const char *mode)
714 {
715     FILE *fp = nullptr;
716     fp = fopen(path, mode);
717     lldb_private::File file(fp, true);
718     Reset(file, mode);
719 }
720 
721 PythonFile::PythonFile(PyRefType type, PyObject *o)
722 {
723     Reset(type, o);
724 }
725 
726 PythonFile::~PythonFile()
727 {
728 }
729 
730 bool
731 PythonFile::Check(PyObject *py_obj)
732 {
733 #if PY_MAJOR_VERSION < 3
734     return PyFile_Check(py_obj);
735 #else
736     // In Python 3, there is no `PyFile_Check`, and in fact PyFile is not even a
737     // first-class object type anymore.  `PyFile_FromFd` is just a thin wrapper
738     // over `io.open()`, which returns some object derived from `io.IOBase`.
739     // As a result, the only way to detect a file in Python 3 is to check whether
740     // it inherits from `io.IOBase`.  Since it is possible for non-files to also
741     // inherit from `io.IOBase`, we additionally verify that it has the `fileno`
742     // attribute, which should guarantee that it is backed by the file system.
743     PythonObject io_module(PyRefType::Owned, PyImport_ImportModule("io"));
744     PythonDictionary io_dict(PyRefType::Borrowed, PyModule_GetDict(io_module.get()));
745     PythonObject io_base_class = io_dict.GetItemForKey(PythonString("IOBase"));
746 
747     PythonObject object_type(PyRefType::Owned, PyObject_Type(py_obj));
748 
749     if (1 != PyObject_IsSubclass(object_type.get(), io_base_class.get()))
750         return false;
751     if (!object_type.HasAttribute("fileno"))
752         return false;
753 
754     return true;
755 #endif
756 }
757 
758 void
759 PythonFile::Reset(PyRefType type, PyObject *py_obj)
760 {
761     // Grab the desired reference type so that if we end up rejecting
762     // `py_obj` it still gets decremented if necessary.
763     PythonObject result(type, py_obj);
764 
765     if (!PythonFile::Check(py_obj))
766     {
767         PythonObject::Reset();
768         return;
769     }
770 
771     // Calling PythonObject::Reset(const PythonObject&) will lead to stack
772     // overflow since it calls back into the virtual implementation.
773     PythonObject::Reset(PyRefType::Borrowed, result.get());
774 }
775 
776 void
777 PythonFile::Reset(File &file, const char *mode)
778 {
779     char *cmode = const_cast<char *>(mode);
780 #if PY_MAJOR_VERSION >= 3
781     Reset(PyRefType::Owned,
782         PyFile_FromFd(file.GetDescriptor(), nullptr, cmode, -1, nullptr, "ignore", nullptr, 0));
783 #else
784     // Read through the Python source, doesn't seem to modify these strings
785     Reset(PyRefType::Owned,
786         PyFile_FromFile(file.GetStream(), const_cast<char *>(""), cmode, nullptr));
787 #endif
788 }
789 
790 bool
791 PythonFile::GetUnderlyingFile(File &file) const
792 {
793     if (!IsValid())
794         return false;
795 
796     file.Close();
797     // We don't own the file descriptor returned by this function, make sure the
798     // File object knows about that.
799     file.SetDescriptor(PyObject_AsFileDescriptor(m_py_obj), false);
800     return file.IsValid();
801 }
802 
803 
804 #endif
805