1 //===-- Perf.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Perf.h"
10 
11 #include "Plugins/Process/POSIX/ProcessPOSIXLog.h"
12 #include "lldb/Host/linux/Support.h"
13 
14 #include "llvm/Support/FormatVariadic.h"
15 #include "llvm/Support/MathExtras.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 
18 #include <sys/ioctl.h>
19 #include <sys/mman.h>
20 #include <sys/syscall.h>
21 #include <unistd.h>
22 
23 using namespace lldb_private;
24 using namespace process_linux;
25 using namespace llvm;
26 
27 Expected<LinuxPerfZeroTscConversion>
28 lldb_private::process_linux::LoadPerfTscConversionParameters() {
29   lldb::pid_t pid = getpid();
30   perf_event_attr attr;
31   memset(&attr, 0, sizeof(attr));
32   attr.size = sizeof(attr);
33   attr.type = PERF_TYPE_SOFTWARE;
34   attr.config = PERF_COUNT_SW_DUMMY;
35 
36   Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid);
37   if (!perf_event)
38     return perf_event.takeError();
39   if (Error mmap_err =
40           perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0,
41                                              /*num_aux_pages=*/0,
42                                              /*data_buffer_write=*/false))
43     return std::move(mmap_err);
44 
45   perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage();
46   if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) {
47     return LinuxPerfZeroTscConversion{
48         mmap_metada.time_mult, mmap_metada.time_shift, mmap_metada.time_zero};
49   } else {
50     auto err_cap =
51         !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero";
52     std::string err_msg =
53         llvm::formatv("Can't get TSC to real time conversion values. "
54                       "perf_event capability '{0}' not supported.",
55                       err_cap);
56     return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg);
57   }
58 }
59 
60 void resource_handle::MmapDeleter::operator()(void *ptr) {
61   if (m_bytes && ptr != nullptr)
62     munmap(ptr, m_bytes);
63 }
64 
65 void resource_handle::FileDescriptorDeleter::operator()(long *ptr) {
66   if (ptr == nullptr)
67     return;
68   if (*ptr == -1)
69     return;
70   close(*ptr);
71   std::default_delete<long>()(ptr);
72 }
73 
74 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
75                                           Optional<lldb::pid_t> pid,
76                                           Optional<lldb::core_id_t> cpu,
77                                           Optional<long> group_fd,
78                                           unsigned long flags) {
79   errno = 0;
80   long fd = syscall(SYS_perf_event_open, &attr, pid.getValueOr(-1),
81                     cpu.getValueOr(-1), group_fd.getValueOr(-1), flags);
82   if (fd == -1) {
83     std::string err_msg =
84         llvm::formatv("perf event syscall failed: {0}", std::strerror(errno));
85     return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg);
86   }
87   return PerfEvent(fd, !attr.disabled);
88 }
89 
90 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
91                                           Optional<lldb::pid_t> pid,
92                                           Optional<lldb::core_id_t> cpu) {
93   return Init(attr, pid, cpu, -1, 0);
94 }
95 
96 llvm::Expected<resource_handle::MmapUP>
97 PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags,
98                   long int offset, llvm::StringRef buffer_name) {
99   errno = 0;
100   auto mmap_result = ::mmap(addr, length, prot, flags, GetFd(), offset);
101 
102   if (mmap_result == MAP_FAILED) {
103     std::string err_msg =
104         llvm::formatv("perf event mmap allocation failed for {0}: {1}",
105                       buffer_name, std::strerror(errno));
106     return createStringError(inconvertibleErrorCode(), err_msg);
107   }
108   return resource_handle::MmapUP(mmap_result, length);
109 }
110 
111 llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages,
112                                                  bool data_buffer_write) {
113   size_t mmap_size = (num_data_pages + 1) * getpagesize();
114   if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap(
115           nullptr, mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0),
116           MAP_SHARED, 0, "metadata and data buffer")) {
117     m_metadata_data_base = std::move(mmap_metadata_data.get());
118     return Error::success();
119   } else
120     return mmap_metadata_data.takeError();
121 }
122 
123 llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) {
124   if (num_aux_pages == 0)
125     return Error::success();
126 
127   perf_event_mmap_page &metadata_page = GetMetadataPage();
128 
129   metadata_page.aux_offset =
130       metadata_page.data_offset + metadata_page.data_size;
131   metadata_page.aux_size = num_aux_pages * getpagesize();
132 
133   if (Expected<resource_handle::MmapUP> mmap_aux =
134           DoMmap(nullptr, metadata_page.aux_size, PROT_READ, MAP_SHARED,
135                  metadata_page.aux_offset, "aux buffer")) {
136     m_aux_base = std::move(mmap_aux.get());
137     return Error::success();
138   } else
139     return mmap_aux.takeError();
140 }
141 
142 llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages,
143                                               size_t num_aux_pages,
144                                               bool data_buffer_write) {
145   if (num_data_pages != 0 && !isPowerOf2_64(num_data_pages))
146     return llvm::createStringError(
147         llvm::inconvertibleErrorCode(),
148         llvm::formatv("Number of data pages must be a power of 2, got: {0}",
149                       num_data_pages));
150   if (num_aux_pages != 0 && !isPowerOf2_64(num_aux_pages))
151     return llvm::createStringError(
152         llvm::inconvertibleErrorCode(),
153         llvm::formatv("Number of aux pages must be a power of 2, got: {0}",
154                       num_aux_pages));
155   if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write))
156     return err;
157   if (Error err = MmapAuxBuffer(num_aux_pages))
158     return err;
159   return Error::success();
160 }
161 
162 long PerfEvent::GetFd() const { return *(m_fd.get()); }
163 
164 perf_event_mmap_page &PerfEvent::GetMetadataPage() const {
165   return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get());
166 }
167 
168 ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const {
169   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
170   return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) +
171               mmap_metadata.data_offset,
172            static_cast<size_t>(mmap_metadata.data_size)};
173 }
174 
175 ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const {
176   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
177   return {reinterpret_cast<uint8_t *>(m_aux_base.get()),
178            static_cast<size_t>(mmap_metadata.aux_size)};
179 }
180 
181 Expected<std::vector<uint8_t>>
182 PerfEvent::ReadFlushedOutDataCyclicBuffer(size_t offset, size_t size) {
183   // The following code assumes that the protection level of the DATA page
184   // is PROT_READ. If PROT_WRITE is used, then reading would require that
185   // this piece of code updates some pointers. See more about data_tail
186   // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
187 
188   bool was_enabled = m_enabled;
189   if (Error err = DisableWithIoctl())
190     return std::move(err);
191 
192   /**
193    * The data buffer and aux buffer have different implementations
194    * with respect to their definition of head pointer. In the case
195    * of Aux data buffer the head always wraps around the aux buffer
196    * and we don't need to care about it, whereas the data_head keeps
197    * increasing and needs to be wrapped by modulus operator
198    */
199   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
200 
201   ArrayRef<uint8_t> data = GetDataBuffer();
202   uint64_t data_head = mmap_metadata.data_head;
203   uint64_t data_size = mmap_metadata.data_size;
204   std::vector<uint8_t> output;
205   output.reserve(size);
206 
207   if (data_head > data_size) {
208     uint64_t actual_data_head = data_head % data_size;
209     // The buffer has wrapped
210     for (uint64_t i = actual_data_head + offset;
211          i < data_size && output.size() < size; i++)
212       output.push_back(data[i]);
213 
214     // We need to find the starting position for the left part if the offset was
215     // too big
216     uint64_t left_part_start = actual_data_head + offset >= data_size
217                                    ? actual_data_head + offset - data_size
218                                    : 0;
219     for (uint64_t i = left_part_start;
220          i < actual_data_head && output.size() < size; i++)
221       output.push_back(data[i]);
222   } else {
223     for (uint64_t i = offset; i < data_head && output.size() < size; i++)
224       output.push_back(data[i]);
225   }
226 
227   if (was_enabled) {
228     if (Error err = EnableWithIoctl())
229       return std::move(err);
230   }
231 
232   if (output.size() != size)
233     return createStringError(inconvertibleErrorCode(),
234                              formatv("Requested {0} bytes of perf_event data "
235                                      "buffer but only {1} are available",
236                                      size, output.size()));
237 
238   return output;
239 }
240 
241 Expected<std::vector<uint8_t>>
242 PerfEvent::ReadFlushedOutAuxCyclicBuffer(size_t offset, size_t size) {
243   // The following code assumes that the protection level of the AUX page
244   // is PROT_READ. If PROT_WRITE is used, then reading would require that
245   // this piece of code updates some pointers. See more about aux_tail
246   // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
247 
248   bool was_enabled = m_enabled;
249   if (Error err = DisableWithIoctl())
250     return std::move(err);
251 
252   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
253 
254   ArrayRef<uint8_t> data = GetAuxBuffer();
255   uint64_t aux_head = mmap_metadata.aux_head;
256   uint64_t aux_size = mmap_metadata.aux_size;
257   std::vector<uint8_t> output;
258   output.reserve(size);
259 
260   /**
261    * When configured as ring buffer, the aux buffer keeps wrapping around
262    * the buffer and its not possible to detect how many times the buffer
263    * wrapped. Initially the buffer is filled with zeros,as shown below
264    * so in order to get complete buffer we first copy firstpartsize, followed
265    * by any left over part from beginning to aux_head
266    *
267    * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size
268    *                 aux_head->||<- firstpartsize  ->|
269    *
270    * */
271 
272   for (uint64_t i = aux_head + offset; i < aux_size && output.size() < size;
273        i++)
274     output.push_back(data[i]);
275 
276   // We need to find the starting position for the left part if the offset was
277   // too big
278   uint64_t left_part_start =
279       aux_head + offset >= aux_size ? aux_head + offset - aux_size : 0;
280   for (uint64_t i = left_part_start; i < aux_head && output.size() < size; i++)
281     output.push_back(data[i]);
282 
283   if (was_enabled) {
284     if (Error err = EnableWithIoctl())
285       return std::move(err);
286   }
287 
288   if (output.size() != size)
289     return createStringError(inconvertibleErrorCode(),
290                              formatv("Requested {0} bytes of perf_event aux "
291                                      "buffer but only {1} are available",
292                                      size, output.size()));
293 
294   return output;
295 }
296 
297 Error PerfEvent::DisableWithIoctl() {
298   if (!m_enabled)
299     return Error::success();
300 
301   if (ioctl(*m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0)
302     return createStringError(inconvertibleErrorCode(),
303                              "Can't disable perf event. %s",
304                              std::strerror(errno));
305 
306   m_enabled = false;
307   return Error::success();
308 }
309 
310 bool PerfEvent::IsEnabled() const { return m_enabled; }
311 
312 Error PerfEvent::EnableWithIoctl() {
313   if (m_enabled)
314     return Error::success();
315 
316   if (ioctl(*m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0)
317     return createStringError(inconvertibleErrorCode(),
318                              "Can't enable perf event. %s",
319                              std::strerror(errno));
320 
321   m_enabled = true;
322   return Error::success();
323 }
324 
325 size_t PerfEvent::GetEffectiveDataBufferSize() const {
326   perf_event_mmap_page &mmap_metadata = GetMetadataPage();
327   if (mmap_metadata.data_head < mmap_metadata.data_size)
328     return mmap_metadata.data_head;
329   else
330     return mmap_metadata.data_size; // The buffer has wrapped.
331 }
332 
333 Expected<PerfEvent>
334 lldb_private::process_linux::CreateContextSwitchTracePerfEvent(
335     lldb::core_id_t core_id, const PerfEvent *parent_perf_event) {
336   Log *log = GetLog(POSIXLog::Trace);
337 #ifndef PERF_ATTR_SIZE_VER5
338   return createStringError(inconvertibleErrorCode(),
339                            "Intel PT Linux perf event not supported");
340 #else
341   perf_event_attr attr;
342   memset(&attr, 0, sizeof(attr));
343   attr.size = sizeof(attr);
344   attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
345   attr.type = PERF_TYPE_SOFTWARE;
346   attr.context_switch = 1;
347   attr.exclude_kernel = 1;
348   attr.sample_id_all = 1;
349   attr.exclude_hv = 1;
350   attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false;
351 
352   // The given perf configuration will produce context switch records of 32
353   // bytes each. Assuming that every context switch will be emitted twice (one
354   // for context switch ins and another one for context switch outs), and that a
355   // context switch will happen at least every half a millisecond per core, we
356   // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more
357   // than what a regular intel pt trace can get. Pessimistically we pick as
358   // 32KiB for the size of our context switch trace.
359 
360   uint64_t data_buffer_size = 32768;
361   uint64_t data_buffer_numpages = data_buffer_size / getpagesize();
362 
363   LLDB_LOG(log, "Will create context switch trace buffer of size {0}",
364            data_buffer_size);
365 
366   Optional<long> group_fd;
367   if (parent_perf_event)
368     group_fd = parent_perf_event->GetFd();
369 
370   if (Expected<PerfEvent> perf_event =
371           PerfEvent::Init(attr, /*pid=*/None, core_id, group_fd, /*flags=*/0)) {
372     if (Error mmap_err = perf_event->MmapMetadataAndBuffers(
373             data_buffer_numpages, 0, /*data_buffer_write=*/false)) {
374       return std::move(mmap_err);
375     }
376     return perf_event;
377   } else {
378     return perf_event.takeError();
379   }
380 #endif
381 }
382