1 //===-- Perf.cpp ----------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Perf.h" 10 11 #include "Plugins/Process/POSIX/ProcessPOSIXLog.h" 12 #include "lldb/Host/linux/Support.h" 13 14 #include "llvm/Support/FormatVariadic.h" 15 #include "llvm/Support/MathExtras.h" 16 #include "llvm/Support/MemoryBuffer.h" 17 18 #include <sys/ioctl.h> 19 #include <sys/mman.h> 20 #include <sys/syscall.h> 21 #include <unistd.h> 22 23 using namespace lldb_private; 24 using namespace process_linux; 25 using namespace llvm; 26 27 Expected<LinuxPerfZeroTscConversion> 28 lldb_private::process_linux::LoadPerfTscConversionParameters() { 29 lldb::pid_t pid = getpid(); 30 perf_event_attr attr; 31 memset(&attr, 0, sizeof(attr)); 32 attr.size = sizeof(attr); 33 attr.type = PERF_TYPE_SOFTWARE; 34 attr.config = PERF_COUNT_SW_DUMMY; 35 36 Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid); 37 if (!perf_event) 38 return perf_event.takeError(); 39 if (Error mmap_err = 40 perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0, 41 /*num_aux_pages=*/0, 42 /*data_buffer_write=*/false)) 43 return std::move(mmap_err); 44 45 perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage(); 46 if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) { 47 return LinuxPerfZeroTscConversion{ 48 mmap_metada.time_mult, mmap_metada.time_shift, mmap_metada.time_zero}; 49 } else { 50 auto err_cap = 51 !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero"; 52 std::string err_msg = 53 llvm::formatv("Can't get TSC to real time conversion values. " 54 "perf_event capability '{0}' not supported.", 55 err_cap); 56 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 57 } 58 } 59 60 void resource_handle::MmapDeleter::operator()(void *ptr) { 61 if (m_bytes && ptr != nullptr) 62 munmap(ptr, m_bytes); 63 } 64 65 void resource_handle::FileDescriptorDeleter::operator()(long *ptr) { 66 if (ptr == nullptr) 67 return; 68 if (*ptr == -1) 69 return; 70 close(*ptr); 71 std::default_delete<long>()(ptr); 72 } 73 74 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, 75 Optional<lldb::pid_t> pid, 76 Optional<lldb::core_id_t> cpu, 77 Optional<long> group_fd, 78 unsigned long flags) { 79 errno = 0; 80 long fd = syscall(SYS_perf_event_open, &attr, pid.getValueOr(-1), 81 cpu.getValueOr(-1), group_fd.getValueOr(-1), flags); 82 if (fd == -1) { 83 std::string err_msg = 84 llvm::formatv("perf event syscall failed: {0}", std::strerror(errno)); 85 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 86 } 87 return PerfEvent(fd, !attr.disabled); 88 } 89 90 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, 91 Optional<lldb::pid_t> pid, 92 Optional<lldb::core_id_t> cpu) { 93 return Init(attr, pid, cpu, -1, 0); 94 } 95 96 llvm::Expected<resource_handle::MmapUP> 97 PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags, 98 long int offset, llvm::StringRef buffer_name) { 99 errno = 0; 100 auto mmap_result = ::mmap(addr, length, prot, flags, GetFd(), offset); 101 102 if (mmap_result == MAP_FAILED) { 103 std::string err_msg = 104 llvm::formatv("perf event mmap allocation failed for {0}: {1}", 105 buffer_name, std::strerror(errno)); 106 return createStringError(inconvertibleErrorCode(), err_msg); 107 } 108 return resource_handle::MmapUP(mmap_result, length); 109 } 110 111 llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages, 112 bool data_buffer_write) { 113 size_t mmap_size = (num_data_pages + 1) * getpagesize(); 114 if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap( 115 nullptr, mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0), 116 MAP_SHARED, 0, "metadata and data buffer")) { 117 m_metadata_data_base = std::move(mmap_metadata_data.get()); 118 return Error::success(); 119 } else 120 return mmap_metadata_data.takeError(); 121 } 122 123 llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) { 124 if (num_aux_pages == 0) 125 return Error::success(); 126 127 perf_event_mmap_page &metadata_page = GetMetadataPage(); 128 129 metadata_page.aux_offset = 130 metadata_page.data_offset + metadata_page.data_size; 131 metadata_page.aux_size = num_aux_pages * getpagesize(); 132 133 if (Expected<resource_handle::MmapUP> mmap_aux = 134 DoMmap(nullptr, metadata_page.aux_size, PROT_READ, MAP_SHARED, 135 metadata_page.aux_offset, "aux buffer")) { 136 m_aux_base = std::move(mmap_aux.get()); 137 return Error::success(); 138 } else 139 return mmap_aux.takeError(); 140 } 141 142 llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages, 143 size_t num_aux_pages, 144 bool data_buffer_write) { 145 if (num_data_pages != 0 && !isPowerOf2_64(num_data_pages)) 146 return llvm::createStringError( 147 llvm::inconvertibleErrorCode(), 148 llvm::formatv("Number of data pages must be a power of 2, got: {0}", 149 num_data_pages)); 150 if (num_aux_pages != 0 && !isPowerOf2_64(num_aux_pages)) 151 return llvm::createStringError( 152 llvm::inconvertibleErrorCode(), 153 llvm::formatv("Number of aux pages must be a power of 2, got: {0}", 154 num_aux_pages)); 155 if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write)) 156 return err; 157 if (Error err = MmapAuxBuffer(num_aux_pages)) 158 return err; 159 return Error::success(); 160 } 161 162 long PerfEvent::GetFd() const { return *(m_fd.get()); } 163 164 perf_event_mmap_page &PerfEvent::GetMetadataPage() const { 165 return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get()); 166 } 167 168 ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const { 169 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 170 return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) + 171 mmap_metadata.data_offset, 172 static_cast<size_t>(mmap_metadata.data_size)}; 173 } 174 175 ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const { 176 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 177 return {reinterpret_cast<uint8_t *>(m_aux_base.get()), 178 static_cast<size_t>(mmap_metadata.aux_size)}; 179 } 180 181 Expected<std::vector<uint8_t>> 182 PerfEvent::ReadFlushedOutDataCyclicBuffer(size_t offset, size_t size) { 183 // The following code assumes that the protection level of the DATA page 184 // is PROT_READ. If PROT_WRITE is used, then reading would require that 185 // this piece of code updates some pointers. See more about data_tail 186 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. 187 188 bool was_enabled = m_enabled; 189 if (Error err = DisableWithIoctl()) 190 return std::move(err); 191 192 /** 193 * The data buffer and aux buffer have different implementations 194 * with respect to their definition of head pointer. In the case 195 * of Aux data buffer the head always wraps around the aux buffer 196 * and we don't need to care about it, whereas the data_head keeps 197 * increasing and needs to be wrapped by modulus operator 198 */ 199 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 200 201 ArrayRef<uint8_t> data = GetDataBuffer(); 202 uint64_t data_head = mmap_metadata.data_head; 203 uint64_t data_size = mmap_metadata.data_size; 204 std::vector<uint8_t> output; 205 output.reserve(size); 206 207 if (data_head > data_size) { 208 uint64_t actual_data_head = data_head % data_size; 209 // The buffer has wrapped 210 for (uint64_t i = actual_data_head + offset; 211 i < data_size && output.size() < size; i++) 212 output.push_back(data[i]); 213 214 // We need to find the starting position for the left part if the offset was 215 // too big 216 uint64_t left_part_start = actual_data_head + offset >= data_size 217 ? actual_data_head + offset - data_size 218 : 0; 219 for (uint64_t i = left_part_start; 220 i < actual_data_head && output.size() < size; i++) 221 output.push_back(data[i]); 222 } else { 223 for (uint64_t i = offset; i < data_head && output.size() < size; i++) 224 output.push_back(data[i]); 225 } 226 227 if (was_enabled) { 228 if (Error err = EnableWithIoctl()) 229 return std::move(err); 230 } 231 232 if (output.size() != size) 233 return createStringError(inconvertibleErrorCode(), 234 formatv("Requested {0} bytes of perf_event data " 235 "buffer but only {1} are available", 236 size, output.size())); 237 238 return output; 239 } 240 241 Expected<std::vector<uint8_t>> 242 PerfEvent::ReadFlushedOutAuxCyclicBuffer(size_t offset, size_t size) { 243 // The following code assumes that the protection level of the AUX page 244 // is PROT_READ. If PROT_WRITE is used, then reading would require that 245 // this piece of code updates some pointers. See more about aux_tail 246 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. 247 248 bool was_enabled = m_enabled; 249 if (Error err = DisableWithIoctl()) 250 return std::move(err); 251 252 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 253 254 ArrayRef<uint8_t> data = GetAuxBuffer(); 255 uint64_t aux_head = mmap_metadata.aux_head; 256 uint64_t aux_size = mmap_metadata.aux_size; 257 std::vector<uint8_t> output; 258 output.reserve(size); 259 260 /** 261 * When configured as ring buffer, the aux buffer keeps wrapping around 262 * the buffer and its not possible to detect how many times the buffer 263 * wrapped. Initially the buffer is filled with zeros,as shown below 264 * so in order to get complete buffer we first copy firstpartsize, followed 265 * by any left over part from beginning to aux_head 266 * 267 * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size 268 * aux_head->||<- firstpartsize ->| 269 * 270 * */ 271 272 for (uint64_t i = aux_head + offset; i < aux_size && output.size() < size; 273 i++) 274 output.push_back(data[i]); 275 276 // We need to find the starting position for the left part if the offset was 277 // too big 278 uint64_t left_part_start = 279 aux_head + offset >= aux_size ? aux_head + offset - aux_size : 0; 280 for (uint64_t i = left_part_start; i < aux_head && output.size() < size; i++) 281 output.push_back(data[i]); 282 283 if (was_enabled) { 284 if (Error err = EnableWithIoctl()) 285 return std::move(err); 286 } 287 288 if (output.size() != size) 289 return createStringError(inconvertibleErrorCode(), 290 formatv("Requested {0} bytes of perf_event aux " 291 "buffer but only {1} are available", 292 size, output.size())); 293 294 return output; 295 } 296 297 Error PerfEvent::DisableWithIoctl() { 298 if (!m_enabled) 299 return Error::success(); 300 301 if (ioctl(*m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0) 302 return createStringError(inconvertibleErrorCode(), 303 "Can't disable perf event. %s", 304 std::strerror(errno)); 305 306 m_enabled = false; 307 return Error::success(); 308 } 309 310 bool PerfEvent::IsEnabled() const { return m_enabled; } 311 312 Error PerfEvent::EnableWithIoctl() { 313 if (m_enabled) 314 return Error::success(); 315 316 if (ioctl(*m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0) 317 return createStringError(inconvertibleErrorCode(), 318 "Can't enable perf event. %s", 319 std::strerror(errno)); 320 321 m_enabled = true; 322 return Error::success(); 323 } 324 325 size_t PerfEvent::GetEffectiveDataBufferSize() const { 326 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 327 if (mmap_metadata.data_head < mmap_metadata.data_size) 328 return mmap_metadata.data_head; 329 else 330 return mmap_metadata.data_size; // The buffer has wrapped. 331 } 332 333 Expected<PerfEvent> 334 lldb_private::process_linux::CreateContextSwitchTracePerfEvent( 335 lldb::core_id_t core_id, const PerfEvent *parent_perf_event) { 336 Log *log = GetLog(POSIXLog::Trace); 337 #ifndef PERF_ATTR_SIZE_VER5 338 return createStringError(inconvertibleErrorCode(), 339 "Intel PT Linux perf event not supported"); 340 #else 341 perf_event_attr attr; 342 memset(&attr, 0, sizeof(attr)); 343 attr.size = sizeof(attr); 344 attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME; 345 attr.type = PERF_TYPE_SOFTWARE; 346 attr.context_switch = 1; 347 attr.exclude_kernel = 1; 348 attr.sample_id_all = 1; 349 attr.exclude_hv = 1; 350 attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false; 351 352 // The given perf configuration will produce context switch records of 32 353 // bytes each. Assuming that every context switch will be emitted twice (one 354 // for context switch ins and another one for context switch outs), and that a 355 // context switch will happen at least every half a millisecond per core, we 356 // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more 357 // than what a regular intel pt trace can get. Pessimistically we pick as 358 // 32KiB for the size of our context switch trace. 359 360 uint64_t data_buffer_size = 32768; 361 uint64_t data_buffer_numpages = data_buffer_size / getpagesize(); 362 363 LLDB_LOG(log, "Will create context switch trace buffer of size {0}", 364 data_buffer_size); 365 366 Optional<long> group_fd; 367 if (parent_perf_event) 368 group_fd = parent_perf_event->GetFd(); 369 370 if (Expected<PerfEvent> perf_event = 371 PerfEvent::Init(attr, /*pid=*/None, core_id, group_fd, /*flags=*/0)) { 372 if (Error mmap_err = perf_event->MmapMetadataAndBuffers( 373 data_buffer_numpages, 0, /*data_buffer_write=*/false)) { 374 return std::move(mmap_err); 375 } 376 return perf_event; 377 } else { 378 return perf_event.takeError(); 379 } 380 #endif 381 } 382