1 //===-- Perf.cpp ----------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Perf.h" 10 11 #include "Plugins/Process/POSIX/ProcessPOSIXLog.h" 12 #include "lldb/Host/linux/Support.h" 13 14 #include "llvm/Support/FormatVariadic.h" 15 #include "llvm/Support/MathExtras.h" 16 #include "llvm/Support/MemoryBuffer.h" 17 18 #include <linux/version.h> 19 #include <sys/ioctl.h> 20 #include <sys/mman.h> 21 #include <sys/syscall.h> 22 #include <unistd.h> 23 24 using namespace lldb_private; 25 using namespace process_linux; 26 using namespace llvm; 27 28 Expected<LinuxPerfZeroTscConversion> 29 lldb_private::process_linux::LoadPerfTscConversionParameters() { 30 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,0) 31 lldb::pid_t pid = getpid(); 32 perf_event_attr attr; 33 memset(&attr, 0, sizeof(attr)); 34 attr.size = sizeof(attr); 35 attr.type = PERF_TYPE_SOFTWARE; 36 attr.config = PERF_COUNT_SW_DUMMY; 37 38 Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid); 39 if (!perf_event) 40 return perf_event.takeError(); 41 if (Error mmap_err = 42 perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0, 43 /*num_aux_pages=*/0, 44 /*data_buffer_write=*/false)) 45 return std::move(mmap_err); 46 47 perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage(); 48 if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) { 49 return LinuxPerfZeroTscConversion{ 50 mmap_metada.time_mult, mmap_metada.time_shift, {mmap_metada.time_zero}}; 51 } else { 52 auto err_cap = 53 !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero"; 54 std::string err_msg = 55 llvm::formatv("Can't get TSC to real time conversion values. " 56 "perf_event capability '{0}' not supported.", 57 err_cap); 58 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 59 } 60 #else 61 std::string err_msg = "PERF_COUNT_SW_DUMMY requires Linux 3.12"; 62 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 63 #endif 64 } 65 66 void resource_handle::MmapDeleter::operator()(void *ptr) { 67 if (m_bytes && ptr != nullptr) 68 munmap(ptr, m_bytes); 69 } 70 71 void resource_handle::FileDescriptorDeleter::operator()(long *ptr) { 72 if (ptr == nullptr) 73 return; 74 if (*ptr == -1) 75 return; 76 close(*ptr); 77 std::default_delete<long>()(ptr); 78 } 79 80 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, 81 Optional<lldb::pid_t> pid, 82 Optional<lldb::cpu_id_t> cpu, 83 Optional<long> group_fd, 84 unsigned long flags) { 85 errno = 0; 86 long fd = syscall(SYS_perf_event_open, &attr, pid.value_or(-1), 87 cpu.value_or(-1), group_fd.value_or(-1), flags); 88 if (fd == -1) { 89 std::string err_msg = 90 llvm::formatv("perf event syscall failed: {0}", std::strerror(errno)); 91 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg); 92 } 93 return PerfEvent(fd, !attr.disabled); 94 } 95 96 llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr, 97 Optional<lldb::pid_t> pid, 98 Optional<lldb::cpu_id_t> cpu) { 99 return Init(attr, pid, cpu, -1, 0); 100 } 101 102 llvm::Expected<resource_handle::MmapUP> 103 PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags, 104 long int offset, llvm::StringRef buffer_name) { 105 errno = 0; 106 auto mmap_result = ::mmap(addr, length, prot, flags, GetFd(), offset); 107 108 if (mmap_result == MAP_FAILED) { 109 std::string err_msg = 110 llvm::formatv("perf event mmap allocation failed for {0}: {1}", 111 buffer_name, std::strerror(errno)); 112 return createStringError(inconvertibleErrorCode(), err_msg); 113 } 114 return resource_handle::MmapUP(mmap_result, length); 115 } 116 117 llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages, 118 bool data_buffer_write) { 119 size_t mmap_size = (num_data_pages + 1) * getpagesize(); 120 if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap( 121 nullptr, mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0), 122 MAP_SHARED, 0, "metadata and data buffer")) { 123 m_metadata_data_base = std::move(mmap_metadata_data.get()); 124 return Error::success(); 125 } else 126 return mmap_metadata_data.takeError(); 127 } 128 129 llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) { 130 #ifndef PERF_ATTR_SIZE_VER5 131 return createStringError(inconvertibleErrorCode(), 132 "Intel PT Linux perf event not supported"); 133 #else 134 if (num_aux_pages == 0) 135 return Error::success(); 136 137 perf_event_mmap_page &metadata_page = GetMetadataPage(); 138 139 metadata_page.aux_offset = 140 metadata_page.data_offset + metadata_page.data_size; 141 metadata_page.aux_size = num_aux_pages * getpagesize(); 142 143 if (Expected<resource_handle::MmapUP> mmap_aux = 144 DoMmap(nullptr, metadata_page.aux_size, PROT_READ, MAP_SHARED, 145 metadata_page.aux_offset, "aux buffer")) { 146 m_aux_base = std::move(mmap_aux.get()); 147 return Error::success(); 148 } else 149 return mmap_aux.takeError(); 150 #endif 151 } 152 153 llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages, 154 size_t num_aux_pages, 155 bool data_buffer_write) { 156 if (num_data_pages != 0 && !isPowerOf2_64(num_data_pages)) 157 return llvm::createStringError( 158 llvm::inconvertibleErrorCode(), 159 llvm::formatv("Number of data pages must be a power of 2, got: {0}", 160 num_data_pages)); 161 if (num_aux_pages != 0 && !isPowerOf2_64(num_aux_pages)) 162 return llvm::createStringError( 163 llvm::inconvertibleErrorCode(), 164 llvm::formatv("Number of aux pages must be a power of 2, got: {0}", 165 num_aux_pages)); 166 if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write)) 167 return err; 168 if (Error err = MmapAuxBuffer(num_aux_pages)) 169 return err; 170 return Error::success(); 171 } 172 173 long PerfEvent::GetFd() const { return *(m_fd.get()); } 174 175 perf_event_mmap_page &PerfEvent::GetMetadataPage() const { 176 return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get()); 177 } 178 179 ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const { 180 #ifndef PERF_ATTR_SIZE_VER5 181 llvm_unreachable("Intel PT Linux perf event not supported"); 182 #else 183 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 184 return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) + 185 mmap_metadata.data_offset, 186 static_cast<size_t>(mmap_metadata.data_size)}; 187 #endif 188 } 189 190 ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const { 191 #ifndef PERF_ATTR_SIZE_VER5 192 llvm_unreachable("Intel PT Linux perf event not supported"); 193 #else 194 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 195 return {reinterpret_cast<uint8_t *>(m_aux_base.get()), 196 static_cast<size_t>(mmap_metadata.aux_size)}; 197 #endif 198 } 199 200 Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() { 201 // The following code assumes that the protection level of the DATA page 202 // is PROT_READ. If PROT_WRITE is used, then reading would require that 203 // this piece of code updates some pointers. See more about data_tail 204 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. 205 206 #ifndef PERF_ATTR_SIZE_VER5 207 return createStringError(inconvertibleErrorCode(), 208 "Intel PT Linux perf event not supported"); 209 #else 210 bool was_enabled = m_enabled; 211 if (Error err = DisableWithIoctl()) 212 return std::move(err); 213 214 /** 215 * The data buffer and aux buffer have different implementations 216 * with respect to their definition of head pointer when using PROD_READ only. 217 * In the case of Aux data buffer the head always wraps around the aux buffer 218 * and we don't need to care about it, whereas the data_head keeps 219 * increasing and needs to be wrapped by modulus operator 220 */ 221 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 222 223 ArrayRef<uint8_t> data = GetDataBuffer(); 224 uint64_t data_head = mmap_metadata.data_head; 225 uint64_t data_size = mmap_metadata.data_size; 226 std::vector<uint8_t> output; 227 output.reserve(data.size()); 228 229 if (data_head > data_size) { 230 uint64_t actual_data_head = data_head % data_size; 231 // The buffer has wrapped, so we first the oldest chunk of data 232 output.insert(output.end(), data.begin() + actual_data_head, data.end()); 233 // And we we read the most recent chunk of data 234 output.insert(output.end(), data.begin(), data.begin() + actual_data_head); 235 } else { 236 // There's been no wrapping, so we just read linearly 237 output.insert(output.end(), data.begin(), data.begin() + data_head); 238 } 239 240 if (was_enabled) { 241 if (Error err = EnableWithIoctl()) 242 return std::move(err); 243 } 244 245 return output; 246 #endif 247 } 248 249 Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() { 250 // The following code assumes that the protection level of the AUX page 251 // is PROT_READ. If PROT_WRITE is used, then reading would require that 252 // this piece of code updates some pointers. See more about aux_tail 253 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html. 254 255 #ifndef PERF_ATTR_SIZE_VER5 256 return createStringError(inconvertibleErrorCode(), 257 "Intel PT Linux perf event not supported"); 258 #else 259 bool was_enabled = m_enabled; 260 if (Error err = DisableWithIoctl()) 261 return std::move(err); 262 263 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 264 265 ArrayRef<uint8_t> data = GetAuxBuffer(); 266 uint64_t aux_head = mmap_metadata.aux_head; 267 std::vector<uint8_t> output; 268 output.reserve(data.size()); 269 270 /** 271 * When configured as ring buffer, the aux buffer keeps wrapping around 272 * the buffer and its not possible to detect how many times the buffer 273 * wrapped. Initially the buffer is filled with zeros,as shown below 274 * so in order to get complete buffer we first copy firstpartsize, followed 275 * by any left over part from beginning to aux_head 276 * 277 * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size 278 * aux_head->||<- firstpartsize ->| 279 * 280 * */ 281 282 output.insert(output.end(), data.begin() + aux_head, data.end()); 283 output.insert(output.end(), data.begin(), data.begin() + aux_head); 284 285 if (was_enabled) { 286 if (Error err = EnableWithIoctl()) 287 return std::move(err); 288 } 289 290 return output; 291 #endif 292 } 293 294 Error PerfEvent::DisableWithIoctl() { 295 if (!m_enabled) 296 return Error::success(); 297 298 if (ioctl(*m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0) 299 return createStringError(inconvertibleErrorCode(), 300 "Can't disable perf event. %s", 301 std::strerror(errno)); 302 303 m_enabled = false; 304 return Error::success(); 305 } 306 307 bool PerfEvent::IsEnabled() const { return m_enabled; } 308 309 Error PerfEvent::EnableWithIoctl() { 310 if (m_enabled) 311 return Error::success(); 312 313 if (ioctl(*m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0) 314 return createStringError(inconvertibleErrorCode(), 315 "Can't enable perf event. %s", 316 std::strerror(errno)); 317 318 m_enabled = true; 319 return Error::success(); 320 } 321 322 size_t PerfEvent::GetEffectiveDataBufferSize() const { 323 #ifndef PERF_ATTR_SIZE_VER5 324 llvm_unreachable("Intel PT Linux perf event not supported"); 325 #else 326 perf_event_mmap_page &mmap_metadata = GetMetadataPage(); 327 if (mmap_metadata.data_head < mmap_metadata.data_size) 328 return mmap_metadata.data_head; 329 else 330 return mmap_metadata.data_size; // The buffer has wrapped. 331 #endif 332 } 333 334 Expected<PerfEvent> 335 lldb_private::process_linux::CreateContextSwitchTracePerfEvent( 336 lldb::cpu_id_t cpu_id, const PerfEvent *parent_perf_event) { 337 Log *log = GetLog(POSIXLog::Trace); 338 #ifndef PERF_ATTR_SIZE_VER5 339 return createStringError(inconvertibleErrorCode(), 340 "Intel PT Linux perf event not supported"); 341 #else 342 perf_event_attr attr; 343 memset(&attr, 0, sizeof(attr)); 344 attr.size = sizeof(attr); 345 attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME; 346 attr.type = PERF_TYPE_SOFTWARE; 347 attr.context_switch = 1; 348 attr.exclude_kernel = 1; 349 attr.sample_id_all = 1; 350 attr.exclude_hv = 1; 351 attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false; 352 353 // The given perf configuration will produce context switch records of 32 354 // bytes each. Assuming that every context switch will be emitted twice (one 355 // for context switch ins and another one for context switch outs), and that a 356 // context switch will happen at least every half a millisecond per core, we 357 // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more 358 // than what a regular intel pt trace can get. Pessimistically we pick as 359 // 32KiB for the size of our context switch trace. 360 361 uint64_t data_buffer_size = 32768; 362 uint64_t data_buffer_numpages = data_buffer_size / getpagesize(); 363 364 LLDB_LOG(log, "Will create context switch trace buffer of size {0}", 365 data_buffer_size); 366 367 Optional<long> group_fd; 368 if (parent_perf_event) 369 group_fd = parent_perf_event->GetFd(); 370 371 if (Expected<PerfEvent> perf_event = 372 PerfEvent::Init(attr, /*pid=*/None, cpu_id, group_fd, /*flags=*/0)) { 373 if (Error mmap_err = perf_event->MmapMetadataAndBuffers( 374 data_buffer_numpages, 0, /*data_buffer_write=*/false)) { 375 return std::move(mmap_err); 376 } 377 return perf_event; 378 } else { 379 return perf_event.takeError(); 380 } 381 #endif 382 } 383