/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2005-2006 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27 * 28 * $FreeBSD$ 29 */ 30 31 #ifdef FSTACK 32 #include <stdint.h> 33 #endif 34 35 #include <sys/param.h> 36 #include <sys/counter.h> 37 #include <sys/cpuset.h> 38 #include <sys/sysctl.h> 39 40 #include <vm/uma.h> 41 #include <vm/uma_int.h> 42 43 #include <err.h> 44 #include <errno.h> 45 #ifndef FSTACK 46 #include <kvm.h> 47 #endif 48 #include <nlist.h> 49 #include <stddef.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 55 #include "memstat.h" 56 #include "memstat_internal.h" 57 58 #ifndef FSTACK 59 static struct nlist namelist[] = { 60 #define X_UMA_KEGS 0 61 { .n_name = "_uma_kegs" }, 62 #define X_MP_MAXID 1 63 { .n_name = "_mp_maxid" }, 64 #define X_ALL_CPUS 2 65 { .n_name = "_all_cpus" }, 66 #define X_VM_NDOMAINS 3 67 { .n_name = "_vm_ndomains" }, 68 { .n_name = "" }, 69 }; 70 #endif 71 72 /* 73 * Extract uma(9) statistics from the running kernel, and store all memory 74 * type information in the passed list. For each type, check the list for an 75 * existing entry with the right name/allocator -- if present, update that 76 * entry. Otherwise, add a new entry. On error, the entire list will be 77 * cleared, as entries will be in an inconsistent state. 78 * 79 * To reduce the level of work for a list that starts empty, we keep around a 80 * hint as to whether it was empty when we began, so we can avoid searching 81 * the list for entries to update. Updates are O(n^2) due to searching for 82 * each entry before adding it. 83 */ 84 int 85 memstat_sysctl_uma(struct memory_type_list *list, int flags) 86 { 87 struct uma_stream_header *ushp; 88 struct uma_type_header *uthp; 89 struct uma_percpu_stat *upsp; 90 struct memory_type *mtp; 91 int count, hint_dontsearch, i, j, maxcpus, maxid; 92 char *buffer, *p; 93 size_t size; 94 95 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 96 97 /* 98 * Query the number of CPUs, number of malloc types so that we can 99 * guess an initial buffer size. 
We loop until we succeed or really 100 * fail. Note that the value of maxcpus we query using sysctl is not 101 * the version we use when processing the real data -- that is read 102 * from the header. 103 */ 104 retry: 105 size = sizeof(maxid); 106 if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { 107 if (errno == EACCES || errno == EPERM) 108 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 109 else 110 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 111 return (-1); 112 } 113 if (size != sizeof(maxid)) { 114 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 115 return (-1); 116 } 117 118 size = sizeof(count); 119 if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) { 120 if (errno == EACCES || errno == EPERM) 121 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 122 else 123 list->mtl_error = MEMSTAT_ERROR_VERSION; 124 return (-1); 125 } 126 if (size != sizeof(count)) { 127 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 128 return (-1); 129 } 130 131 size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * 132 (maxid + 1)); 133 134 buffer = malloc(size); 135 if (buffer == NULL) { 136 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 137 return (-1); 138 } 139 140 if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) { 141 /* 142 * XXXRW: ENOMEM is an ambiguous return, we should bound the 143 * number of loops, perhaps. 
144 */ 145 if (errno == ENOMEM) { 146 free(buffer); 147 goto retry; 148 } 149 if (errno == EACCES || errno == EPERM) 150 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 151 else 152 list->mtl_error = MEMSTAT_ERROR_VERSION; 153 free(buffer); 154 return (-1); 155 } 156 157 if (size == 0) { 158 free(buffer); 159 return (0); 160 } 161 162 if (size < sizeof(*ushp)) { 163 list->mtl_error = MEMSTAT_ERROR_VERSION; 164 free(buffer); 165 return (-1); 166 } 167 p = buffer; 168 ushp = (struct uma_stream_header *)p; 169 p += sizeof(*ushp); 170 171 if (ushp->ush_version != UMA_STREAM_VERSION) { 172 list->mtl_error = MEMSTAT_ERROR_VERSION; 173 free(buffer); 174 return (-1); 175 } 176 177 /* 178 * For the remainder of this function, we are quite trusting about 179 * the layout of structures and sizes, since we've determined we have 180 * a matching version and acceptable CPU count. 181 */ 182 maxcpus = ushp->ush_maxcpus; 183 count = ushp->ush_count; 184 for (i = 0; i < count; i++) { 185 uthp = (struct uma_type_header *)p; 186 p += sizeof(*uthp); 187 188 if (hint_dontsearch == 0) { 189 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 190 uthp->uth_name); 191 } else 192 mtp = NULL; 193 if (mtp == NULL) 194 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 195 uthp->uth_name, maxid + 1); 196 if (mtp == NULL) { 197 _memstat_mtl_empty(list); 198 free(buffer); 199 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 200 return (-1); 201 } 202 203 /* 204 * Reset the statistics on a current node. 
205 */ 206 _memstat_mt_reset_stats(mtp, maxid + 1); 207 208 mtp->mt_numallocs = uthp->uth_allocs; 209 mtp->mt_numfrees = uthp->uth_frees; 210 mtp->mt_failures = uthp->uth_fails; 211 mtp->mt_sleeps = uthp->uth_sleeps; 212 mtp->mt_xdomain = uthp->uth_xdomain; 213 214 for (j = 0; j < maxcpus; j++) { 215 upsp = (struct uma_percpu_stat *)p; 216 p += sizeof(*upsp); 217 218 mtp->mt_percpu_cache[j].mtp_free = 219 upsp->ups_cache_free; 220 mtp->mt_free += upsp->ups_cache_free; 221 mtp->mt_numallocs += upsp->ups_allocs; 222 mtp->mt_numfrees += upsp->ups_frees; 223 } 224 225 /* 226 * Values for uth_allocs and uth_frees frees are snap. 227 * It may happen that kernel reports that number of frees 228 * is greater than number of allocs. See counter(9) for 229 * details. 230 */ 231 if (mtp->mt_numallocs < mtp->mt_numfrees) 232 mtp->mt_numallocs = mtp->mt_numfrees; 233 234 mtp->mt_size = uthp->uth_size; 235 mtp->mt_rsize = uthp->uth_rsize; 236 mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; 237 mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; 238 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 239 mtp->mt_countlimit = uthp->uth_limit; 240 mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; 241 242 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 243 mtp->mt_zonefree = uthp->uth_zone_free; 244 245 /* 246 * UMA secondary zones share a keg with the primary zone. To 247 * avoid double-reporting of free items, report keg free 248 * items only in the primary zone. 
249 */ 250 if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { 251 mtp->mt_kegfree = uthp->uth_keg_free; 252 mtp->mt_free += mtp->mt_kegfree; 253 } 254 mtp->mt_free += mtp->mt_zonefree; 255 } 256 257 free(buffer); 258 259 return (0); 260 } 261 262 #ifndef FSTACK 263 static int 264 kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, 265 size_t offset) 266 { 267 ssize_t ret; 268 269 ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, 270 size); 271 if (ret < 0) 272 return (MEMSTAT_ERROR_KVM); 273 if ((size_t)ret != size) 274 return (MEMSTAT_ERROR_KVM_SHORTREAD); 275 return (0); 276 } 277 278 static int 279 kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) 280 { 281 ssize_t ret; 282 int i; 283 284 for (i = 0; i < buflen; i++) { 285 ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, 286 &(buffer[i]), sizeof(char)); 287 if (ret < 0) 288 return (MEMSTAT_ERROR_KVM); 289 if ((size_t)ret != sizeof(char)) 290 return (MEMSTAT_ERROR_KVM_SHORTREAD); 291 if (buffer[i] == '\0') 292 return (0); 293 } 294 /* Truncate. */ 295 buffer[i-1] = '\0'; 296 return (0); 297 } 298 299 static int 300 kread_symbol(kvm_t *kvm, int index, void *address, size_t size, 301 size_t offset) 302 { 303 ssize_t ret; 304 305 ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); 306 if (ret < 0) 307 return (MEMSTAT_ERROR_KVM); 308 if ((size_t)ret != size) 309 return (MEMSTAT_ERROR_KVM_SHORTREAD); 310 return (0); 311 } 312 313 /* 314 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts 315 * UMA(9) statistics from a kernel core/memory file. 
316 */ 317 int 318 memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) 319 { 320 LIST_HEAD(, uma_keg) uma_kegs; 321 struct memory_type *mtp; 322 struct uma_zone_domain uzd; 323 struct uma_domain ukd; 324 struct uma_bucket *ubp, ub; 325 struct uma_cache *ucp, *ucp_array; 326 struct uma_zone *uzp, uz; 327 struct uma_keg *kzp, kz; 328 uint64_t kegfree; 329 int hint_dontsearch, i, mp_maxid, ndomains, ret; 330 char name[MEMTYPE_MAXNAME]; 331 cpuset_t all_cpus; 332 long cpusetsize; 333 kvm_t *kvm; 334 335 kvm = (kvm_t *)kvm_handle; 336 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 337 if (kvm_nlist(kvm, namelist) != 0) { 338 list->mtl_error = MEMSTAT_ERROR_KVM; 339 return (-1); 340 } 341 if (namelist[X_UMA_KEGS].n_type == 0 || 342 namelist[X_UMA_KEGS].n_value == 0) { 343 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 344 return (-1); 345 } 346 ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); 347 if (ret != 0) { 348 list->mtl_error = ret; 349 return (-1); 350 } 351 ret = kread_symbol(kvm, X_VM_NDOMAINS, &ndomains, 352 sizeof(ndomains), 0); 353 if (ret != 0) { 354 list->mtl_error = ret; 355 return (-1); 356 } 357 ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); 358 if (ret != 0) { 359 list->mtl_error = ret; 360 return (-1); 361 } 362 cpusetsize = sysconf(_SC_CPUSET_SIZE); 363 if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { 364 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 365 return (-1); 366 } 367 CPU_ZERO(&all_cpus); 368 ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); 369 if (ret != 0) { 370 list->mtl_error = ret; 371 return (-1); 372 } 373 ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); 374 if (ucp_array == NULL) { 375 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 376 return (-1); 377 } 378 for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = 379 LIST_NEXT(&kz, uk_link)) { 380 ret = kread(kvm, kzp, &kz, sizeof(kz), 0); 381 if (ret != 0) { 382 free(ucp_array); 383 
_memstat_mtl_empty(list); 384 list->mtl_error = ret; 385 return (-1); 386 } 387 for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = 388 LIST_NEXT(&uz, uz_link)) { 389 ret = kread(kvm, uzp, &uz, sizeof(uz), 0); 390 if (ret != 0) { 391 free(ucp_array); 392 _memstat_mtl_empty(list); 393 list->mtl_error = ret; 394 return (-1); 395 } 396 ret = kread(kvm, uzp, ucp_array, 397 sizeof(struct uma_cache) * (mp_maxid + 1), 398 offsetof(struct uma_zone, uz_cpu[0])); 399 if (ret != 0) { 400 free(ucp_array); 401 _memstat_mtl_empty(list); 402 list->mtl_error = ret; 403 return (-1); 404 } 405 ret = kread_string(kvm, uz.uz_name, name, 406 MEMTYPE_MAXNAME); 407 if (ret != 0) { 408 free(ucp_array); 409 _memstat_mtl_empty(list); 410 list->mtl_error = ret; 411 return (-1); 412 } 413 if (hint_dontsearch == 0) { 414 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 415 name); 416 } else 417 mtp = NULL; 418 if (mtp == NULL) 419 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 420 name, mp_maxid + 1); 421 if (mtp == NULL) { 422 free(ucp_array); 423 _memstat_mtl_empty(list); 424 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 425 return (-1); 426 } 427 /* 428 * Reset the statistics on a current node. 429 */ 430 _memstat_mt_reset_stats(mtp, mp_maxid + 1); 431 mtp->mt_numallocs = kvm_counter_u64_fetch(kvm, 432 (unsigned long )uz.uz_allocs); 433 mtp->mt_numfrees = kvm_counter_u64_fetch(kvm, 434 (unsigned long )uz.uz_frees); 435 mtp->mt_failures = kvm_counter_u64_fetch(kvm, 436 (unsigned long )uz.uz_fails); 437 mtp->mt_xdomain = kvm_counter_u64_fetch(kvm, 438 (unsigned long )uz.uz_xdomain); 439 mtp->mt_sleeps = uz.uz_sleeps; 440 /* See comment above in memstat_sysctl_uma(). 
*/ 441 if (mtp->mt_numallocs < mtp->mt_numfrees) 442 mtp->mt_numallocs = mtp->mt_numfrees; 443 444 if (kz.uk_flags & UMA_ZFLAG_INTERNAL) 445 goto skip_percpu; 446 for (i = 0; i < mp_maxid + 1; i++) { 447 if (!CPU_ISSET(i, &all_cpus)) 448 continue; 449 ucp = &ucp_array[i]; 450 mtp->mt_numallocs += ucp->uc_allocs; 451 mtp->mt_numfrees += ucp->uc_frees; 452 453 mtp->mt_free += ucp->uc_allocbucket.ucb_cnt; 454 mtp->mt_free += ucp->uc_freebucket.ucb_cnt; 455 mtp->mt_free += ucp->uc_crossbucket.ucb_cnt; 456 } 457 skip_percpu: 458 mtp->mt_size = kz.uk_size; 459 mtp->mt_rsize = kz.uk_rsize; 460 mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; 461 mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; 462 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 463 mtp->mt_countlimit = uz.uz_max_items; 464 mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; 465 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 466 for (i = 0; i < ndomains; i++) { 467 ret = kread(kvm, ZDOM_GET(uzp, i), &uzd, 468 sizeof(uzd), 0); 469 if (ret != 0) 470 continue; 471 for (ubp = 472 STAILQ_FIRST(&uzd.uzd_buckets); 473 ubp != NULL; 474 ubp = STAILQ_NEXT(&ub, ub_link)) { 475 ret = kread(kvm, ubp, &ub, 476 sizeof(ub), 0); 477 if (ret != 0) 478 continue; 479 mtp->mt_zonefree += ub.ub_cnt; 480 } 481 } 482 if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && 483 LIST_FIRST(&kz.uk_zones) != uzp)) { 484 kegfree = 0; 485 for (i = 0; i < ndomains; i++) { 486 ret = kread(kvm, &kzp->uk_domain[i], 487 &ukd, sizeof(ukd), 0); 488 if (ret != 0) 489 kegfree += ukd.ud_free_items; 490 } 491 mtp->mt_kegfree = kegfree; 492 mtp->mt_free += mtp->mt_kegfree; 493 } 494 mtp->mt_free += mtp->mt_zonefree; 495 } 496 } 497 free(ucp_array); 498 return (0); 499 } 500 #endif 501 502