1 /*- 2 * Copyright (c) 2005-2006 Robert N. M. Watson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/param.h> 30 #include <sys/cpuset.h> 31 #include <sys/sysctl.h> 32 33 #ifndef FSTACK 34 #include <vm/vm.h> 35 #include <vm/vm_page.h> 36 #endif 37 38 #include <vm/uma.h> 39 #include <vm/uma_int.h> 40 41 #include <err.h> 42 #include <errno.h> 43 #ifndef FSTACK 44 #include <kvm.h> 45 #endif 46 #include <nlist.h> 47 #include <stddef.h> 48 #include <stdio.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <unistd.h> 52 53 #include "memstat.h" 54 #include "memstat_internal.h" 55 56 #ifndef FSTACK 57 static struct nlist namelist[] = { 58 #define X_UMA_KEGS 0 59 { .n_name = "_uma_kegs" }, 60 #define X_MP_MAXID 1 61 { .n_name = "_mp_maxid" }, 62 #define X_ALL_CPUS 2 63 { .n_name = "_all_cpus" }, 64 { .n_name = "" }, 65 }; 66 #endif 67 68 /* 69 * Extract uma(9) statistics from the running kernel, and store all memory 70 * type information in the passed list. For each type, check the list for an 71 * existing entry with the right name/allocator -- if present, update that 72 * entry. Otherwise, add a new entry. On error, the entire list will be 73 * cleared, as entries will be in an inconsistent state. 74 * 75 * To reduce the level of work for a list that starts empty, we keep around a 76 * hint as to whether it was empty when we began, so we can avoid searching 77 * the list for entries to update. Updates are O(n^2) due to searching for 78 * each entry before adding it. 79 */ 80 int 81 memstat_sysctl_uma(struct memory_type_list *list, int flags) 82 { 83 struct uma_stream_header *ushp; 84 struct uma_type_header *uthp; 85 struct uma_percpu_stat *upsp; 86 struct memory_type *mtp; 87 int count, hint_dontsearch, i, j, maxcpus, maxid; 88 char *buffer, *p; 89 size_t size; 90 91 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 92 93 /* 94 * Query the number of CPUs, number of malloc types so that we can 95 * guess an initial buffer size. We loop until we succeed or really 96 * fail. Note that the value of maxcpus we query using sysctl is not 97 * the version we use when processing the real data -- that is read 98 * from the header. 99 */ 100 retry: 101 size = sizeof(maxid); 102 if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { 103 if (errno == EACCES || errno == EPERM) 104 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 105 else 106 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 107 return (-1); 108 } 109 if (size != sizeof(maxid)) { 110 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 111 return (-1); 112 } 113 114 size = sizeof(count); 115 if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) { 116 if (errno == EACCES || errno == EPERM) 117 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 118 else 119 list->mtl_error = MEMSTAT_ERROR_VERSION; 120 return (-1); 121 } 122 if (size != sizeof(count)) { 123 list->mtl_error = MEMSTAT_ERROR_DATAERROR; 124 return (-1); 125 } 126 127 size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * 128 (maxid + 1)); 129 130 buffer = malloc(size); 131 if (buffer == NULL) { 132 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 133 return (-1); 134 } 135 136 if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) { 137 /* 138 * XXXRW: ENOMEM is an ambiguous return, we should bound the 139 * number of loops, perhaps. 140 */ 141 if (errno == ENOMEM) { 142 free(buffer); 143 goto retry; 144 } 145 if (errno == EACCES || errno == EPERM) 146 list->mtl_error = MEMSTAT_ERROR_PERMISSION; 147 else 148 list->mtl_error = MEMSTAT_ERROR_VERSION; 149 free(buffer); 150 return (-1); 151 } 152 153 if (size == 0) { 154 free(buffer); 155 return (0); 156 } 157 158 if (size < sizeof(*ushp)) { 159 list->mtl_error = MEMSTAT_ERROR_VERSION; 160 free(buffer); 161 return (-1); 162 } 163 p = buffer; 164 ushp = (struct uma_stream_header *)p; 165 p += sizeof(*ushp); 166 167 if (ushp->ush_version != UMA_STREAM_VERSION) { 168 list->mtl_error = MEMSTAT_ERROR_VERSION; 169 free(buffer); 170 return (-1); 171 } 172 173 /* 174 * For the remainder of this function, we are quite trusting about 175 * the layout of structures and sizes, since we've determined we have 176 * a matching version and acceptable CPU count. 177 */ 178 maxcpus = ushp->ush_maxcpus; 179 count = ushp->ush_count; 180 for (i = 0; i < count; i++) { 181 uthp = (struct uma_type_header *)p; 182 p += sizeof(*uthp); 183 184 if (hint_dontsearch == 0) { 185 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 186 uthp->uth_name); 187 } else 188 mtp = NULL; 189 if (mtp == NULL) 190 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 191 uthp->uth_name, maxid + 1); 192 if (mtp == NULL) { 193 _memstat_mtl_empty(list); 194 free(buffer); 195 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 196 return (-1); 197 } 198 199 /* 200 * Reset the statistics on a current node. 201 */ 202 _memstat_mt_reset_stats(mtp, maxid + 1); 203 204 mtp->mt_numallocs = uthp->uth_allocs; 205 mtp->mt_numfrees = uthp->uth_frees; 206 mtp->mt_failures = uthp->uth_fails; 207 mtp->mt_sleeps = uthp->uth_sleeps; 208 209 for (j = 0; j < maxcpus; j++) { 210 upsp = (struct uma_percpu_stat *)p; 211 p += sizeof(*upsp); 212 213 mtp->mt_percpu_cache[j].mtp_free = 214 upsp->ups_cache_free; 215 mtp->mt_free += upsp->ups_cache_free; 216 mtp->mt_numallocs += upsp->ups_allocs; 217 mtp->mt_numfrees += upsp->ups_frees; 218 } 219 220 mtp->mt_size = uthp->uth_size; 221 mtp->mt_rsize = uthp->uth_rsize; 222 mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; 223 mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; 224 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 225 mtp->mt_countlimit = uthp->uth_limit; 226 mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; 227 228 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 229 mtp->mt_zonefree = uthp->uth_zone_free; 230 231 /* 232 * UMA secondary zones share a keg with the primary zone. To 233 * avoid double-reporting of free items, report keg free 234 * items only in the primary zone. 235 */ 236 if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { 237 mtp->mt_kegfree = uthp->uth_keg_free; 238 mtp->mt_free += mtp->mt_kegfree; 239 } 240 mtp->mt_free += mtp->mt_zonefree; 241 } 242 243 free(buffer); 244 245 return (0); 246 } 247 248 #ifndef FSTACK 249 static int 250 kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, 251 size_t offset) 252 { 253 ssize_t ret; 254 255 ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, 256 size); 257 if (ret < 0) 258 return (MEMSTAT_ERROR_KVM); 259 if ((size_t)ret != size) 260 return (MEMSTAT_ERROR_KVM_SHORTREAD); 261 return (0); 262 } 263 264 static int 265 kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) 266 { 267 ssize_t ret; 268 int i; 269 270 for (i = 0; i < buflen; i++) { 271 ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, 272 &(buffer[i]), sizeof(char)); 273 if (ret < 0) 274 return (MEMSTAT_ERROR_KVM); 275 if ((size_t)ret != sizeof(char)) 276 return (MEMSTAT_ERROR_KVM_SHORTREAD); 277 if (buffer[i] == '\0') 278 return (0); 279 } 280 /* Truncate. */ 281 buffer[i-1] = '\0'; 282 return (0); 283 } 284 285 static int 286 kread_symbol(kvm_t *kvm, int index, void *address, size_t size, 287 size_t offset) 288 { 289 ssize_t ret; 290 291 ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); 292 if (ret < 0) 293 return (MEMSTAT_ERROR_KVM); 294 if ((size_t)ret != size) 295 return (MEMSTAT_ERROR_KVM_SHORTREAD); 296 return (0); 297 } 298 299 /* 300 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts 301 * UMA(9) statistics from a kernel core/memory file. 302 */ 303 int 304 memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) 305 { 306 LIST_HEAD(, uma_keg) uma_kegs; 307 struct memory_type *mtp; 308 struct uma_bucket *ubp, ub; 309 struct uma_cache *ucp, *ucp_array; 310 struct uma_zone *uzp, uz; 311 struct uma_keg *kzp, kz; 312 int hint_dontsearch, i, mp_maxid, ret; 313 char name[MEMTYPE_MAXNAME]; 314 cpuset_t all_cpus; 315 long cpusetsize; 316 kvm_t *kvm; 317 318 kvm = (kvm_t *)kvm_handle; 319 hint_dontsearch = LIST_EMPTY(&list->mtl_list); 320 if (kvm_nlist(kvm, namelist) != 0) { 321 list->mtl_error = MEMSTAT_ERROR_KVM; 322 return (-1); 323 } 324 if (namelist[X_UMA_KEGS].n_type == 0 || 325 namelist[X_UMA_KEGS].n_value == 0) { 326 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 327 return (-1); 328 } 329 ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); 330 if (ret != 0) { 331 list->mtl_error = ret; 332 return (-1); 333 } 334 ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); 335 if (ret != 0) { 336 list->mtl_error = ret; 337 return (-1); 338 } 339 cpusetsize = sysconf(_SC_CPUSET_SIZE); 340 if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { 341 list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; 342 return (-1); 343 } 344 CPU_ZERO(&all_cpus); 345 ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); 346 if (ret != 0) { 347 list->mtl_error = ret; 348 return (-1); 349 } 350 ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); 351 if (ucp_array == NULL) { 352 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 353 return (-1); 354 } 355 for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = 356 LIST_NEXT(&kz, uk_link)) { 357 ret = kread(kvm, kzp, &kz, sizeof(kz), 0); 358 if (ret != 0) { 359 free(ucp_array); 360 _memstat_mtl_empty(list); 361 list->mtl_error = ret; 362 return (-1); 363 } 364 for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = 365 LIST_NEXT(&uz, uz_link)) { 366 ret = kread(kvm, uzp, &uz, sizeof(uz), 0); 367 if (ret != 0) { 368 free(ucp_array); 369 _memstat_mtl_empty(list); 370 list->mtl_error = ret; 371 return (-1); 372 } 373 ret = kread(kvm, uzp, ucp_array, 374 sizeof(struct uma_cache) * (mp_maxid + 1), 375 offsetof(struct uma_zone, uz_cpu[0])); 376 if (ret != 0) { 377 free(ucp_array); 378 _memstat_mtl_empty(list); 379 list->mtl_error = ret; 380 return (-1); 381 } 382 ret = kread_string(kvm, uz.uz_name, name, 383 MEMTYPE_MAXNAME); 384 if (ret != 0) { 385 free(ucp_array); 386 _memstat_mtl_empty(list); 387 list->mtl_error = ret; 388 return (-1); 389 } 390 if (hint_dontsearch == 0) { 391 mtp = memstat_mtl_find(list, ALLOCATOR_UMA, 392 name); 393 } else 394 mtp = NULL; 395 if (mtp == NULL) 396 mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, 397 name, mp_maxid + 1); 398 if (mtp == NULL) { 399 free(ucp_array); 400 _memstat_mtl_empty(list); 401 list->mtl_error = MEMSTAT_ERROR_NOMEMORY; 402 return (-1); 403 } 404 /* 405 * Reset the statistics on a current node. 406 */ 407 _memstat_mt_reset_stats(mtp, mp_maxid + 1); 408 mtp->mt_numallocs = uz.uz_allocs; 409 mtp->mt_numfrees = uz.uz_frees; 410 mtp->mt_failures = uz.uz_fails; 411 mtp->mt_sleeps = uz.uz_sleeps; 412 if (kz.uk_flags & UMA_ZFLAG_INTERNAL) 413 goto skip_percpu; 414 for (i = 0; i < mp_maxid + 1; i++) { 415 if (!CPU_ISSET(i, &all_cpus)) 416 continue; 417 ucp = &ucp_array[i]; 418 mtp->mt_numallocs += ucp->uc_allocs; 419 mtp->mt_numfrees += ucp->uc_frees; 420 421 if (ucp->uc_allocbucket != NULL) { 422 ret = kread(kvm, ucp->uc_allocbucket, 423 &ub, sizeof(ub), 0); 424 if (ret != 0) { 425 free(ucp_array); 426 _memstat_mtl_empty(list); 427 list->mtl_error = ret; 428 return (-1); 429 } 430 mtp->mt_free += ub.ub_cnt; 431 } 432 if (ucp->uc_freebucket != NULL) { 433 ret = kread(kvm, ucp->uc_freebucket, 434 &ub, sizeof(ub), 0); 435 if (ret != 0) { 436 free(ucp_array); 437 _memstat_mtl_empty(list); 438 list->mtl_error = ret; 439 return (-1); 440 } 441 mtp->mt_free += ub.ub_cnt; 442 } 443 } 444 skip_percpu: 445 mtp->mt_size = kz.uk_size; 446 mtp->mt_rsize = kz.uk_rsize; 447 mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; 448 mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; 449 mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; 450 if (kz.uk_ppera > 1) 451 mtp->mt_countlimit = kz.uk_maxpages / 452 kz.uk_ipers; 453 else 454 mtp->mt_countlimit = kz.uk_maxpages * 455 kz.uk_ipers; 456 mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; 457 mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; 458 for (ubp = LIST_FIRST(&uz.uz_buckets); ubp != 459 NULL; ubp = LIST_NEXT(&ub, ub_link)) { 460 ret = kread(kvm, ubp, &ub, sizeof(ub), 0); 461 mtp->mt_zonefree += ub.ub_cnt; 462 } 463 if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && 464 LIST_FIRST(&kz.uk_zones) != uzp)) { 465 mtp->mt_kegfree = kz.uk_free; 466 mtp->mt_free += mtp->mt_kegfree; 467 } 468 mtp->mt_free += mtp->mt_zonefree; 469 } 470 } 471 free(ucp_array); 472 return (0); 473 } 474 #endif 475 476