/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Written by Mark Hemment, 1996 ([email protected]).
 *
 * (C) SGI 2006, Christoph Lameter
 *	Cleaned up and restructured to ease the addition of alternative
 *	implementations of SLAB allocators.
 * (C) Linux Foundation 2008-2013
 *	Unified interface for all slab allocators
 */

#ifndef _LINUX_SLAB_H
#define _LINUX_SLAB_H

#include <linux/cache.h>
#include <linux/gfp.h>
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
#include <linux/hash.h>

enum _slab_flag_bits {
	_SLAB_CONSISTENCY_CHECKS,
	_SLAB_RED_ZONE,
	_SLAB_POISON,
	_SLAB_KMALLOC,
	_SLAB_HWCACHE_ALIGN,
	_SLAB_CACHE_DMA,
	_SLAB_CACHE_DMA32,
	_SLAB_STORE_USER,
	_SLAB_PANIC,
	_SLAB_TYPESAFE_BY_RCU,
	_SLAB_TRACE,
#ifdef CONFIG_DEBUG_OBJECTS
	_SLAB_DEBUG_OBJECTS,
#endif
	_SLAB_NOLEAKTRACE,
	_SLAB_NO_MERGE,
#ifdef CONFIG_FAILSLAB
	_SLAB_FAILSLAB,
#endif
#ifdef CONFIG_MEMCG
	_SLAB_ACCOUNT,
#endif
#ifdef CONFIG_KASAN_GENERIC
	_SLAB_KASAN,
#endif
	_SLAB_NO_USER_FLAGS,
#ifdef CONFIG_KFENCE
	_SLAB_SKIP_KFENCE,
#endif
#ifndef CONFIG_SLUB_TINY
	_SLAB_RECLAIM_ACCOUNT,
#endif
	_SLAB_OBJECT_POISON,
	_SLAB_CMPXCHG_DOUBLE,
#ifdef CONFIG_SLAB_OBJ_EXT
	_SLAB_NO_OBJ_EXT,
#endif
	_SLAB_FLAGS_LAST_BIT
};

#define __SLAB_FLAG_BIT(nr)	((slab_flags_t __force)(1U << (nr)))
#define __SLAB_FLAG_UNUSED	((slab_flags_t __force)(0U))

/*
 * Flags to pass to kmem_cache_create().
 * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise they are no-ops.
 */
/* DEBUG: Perform (expensive) checks on alloc/free */
#define SLAB_CONSISTENCY_CHECKS	__SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS)
/* DEBUG: Red zone objs in a cache */
#define SLAB_RED_ZONE		__SLAB_FLAG_BIT(_SLAB_RED_ZONE)
/* DEBUG: Poison objects */
#define SLAB_POISON		__SLAB_FLAG_BIT(_SLAB_POISON)
/* Indicate a kmalloc slab */
#define SLAB_KMALLOC		__SLAB_FLAG_BIT(_SLAB_KMALLOC)
/* Align objs on cache lines */
#define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
/* Use GFP_DMA memory */
#define SLAB_CACHE_DMA		__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
/* Use GFP_DMA32 memory */
#define SLAB_CACHE_DMA32	__SLAB_FLAG_BIT(_SLAB_CACHE_DMA32)
/* DEBUG: Store the last owner for bug hunting */
#define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
/* Panic if kmem_cache_create() fails */
#define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
/*
 * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
 *
 * This delays freeing the SLAB page by a grace period, it does _NOT_
 * delay object freeing. This means that if you do kmem_cache_free()
 * that memory location is free to be reused at any time. Thus it may
 * be possible to see another object there in the same RCU grace period.
 *
 * This feature only ensures the memory location backing the object
 * stays valid, the trick to using this is relying on an independent
 * object validation pass. Something like:
 *
 *   begin:
 *	rcu_read_lock();
 *	obj = lockless_lookup(key);
 *	if (obj) {
 *		if (!try_get_ref(obj)) { // might fail for free objects
 *			rcu_read_unlock();
 *			goto begin;
 *		}
 *
 *		if (obj->key != key) { // not the object we expected
 *			put_ref(obj);
 *			rcu_read_unlock();
 *			goto begin;
 *		}
 *	}
 *	rcu_read_unlock();
 *
 * This is useful if we need to approach a kernel structure obliquely,
 * from its address obtained without the usual locking. We can lock
 * the structure to stabilize it and check it's still at the given address,
 * only if we can be sure that the memory has not been meanwhile reused
 * for some other kind of object (which our subsystem's lock might corrupt).
 *
 * rcu_read_lock before reading the address, then rcu_read_unlock after
 * taking the spinlock within the structure expected at that address.
 *
 * Note that it is not possible to acquire a lock within a structure
 * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
 * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
 * are not zeroed before being given to the slab, which means that any
 * locks must be initialized after each and every kmem_cache_alloc().
 * Alternatively, make the ctor passed to kmem_cache_create() initialize
 * the locks at page-allocation time, as is done in __i915_request_ctor(),
 * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers
 * to safely acquire those ctor-initialized locks under rcu_read_lock()
 * protection.
 *
 * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
 */
/* Defer freeing slabs to RCU */
#define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
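/*
 * Illustrative sketch of the ctor approach described above (hypothetical
 * "struct foo", foo_ctor() and foo_cache, not defined in this header): the
 * ctor runs once for each object when a new slab page is populated, so the
 * lock stays initialized across type-safe reuse and may be taken under
 * rcu_read_lock() (see kmem_cache_create() and &struct kmem_cache_args below):
 *
 *	static void foo_ctor(void *addr)
 *	{
 *		struct foo *f = addr;
 *
 *		spin_lock_init(&f->lock);
 *	}
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo),
 *				      &(struct kmem_cache_args){ .ctor = foo_ctor, },
 *				      SLAB_TYPESAFE_BY_RCU);
 */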
/* Trace allocations and frees */
#define SLAB_TRACE		__SLAB_FLAG_BIT(_SLAB_TRACE)

/* Flag to prevent checks on free */
#ifdef CONFIG_DEBUG_OBJECTS
# define SLAB_DEBUG_OBJECTS	__SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS)
#else
# define SLAB_DEBUG_OBJECTS	__SLAB_FLAG_UNUSED
#endif

/* Avoid kmemleak tracing */
#define SLAB_NOLEAKTRACE	__SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE)

/*
 * Prevent merging with compatible kmem caches. This flag should be used
 * cautiously. Valid use cases:
 *
 * - caches created for self-tests (e.g. kunit)
 * - general caches created and used by a subsystem, only when a
 *   (subsystem-specific) debug option is enabled
 * - performance critical caches, should be very rare and consulted with slab
 *   maintainers, and not used together with CONFIG_SLUB_TINY
 */
#define SLAB_NO_MERGE		__SLAB_FLAG_BIT(_SLAB_NO_MERGE)

/* Fault injection mark */
#ifdef CONFIG_FAILSLAB
# define SLAB_FAILSLAB		__SLAB_FLAG_BIT(_SLAB_FAILSLAB)
#else
# define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
#endif
/* Account to memcg */
#ifdef CONFIG_MEMCG
# define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
# define SLAB_ACCOUNT		__SLAB_FLAG_UNUSED
#endif

#ifdef CONFIG_KASAN_GENERIC
#define SLAB_KASAN		__SLAB_FLAG_BIT(_SLAB_KASAN)
#else
#define SLAB_KASAN		__SLAB_FLAG_UNUSED
#endif

/*
 * Ignore user specified debugging flags.
 * Intended for caches created for self-tests so they have only flags
 * specified in the code and other flags are ignored.
 */
#define SLAB_NO_USER_FLAGS	__SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS)

#ifdef CONFIG_KFENCE
#define SLAB_SKIP_KFENCE	__SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE)
#else
#define SLAB_SKIP_KFENCE	__SLAB_FLAG_UNUSED
#endif

/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
#else
#define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_UNUSED
#endif
#define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */

/* Slab created using create_boot_cache */
#ifdef CONFIG_SLAB_OBJ_EXT
#define SLAB_NO_OBJ_EXT		__SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
#else
#define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
#endif

/*
 * freeptr_t represents a SLUB freelist pointer, which might be encoded
 * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
 */
typedef struct { unsigned long v; } freeptr_t;

/*
 * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
 *
 * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault.
 *
 * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can.
 * Both make kfree a no-op.
 */
#define ZERO_SIZE_PTR ((void *)16)

#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
				(unsigned long)ZERO_SIZE_PTR)

#include <linux/kasan.h>

struct list_lru;
struct mem_cgroup;
/*
 * struct kmem_cache related prototypes
 */
bool slab_is_available(void);

/**
 * struct kmem_cache_args - Less common arguments for kmem_cache_create()
 *
 * Any uninitialized fields of the structure are interpreted as unused. The
 * exception is @freeptr_offset where %0 is a valid value, so
 * @use_freeptr_offset must also be set to %true in order to interpret the
 * field as used. For @useroffset %0 is also valid, but only with non-%0
 * @usersize.
 *
 * When %NULL args is passed to kmem_cache_create(), it is equivalent to all
 * fields being unused.
 */
struct kmem_cache_args {
	/**
	 * @align: The required alignment for the objects.
	 *
	 * %0 means no specific alignment is requested.
	 */
	unsigned int align;
	/**
	 * @useroffset: Usercopy region offset.
	 *
	 * %0 is a valid offset, when @usersize is non-%0
	 */
	unsigned int useroffset;
	/**
	 * @usersize: Usercopy region size.
	 *
	 * %0 means no usercopy region is specified.
	 */
	unsigned int usersize;
	/**
	 * @freeptr_offset: Custom offset for the free pointer
	 * in &SLAB_TYPESAFE_BY_RCU caches
	 *
	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
	 * outside of the object. This might cause the object to grow in size.
	 * Cache creators that have a reason to avoid this can specify a custom
	 * free pointer offset in their struct where the free pointer will be
	 * placed.
	 *
	 * Note that placing the free pointer inside the object requires the
	 * caller to ensure that no fields are invalidated that are required to
	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
	 * details).
	 *
	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
	 * is specified, %use_freeptr_offset must be set %true.
	 *
	 * Note that @ctor currently isn't supported with custom free pointers
	 * as a @ctor requires an external free pointer.
	 */
	unsigned int freeptr_offset;
	/**
	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
	 */
	bool use_freeptr_offset;
	/**
	 * @ctor: A constructor for the objects.
	 *
	 * The constructor is invoked for each object in a newly allocated slab
	 * page. It is the cache user's responsibility to free the object in
	 * the same state as after calling the constructor, or deal
	 * appropriately with any differences between a freshly constructed
	 * and a reallocated object.
	 *
	 * %NULL means no constructor.
	 */
	void (*ctor)(void *);
};
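/*
 * Illustrative sketch (hypothetical "struct foo" and foo_ctor(), not defined
 * in this header): only the fields of interest are filled in and the struct
 * is handed to kmem_cache_create(), defined later in this header:
 *
 *	struct kmem_cache_args args = {
 *		.align	= 64,
 *		.ctor	= foo_ctor,
 *	};
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), &args,
 *				      SLAB_HWCACHE_ALIGN);
 *	if (!foo_cache)
 *		return -ENOMEM;
 */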

struct kmem_cache *__kmem_cache_create_args(const char *name,
					    unsigned int object_size,
					    struct kmem_cache_args *args,
					    slab_flags_t flags);
static inline struct kmem_cache *
__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		    slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache_args kmem_args = {
		.align	= align,
		.ctor	= ctor,
	};

	return __kmem_cache_create_args(name, size, &kmem_args, flags);
}

/**
 * kmem_cache_create_usercopy - Create a kmem cache with a region suitable
 * for copying to userspace.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects, or %NULL.
 *
 * This is a legacy wrapper, new code should use either KMEM_CACHE_USERCOPY()
 * if whitelisting a single field is sufficient, or kmem_cache_create() with
 * the necessary parameters passed via the args parameter (see
 * &struct kmem_cache_args).
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
static inline struct kmem_cache *
kmem_cache_create_usercopy(const char *name, unsigned int size,
			   unsigned int align, slab_flags_t flags,
			   unsigned int useroffset, unsigned int usersize,
			   void (*ctor)(void *))
{
	struct kmem_cache_args kmem_args = {
		.align		= align,
		.ctor		= ctor,
		.useroffset	= useroffset,
		.usersize	= usersize,
	};

	return __kmem_cache_create_args(name, size, &kmem_args, flags);
}

/* If NULL is passed for @args, use this variant with default arguments. */
static inline struct kmem_cache *
__kmem_cache_default_args(const char *name, unsigned int size,
			  struct kmem_cache_args *args,
			  slab_flags_t flags)
{
	struct kmem_cache_args kmem_default_args = {};

	/* Make sure we don't get passed garbage. */
	if (WARN_ON_ONCE(args))
		return ERR_PTR(-EINVAL);

	return __kmem_cache_create_args(name, size, &kmem_default_args, flags);
}

/**
 * kmem_cache_create - Create a kmem cache.
 * @__name: A string which is used in /proc/slabinfo to identify this cache.
 * @__object_size: The size of objects to be created in this cache.
 * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL
 *	    means defaults will be used for all the arguments.
 *
 * This is currently implemented as a macro using ``_Generic()`` to call
 * either the new variant of the function, or a legacy one.
 *
 * The new variant has 4 parameters:
 * ``kmem_cache_create(name, object_size, args, flags)``
 *
 * See __kmem_cache_create_args() which implements this.
 *
 * The legacy variant has 5 parameters:
 * ``kmem_cache_create(name, object_size, align, flags, ctor)``
 *
 * The align and ctor parameters map to the respective fields of
 * &struct kmem_cache_args.
 *
 * Context: Cannot be called within an interrupt, but can be interrupted.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
#define kmem_cache_create(__name, __object_size, __args, ...)		\
	_Generic((__args),						\
		struct kmem_cache_args *: __kmem_cache_create_args,	\
		void *: __kmem_cache_default_args,			\
		default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)

void kmem_cache_destroy(struct kmem_cache *s);
int kmem_cache_shrink(struct kmem_cache *s);

/*
 * Please use this macro to create slab caches. Simply specify the
 * name of the structure and maybe some flags that are listed above.
 *
 * The alignment of the struct determines object alignment. If you
 * e.g. add ____cacheline_aligned_in_smp to the struct declaration
 * then the objects will be properly aligned in SMP configurations.
 */
#define KMEM_CACHE(__struct, __flags)					\
	__kmem_cache_create_args(#__struct, sizeof(struct __struct),	\
			&(struct kmem_cache_args) {			\
				.align	= __alignof__(struct __struct),	\
			}, (__flags))

/*
 * To whitelist a single field for copying to/from usercopy, use this
 * macro instead of KMEM_CACHE() above.
 */
#define KMEM_CACHE_USERCOPY(__struct, __flags, __field)			\
	__kmem_cache_create_args(#__struct, sizeof(struct __struct),	\
			&(struct kmem_cache_args) {			\
				.align		= __alignof__(struct __struct),		\
				.useroffset	= offsetof(struct __struct, __field),	\
				.usersize	= sizeof_field(struct __struct, __field),	\
			}, (__flags))
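/*
 * Illustrative sketch (hypothetical "struct foo", its field "name" and the
 * variables shown, none defined in this header): KMEM_CACHE() derives the
 * cache name, object size and alignment from the struct itself, while
 * KMEM_CACHE_USERCOPY() additionally whitelists one field for
 * copy_to_user()/copy_from_user(). Objects are then allocated and freed with
 * kmem_cache_alloc()/kmem_cache_free(), declared below:
 *
 *	foo_cache = KMEM_CACHE(foo, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
 *	foo_user_cache = KMEM_CACHE_USERCOPY(foo, 0, name);
 *
 *	f = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, f);
 *	kmem_cache_destroy(foo_cache);
 */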

/*
 * Common kmalloc functions provided by all allocators
 */
void * __must_check krealloc_noprof(const void *objp, size_t new_size,
				    gfp_t flags) __realloc_size(2);
#define krealloc(...)			alloc_hooks(krealloc_noprof(__VA_ARGS__))

void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);

DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))
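/*
 * Illustrative sketch of the cleanup classes defined above (see
 * <linux/cleanup.h>; "struct foo" and foo_setup() are hypothetical): a pointer
 * annotated with __free(kfree) is passed to kfree() automatically when it goes
 * out of scope, unless ownership is transferred with return_ptr()/no_free_ptr():
 *
 *	static struct foo *alloc_foo(void)
 *	{
 *		struct foo *f __free(kfree) = kmalloc(sizeof(*f), GFP_KERNEL);
 *
 *		if (!f)
 *			return NULL;
 *		if (!foo_setup(f))
 *			return NULL;	// f is kfree()d automatically here
 *		return_ptr(f);		// transfer ownership, skip the kfree()
 *	}
 */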

/**
 * ksize - Report actual allocation size of associated object
 *
 * @objp: Pointer returned from a prior kmalloc()-family allocation.
 *
 * This should not be used for writing beyond the originally requested
 * allocation size. Either use krealloc() or round up the allocation size
 * with kmalloc_size_roundup() prior to allocation. If this is used to
 * access beyond the originally requested allocation size, UBSAN_BOUNDS
 * and/or FORTIFY_SOURCE may trip, since they only know about the
 * originally allocated size via the __alloc_size attribute.
 */
size_t ksize(const void *objp);

#ifdef CONFIG_PRINTK
bool kmem_dump_obj(void *object);
#else
static inline bool kmem_dump_obj(void *object) { return false; }
#endif

/*
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * Setting ARCH_DMA_MINALIGN in arch headers allows that.
 */
#ifdef ARCH_HAS_DMA_MINALIGN
#if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN)
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#endif
#endif

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#elif ARCH_KMALLOC_MINALIGN > 8
#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
#endif

/*
 * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
 * Intended for arches that get misalignment faults even for 64 bit integer
 * aligned buffers.
 */
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

/*
 * Arches can define this function if they want to decide the minimum slab
 * alignment at runtime. The value returned by the function must be a power
 * of two and >= ARCH_SLAB_MINALIGN.
 */
#ifndef arch_slab_minalign
static inline unsigned int arch_slab_minalign(void)
{
	return ARCH_SLAB_MINALIGN;
}
#endif

/*
 * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
 * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
 * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
 */
#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
#define __assume_page_alignment __assume_aligned(PAGE_SIZE)

/*
 * Kmalloc array related definitions
 */

/*
 * SLUB directly allocates requests fitting into an order-1 page
 * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
 */
#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX	(MAX_PAGE_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW	3
#endif

/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE	(1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
#define KMALLOC_MAX_CACHE_SIZE	(1UL << KMALLOC_SHIFT_HIGH)
/* Maximum order allocatable via the slab allocator */
#define KMALLOC_MAX_ORDER	(KMALLOC_SHIFT_MAX - PAGE_SHIFT)
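/*
 * Worked example (assuming the common PAGE_SHIFT = 12, i.e. 4 KiB pages, and
 * MAX_PAGE_ORDER = 10): KMALLOC_MAX_CACHE_SIZE = 1 << 13 = 8 KiB, so kmalloc()
 * requests up to 8 KiB are served from kmalloc slab caches, while larger
 * requests go straight to the page allocator, up to
 * KMALLOC_MAX_SIZE = 1 << 22 = 4 MiB.
 */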

/*
 * Kmalloc subsystem.
 */
#ifndef KMALLOC_MIN_SIZE
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif

/*
 * This restriction comes from byte sized index implementation.
 * Page size is normally 2^12 bytes and, in this case, if we want to use
 * byte sized index which can represent 2^8 entries, the size of the object
 * should be equal to or greater than 2^12 / 2^8 = 2^4 = 16.
 * If the minimum size of kmalloc is less than 16, we use it as the minimum
 * object size and give up using the byte sized index.
 */
#define SLAB_OBJ_MIN_SIZE	(KMALLOC_MIN_SIZE < 16 ? \
				(KMALLOC_MIN_SIZE) : 16)

#ifdef CONFIG_RANDOM_KMALLOC_CACHES
#define RANDOM_KMALLOC_CACHES_NR	15 // # of cache copies
#else
#define RANDOM_KMALLOC_CACHES_NR	0
#endif

/*
 * Whenever changing this, take care of that kmalloc_type() and
 * create_kmalloc_caches() still work as intended.
 *
 * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP
 * is for accounted but unreclaimable and non-dma objects. All the other
 * kmem caches can have both accounted and unaccounted objects.
 */
enum kmalloc_cache_type {
	KMALLOC_NORMAL = 0,
#ifndef CONFIG_ZONE_DMA
	KMALLOC_DMA = KMALLOC_NORMAL,
#endif
#ifndef CONFIG_MEMCG
	KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
	KMALLOC_RANDOM_START = KMALLOC_NORMAL,
	KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
#ifdef CONFIG_SLUB_TINY
	KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
	KMALLOC_RECLAIM,
#endif
#ifdef CONFIG_ZONE_DMA
	KMALLOC_DMA,
#endif
#ifdef CONFIG_MEMCG
	KMALLOC_CGROUP,
#endif
	NR_KMALLOC_TYPES
};

typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];

extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];

/*
 * Define gfp bits that should not be set for KMALLOC_NORMAL.
 */
#define KMALLOC_NOT_NORMAL_BITS					\
	(__GFP_RECLAIMABLE |					\
	(IS_ENABLED(CONFIG_ZONE_DMA)   ? __GFP_DMA : 0) |	\
	(IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0))

extern unsigned long random_kmalloc_seed;

static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
{
	/*
	 * The most common case is KMALLOC_NORMAL, so test for it
	 * with a single branch for all the relevant flags.
	 */
	if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
		/* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
		return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
						      ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
#else
		return KMALLOC_NORMAL;
#endif

	/*
	 * At least one of the flags has to be set. Their priorities in
	 * decreasing order are:
	 *  1) __GFP_DMA
	 *  2) __GFP_RECLAIMABLE
	 *  3) __GFP_ACCOUNT
	 */
	if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
		return KMALLOC_DMA;
	if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE))
		return KMALLOC_RECLAIM;
	else
		return KMALLOC_CGROUP;
}
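/*
 * Examples of the mapping above (assuming CONFIG_ZONE_DMA and CONFIG_MEMCG
 * are enabled and CONFIG_RANDOM_KMALLOC_CACHES is disabled):
 *
 *	kmalloc_type(GFP_KERNEL, _RET_IP_)			== KMALLOC_NORMAL
 *	kmalloc_type(GFP_KERNEL | __GFP_ACCOUNT, _RET_IP_)	== KMALLOC_CGROUP
 *	kmalloc_type(GFP_KERNEL | __GFP_RECLAIMABLE, _RET_IP_)	== KMALLOC_RECLAIM
 *	kmalloc_type(GFP_ATOMIC | __GFP_DMA, _RET_IP_)		== KMALLOC_DMA
 */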

/*
 * Figure out which kmalloc slab an allocation of a certain size
 * belongs to.
 * 0 = zero alloc
 * 1 =  65 .. 96 bytes
 * 2 = 129 .. 192 bytes
 * n = 2^(n-1)+1 .. 2^n
 *
 * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized;
 * typical usage is via kmalloc_index() and therefore evaluated at compile-time.
 * Callers where !size_is_constant should only be test modules, where runtime
 * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab().
 */
static __always_inline unsigned int __kmalloc_index(size_t size,
						    bool size_is_constant)
{
	if (!size)
		return 0;

	if (size <= KMALLOC_MIN_SIZE)
		return KMALLOC_SHIFT_LOW;

	if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
		return 1;
	if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
		return 2;
	if (size <=          8) return 3;
	if (size <=         16) return 4;
	if (size <=         32) return 5;
	if (size <=         64) return 6;
	if (size <=        128) return 7;
	if (size <=        256) return 8;
	if (size <=        512) return 9;
	if (size <=       1024) return 10;
	if (size <=   2 * 1024) return 11;
	if (size <=   4 * 1024) return 12;
	if (size <=   8 * 1024) return 13;
	if (size <=  16 * 1024) return 14;
	if (size <=  32 * 1024) return 15;
	if (size <=  64 * 1024) return 16;
	if (size <= 128 * 1024) return 17;
	if (size <= 256 * 1024) return 18;
	if (size <= 512 * 1024) return 19;
	if (size <= 1024 * 1024) return 20;
	if (size <=  2 * 1024 * 1024) return 21;

	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
		BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
	else
		BUG();

	/* Will never be reached. Needed because the compiler may complain */
	return -1;
}
static_assert(PAGE_SHIFT <= 20);
#define kmalloc_index(s) __kmalloc_index(s, true)

#include <linux/alloc_tag.h>

/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache.
 * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
 *
 * Return: pointer to the new object or %NULL in case of error
 */
void *kmem_cache_alloc_noprof(struct kmem_cache *cachep,
			      gfp_t flags) __assume_slab_alignment __malloc;
#define kmem_cache_alloc(...)		alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__))

void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
				  gfp_t gfpflags) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_lru(...)	alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__))

/**
 * kmem_cache_charge - memcg charge an already allocated slab memory
 * @objp: address of the slab object to memcg charge
 * @gfpflags: describe the allocation context
 *
 * kmem_cache_charge allows charging a slab object to the current memcg,
 * primarily in cases where charging at allocation time might not be possible
 * because the target memcg is not known (i.e. softirq context)
 *
 * The objp should be pointer returned by the slab allocator functions like
 * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge
 * behavior can be controlled through gfpflags parameter, which affects how the
 * necessary internal metadata can be allocated. Including __GFP_NOFAIL denotes
 * that overcharging is requested instead of failure, but is not applied for the
 * internal metadata allocation.
 *
 * There are several cases where it will return true even if the charging was
 * not done. More specifically:
 *
 * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems.
 * 2. Already charged slab objects.
 * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc()
 *    without __GFP_ACCOUNT
 * 4. Allocating internal metadata has failed
 *
 * Return: true if charge was successful otherwise false.
 */
bool kmem_cache_charge(void *objp, gfp_t gfpflags);
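/*
 * One possible usage pattern suggested by the description above (hypothetical
 * cache and error label; whether a given object is chargeable depends on how
 * it was allocated, see the constraints listed in the kernel-doc): allocate
 * where the owning memcg is not known, e.g. in softirq context, then charge
 * later from a context where it is:
 *
 *	obj = kmem_cache_alloc(foo_cache, GFP_ATOMIC);
 *	...
 *	// later, running in the task that owns the object
 *	if (!kmem_cache_charge(obj, GFP_KERNEL))
 *		goto err_free;
 */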
void kmem_cache_free(struct kmem_cache *s, void *objp);

kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
				  unsigned int useroffset, unsigned int usersize,
				  void (*ctor)(void *));

/*
 * Bulk allocation and freeing operations. These are accelerated in an
 * allocator specific way to avoid taking locks repeatedly or building
 * metadata structures unnecessarily.
 *
 * Note that interrupts must be enabled when calling these functions.
 */
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);

int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
#define kmem_cache_alloc_bulk(...)	alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__))

static __always_inline void kfree_bulk(size_t size, void **p)
{
	kmem_cache_free_bulk(NULL, size, p);
}

void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
				   int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...)	alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
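/*
 * Illustrative sketch of the bulk API above (hypothetical foo_cache, sizes and
 * error handling kept minimal): kmem_cache_alloc_bulk() fills the array from
 * index 0 and returns the number of objects placed in it, 0 on failure:
 *
 *	void *objs[16];
 *	int n;
 *
 *	n = kmem_cache_alloc_bulk(foo_cache, GFP_KERNEL, ARRAY_SIZE(objs), objs);
 *	if (!n)
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(foo_cache, n, objs);
 */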

/*
 * These macros allow declaring a kmem_buckets * parameter alongside size, which
 * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
 * sites don't have to pass NULL.
 */
#ifdef CONFIG_SLAB_BUCKETS
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size), kmem_buckets *(_b)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size), (_b)
#define PASS_BUCKET_PARAM(_b)		(_b)
#else
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size)
#define PASS_BUCKET_PARAM(_b)		NULL
#endif

/*
 * The following functions are not to be used directly and are intended only
 * for internal use from kmalloc() and kmalloc_node()
 * with the exception of kunit tests
 */

void *__kmalloc_noprof(size_t size, gfp_t flags)
				__assume_kmalloc_alignment __alloc_size(1);

void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
				__assume_kmalloc_alignment __alloc_size(1);

void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
				__assume_kmalloc_alignment __alloc_size(3);

void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
				  int node, size_t size)
				__assume_kmalloc_alignment __alloc_size(4);

void *__kmalloc_large_noprof(size_t size, gfp_t flags)
				__assume_page_alignment __alloc_size(1);

void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
				__assume_page_alignment __alloc_size(1);

/**
 * kmalloc - allocate kernel memory
 * @size: how many bytes of memory are required.
 * @flags: describe the allocation context
 *
 * kmalloc is the normal method of allocating memory
 * for objects smaller than page size in the kernel.
 *
 * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
 * bytes. For @size of power of two bytes, the alignment is also guaranteed
 * to be at least to the size. For other sizes, the alignment is guaranteed to
 * be at least the largest power-of-two divisor of @size.
 *
 * The @flags argument may be one of the GFP flags defined at
 * include/linux/gfp_types.h and described at
 * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
 *
 * The recommended usage of the @flags is described at
 * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>`
 *
 * Below is a brief outline of the most useful GFP flags
 *
 * %GFP_KERNEL
 *	Allocate normal kernel ram. May sleep.
 *
 * %GFP_NOWAIT
 *	Allocation will not sleep.
 *
 * %GFP_ATOMIC
 *	Allocation will not sleep. May use emergency pools.
 *
 * Also it is possible to set different flags by OR'ing
 * in one or more of the following additional @flags:
 *
 * %__GFP_ZERO
 *	Zero the allocated memory before returning. Also see kzalloc().
 *
 * %__GFP_HIGH
 *	This allocation has high priority and may use emergency pools.
 *
 * %__GFP_NOFAIL
 *	Indicate that this allocation is in no way allowed to fail
 *	(think twice before using).
 *
 * %__GFP_NORETRY
 *	If memory is not immediately available,
 *	then give up at once.
 *
 * %__GFP_NOWARN
 *	If allocation fails, don't issue any warnings.
 *
 * %__GFP_RETRY_MAYFAIL
 *	Try really hard to succeed the allocation but fail
 *	eventually.
 */
static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags)
{
	if (__builtin_constant_p(size) && size) {
		unsigned int index;

		if (size > KMALLOC_MAX_CACHE_SIZE)
			return __kmalloc_large_noprof(size, flags);

		index = kmalloc_index(size);
		return __kmalloc_cache_noprof(
				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
				flags, size);
	}
	return __kmalloc_noprof(size, flags);
}
#define kmalloc(...)			alloc_hooks(kmalloc_noprof(__VA_ARGS__))

#define kmem_buckets_alloc(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

#define kmem_buckets_alloc_track_caller(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))

static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
	if (__builtin_constant_p(size) && size) {
		unsigned int index;

		if (size > KMALLOC_MAX_CACHE_SIZE)
			return __kmalloc_large_node_noprof(size, flags, node);

		index = kmalloc_index(size);
		return __kmalloc_cache_node_noprof(
				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
				flags, node, size);
	}
	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
}
#define kmalloc_node(...)		alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))

/**
 * kmalloc_array - allocate memory for an array.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;
	if (__builtin_constant_p(n) && __builtin_constant_p(size))
		return kmalloc_noprof(bytes, flags);
	return kmalloc_noprof(bytes, flags);
}
#define kmalloc_array(...)		alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))
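/*
 * Illustrative sketch (hypothetical element struct and count): prefer
 * kmalloc_array()/kcalloc() over an open-coded kmalloc(n * size, ...), since
 * the multiplication above is checked with check_mul_overflow() and an
 * overflowing request returns NULL instead of silently allocating too little:
 *
 *	struct foo *arr;
 *
 *	arr = kmalloc_array(nelems, sizeof(*arr), GFP_KERNEL);
 *	if (!arr)
 *		return -ENOMEM;
 *	...
 *	kfree(arr);
 */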

/**
 * krealloc_array - reallocate memory for an array.
 * @p: pointer to the memory chunk to reallocate
 * @new_n: new number of elements to alloc
 * @new_size: new size of a single member of the array
 * @flags: the type of memory to allocate (see kmalloc)
 *
 * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
 * initial memory allocation, every subsequent call to this API for the same
 * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
 * __GFP_ZERO is not fully honored by this API.
 *
 * See krealloc_noprof() for further details.
 *
 * In any case, the contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes.
 */
static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p,
								       size_t new_n,
								       size_t new_size,
								       gfp_t flags)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
		return NULL;

	return krealloc_noprof(p, bytes, flags);
}
#define krealloc_array(...)		alloc_hooks(krealloc_array_noprof(__VA_ARGS__))
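/*
 * Illustrative sketch (hypothetical array and counters): growing an array
 * with krealloc_array(), remembering that it returns NULL on failure while
 * the original allocation remains valid, so the old pointer must not be
 * overwritten before the result is checked:
 *
 *	struct foo *tmp;
 *
 *	tmp = krealloc_array(arr, new_nelems, sizeof(*arr), GFP_KERNEL);
 *	if (!tmp)
 *		return -ENOMEM;	// arr is still valid and unchanged
 *	arr = tmp;
 */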

/**
 * kcalloc - allocate memory for an array. The memory is set to zero.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
#define kcalloc(n, size, flags)		kmalloc_array(n, size, (flags) | __GFP_ZERO)

void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
					 unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
	__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
#define kmalloc_node_track_caller(...)		\
	alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))

/*
 * kmalloc_track_caller is a special version of kmalloc that records the
 * calling function of the routine calling it for slab leak tracking instead
 * of just the calling function (confusing, eh?).
 * It's useful when the call to kmalloc comes from a widely-used standard
 * allocator where we care about the real place the memory allocation
 * request comes from.
 */
#define kmalloc_track_caller(...)	kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE)

#define kmalloc_track_caller_noprof(...)	\
		kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_)

static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags,
							  int node)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;
	if (__builtin_constant_p(n) && __builtin_constant_p(size))
		return kmalloc_node_noprof(bytes, flags, node);
	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
}
#define kmalloc_array_node(...)		alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))

#define kcalloc_node(_n, _size, _flags, _node)	\
	kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node)

/*
 * Shortcuts
 */
#define kmem_cache_zalloc(_k, _flags)	kmem_cache_alloc(_k, (_flags)|__GFP_ZERO)

/**
 * kzalloc - allocate memory. The memory is set to zero.
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 */
static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
{
	return kmalloc_noprof(size, flags | __GFP_ZERO);
}
#define kzalloc(...)			alloc_hooks(kzalloc_noprof(__VA_ARGS__))
#define kzalloc_node(_size, _flags, _node)	kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)

void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
#define kvmalloc_node_noprof(size, flags, node)	\
	__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
#define kvmalloc_node(...)		alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))

#define kvmalloc(_size, _flags)		kvmalloc_node(_size, _flags, NUMA_NO_NODE)
#define kvmalloc_noprof(_size, _flags)	kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE)
#define kvzalloc(_size, _flags)		kvmalloc(_size, (_flags)|__GFP_ZERO)

#define kvzalloc_node(_size, _flags, _node)	kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
#define kmem_buckets_valloc(_b, _size, _flags)	\
	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

static inline __alloc_size(1, 2) void *
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	return kvmalloc_node_noprof(bytes, flags, node);
}

#define kvmalloc_array_noprof(...)		kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
#define kvcalloc_node_noprof(_n, _s, _f, _node)	kvmalloc_array_node_noprof(_n, _s, (_f)|__GFP_ZERO, _node)
#define kvcalloc_noprof(...)			kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE)

#define kvmalloc_array(...)			alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__))
#define kvcalloc_node(...)			alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__))
#define kvcalloc(...)				alloc_hooks(kvcalloc_noprof(__VA_ARGS__))

void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
		__realloc_size(2);
#define kvrealloc(...)				alloc_hooks(kvrealloc_noprof(__VA_ARGS__))

extern void kvfree(const void *addr);
DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))

extern void kvfree_sensitive(const void *addr, size_t len);

unsigned int kmem_cache_size(struct kmem_cache *s);

/**
 * kmalloc_size_roundup - Report allocation bucket size for the given size
 *
 * @size: Number of bytes to round up from.
 *
 * This returns the number of bytes that would be available in a kmalloc()
 * allocation of @size bytes. For example, a 126 byte request would be
 * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly
 * for the general-purpose kmalloc()-based allocations, and is not for the
 * pre-sized kmem_cache_alloc()-based allocations.)
 *
 * Use this to kmalloc() the full bucket size ahead of time instead of using
 * ksize() to query the size after an allocation.
 */
size_t kmalloc_size_roundup(size_t size);

void __init kmem_cache_init_late(void);

#endif	/* _LINUX_SLAB_H */