/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _LINUX_OBJPOOL_H
#define _LINUX_OBJPOOL_H

#include <linux/types.h>
#include <linux/refcount.h>

/*
 * objpool: ring-array based lockless MPMC queue
 *
 * Copyright: [email protected],[email protected]
 *
 * objpool is a scalable implementation of a high performance queue for
 * object allocation and reclamation, such as kretprobe instances.
 *
 * By leveraging a percpu ring-array to mitigate hot spots of memory
 * contention, it delivers near-linear scalability for highly parallel
 * scenarios. The objpool is best suited for the following cases:
 * 1) Memory allocation or reclamation is prohibited or too expensive
 * 2) Consumers are of different priorities, such as irqs and threads
 *
 * Limitations:
 * 1) The maximum number of objects (capacity) is fixed after objpool creation
 * 2) All pre-allocated objects are managed in percpu ring arrays,
 *    which consume more memory than linked lists
 */

/**
 * struct objpool_slot - percpu ring array of objpool
 * @head: head sequence of the local ring array (to retrieve at)
 * @tail: tail sequence of the local ring array (to append at)
 * @last: the last sequence number marked as ready for retrieval
 * @mask: bit mask for modulo capacity to compute array indexes
 * @entries: object entries on this slot
 *
 * Represents a cpu-local, array-based ring buffer whose size is fixed
 * during initialization of the object pool. The percpu objpool slot is
 * allocated from local memory on NUMA systems and kept compact in
 * contiguous memory: the objects assigned to this CPU are stored right
 * after the body of struct objpool_slot.
 *
 * The real size of the ring array is far smaller than the value range
 * of head and tail, which are typed as uint32_t: [0, 2^32), so only the
 * lower bits (mask) of head and tail are used as the actual position in
 * the ring array. In general the ring array acts like a small sliding
 * window, always moving forward within the loop of [0, 2^32).
 */
struct objpool_slot {
	uint32_t            head;
	uint32_t            tail;
	uint32_t            last;
	uint32_t            mask;
	void               *entries[];
} __packed;
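/*
 * A minimal sketch, not part of the objpool API: how a head/tail sequence
 * is turned into a ring-array index. Assuming the slot capacity is a power
 * of two (implied by @mask), "seq & mask" is equivalent to "seq % capacity",
 * so head and tail may wrap freely within [0, 2^32). The helper name below
 * is hypothetical and exists only to illustrate the index arithmetic.
 */
static inline void *__objpool_slot_entry_sketch(struct objpool_slot *slot,
						uint32_t seq)
{
	/* only the lower bits of the sequence select a ring position */
	return slot->entries[seq & slot->mask];
}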

struct objpool_head;

/*
 * caller-specified callback for object initial setup; it is called only
 * once for each object (just after the memory allocation of the object)
 */
typedef int (*objpool_init_obj_cb)(void *obj, void *context);

/* caller-specified cleanup callback for objpool destruction */
typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);

/**
 * struct objpool_head - object pooling metadata
 * @obj_size:   object size, aligned to sizeof(void *)
 * @nr_objs:    total number of objects (pre-allocated with the objpool)
 * @nr_cpus:    local copy of nr_cpu_ids
 * @capacity:   max number of objects one objpool_slot can manage
 * @gfp:        gfp flags for kmalloc & vmalloc
 * @ref:        refcount of the objpool
 * @flags:      flags for objpool management
 * @cpu_slots:  pointer to the array of objpool_slot
 * @release:    resource cleanup callback
 * @context:    caller-provided context
 */
struct objpool_head {
	int                     obj_size;
	int                     nr_objs;
	int                     nr_cpus;
	int                     capacity;
	gfp_t                   gfp;
	refcount_t              ref;
	unsigned long           flags;
	struct objpool_slot   **cpu_slots;
	objpool_fini_cb         release;
	void                   *context;
};

#define OBJPOOL_NR_OBJECT_MAX	(1UL << 24) /* maximum number of total objects */
#define OBJPOOL_OBJECT_SIZE_MAX	(1UL << 16) /* maximum size of an object */

/**
 * objpool_init() - initialize objpool and pre-allocated objects
 * @pool:    the object pool to be initialized, declared by caller
 * @nr_objs: total objects to be pre-allocated by this object pool
 * @object_size: size of an object (should be > 0)
 * @gfp:     flags for memory allocation (via kmalloc or vmalloc)
 * @context: user context for object initialization callback
 * @objinit: object initialization callback for extra setup
 * @release: cleanup callback for extra cleanup tasks
 *
 * return value: 0 for success, otherwise error code
 *
 * All pre-allocated objects are zeroed after memory allocation. The
 * caller can do extra initialization in the objinit callback. objinit()
 * is called just after slot allocation, and only once for each object.
 * After that the objpool never touches the contents of the objects, so
 * it is the caller's duty to reinitialize an object after each pop
 * (object allocation) or to clear it before each push (object
 * reclamation).
 */
int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
		 gfp_t gfp, void *context, objpool_init_obj_cb objinit,
		 objpool_fini_cb release);
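/*
 * A minimal usage sketch, not part of the objpool API: "struct my_obj",
 * "my_objinit" and "my_pool_create" are hypothetical names showing how a
 * caller could wire up objpool_init() with an objinit callback. The gfp
 * flags (typically GFP_KERNEL) are passed in by the caller to keep the
 * sketch self-contained.
 */
struct my_obj {
	unsigned long	cookie;		/* caller-defined payload */
};

static inline int my_objinit(void *obj, void *context)
{
	struct my_obj *o = obj;

	/* objects arrive zeroed; do any extra per-object setup here */
	o->cookie = (unsigned long)context;
	return 0;
}

static inline int my_pool_create(struct objpool_head *pool, gfp_t gfp,
				 void *context)
{
	/* pre-allocate 128 objects of sizeof(struct my_obj) bytes each */
	return objpool_init(pool, 128, sizeof(struct my_obj), gfp,
			    context, my_objinit, NULL);
}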

/**
 * objpool_pop() - allocate an object from objpool
 * @pool: object pool
 *
 * return value: object ptr or NULL if failed
 */
void *objpool_pop(struct objpool_head *pool);

/**
 * objpool_push() - reclaim an object and return it back to objpool
 * @obj:  object ptr to be pushed to objpool
 * @pool: object pool
 *
 * return: 0 or error code (it fails only when the user tries to push
 * the same object multiple times or pushes wrong "objects" into objpool)
 */
int objpool_push(void *obj, struct objpool_head *pool);
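/*
 * A minimal fast-path sketch, not part of the objpool API; "my_obj_use_once"
 * is a hypothetical helper. It pops an object, (re)initializes it as the
 * objpool_init() documentation above requires, and pushes it back when done.
 */
static inline void my_obj_use_once(struct objpool_head *pool)
{
	void *obj = objpool_pop(pool);	/* NULL when the pool is empty */

	if (!obj)
		return;

	/*
	 * objpool never touches object contents after objinit(), so
	 * reinitialize the object here (or clear it before the push),
	 * then use it as needed.
	 */

	/* hand the object back; this fails only on misuse (double push) */
	objpool_push(obj, pool);
}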

/**
 * objpool_drop() - discard an object and deref objpool
 * @obj:  object ptr to be discarded
 * @pool: object pool
 *
 * return: 0 if the objpool was released; -EAGAIN if there are still
 *         outstanding objects
 *
 * objpool_drop is normally for the release of outstanding objects
 * after objpool cleanup (objpool_fini). Consider this example: a
 * kretprobe is unregistered and objpool_fini() is called to release
 * all remaining objects, but there are still objects being used by
 * unfinished kretprobes (e.g. blocked in a blockable function such as
 * sys_accept). Only when the last outstanding object is dropped via
 * objpool_drop() can the whole objpool be released.
 */
int objpool_drop(void *obj, struct objpool_head *pool);

/**
 * objpool_free() - forcibly release the objpool (all objects are freed)
 * @pool: object pool to be released
 */
void objpool_free(struct objpool_head *pool);

/**
 * objpool_fini() - deref object pool (also releasing unused objects)
 * @pool: object pool to be dereferenced
 *
 * objpool_fini() tries to release all remaining free objects and then
 * drops an extra reference of the objpool. If all objects have already
 * been returned to the objpool (so-called synchronous use cases), the
 * objpool itself is freed as well. But if there are still outstanding
 * objects (so-called asynchronous use cases, such as a blockable
 * kretprobe), the objpool won't be released until all the outstanding
 * objects are dropped; the caller must ensure there are no concurrent
 * objpool_push() calls in flight. Normally RCU is required to make
 * sure all ongoing objpool_push() calls have finished before calling
 * objpool_fini(), as is done by test_objpool, kretprobe and rethook.
 */
void objpool_fini(struct objpool_head *pool);

#endif /* _LINUX_OBJPOOL_H */