// SPDX-License-Identifier: GPL-2.0

/*
 * Test module for lockless object pool
 *
 * Copyright: [email protected]
 */
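
/*
 * Usage note: this module runs all predefined testcases from its init
 * function, reports the results via pr_info(), and then deliberately
 * returns -EAGAIN so that it never stays loaded. In sketch, the objpool
 * API exercised below works as follows (error handling elided; see
 * <linux/objpool.h> for the authoritative interface):
 *
 *	struct objpool_head pool;
 *
 *	objpool_init(&pool, nr_objs, objsz, GFP_KERNEL, ctx, init_fn, release_fn);
 *	obj = objpool_pop(&pool);	// NULL when the pool is empty
 *	objpool_push(obj, &pool);	// return the object for reuse
 *	objpool_fini(&pool);		// drop the pool's own reference
 */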

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/hrtimer.h>
#include <linux/objpool.h>

#define OT_NR_MAX_BULK (16)

/* memory usage */
struct ot_mem_stat {
	atomic_long_t alloc;
	atomic_long_t free;
};

/* object allocation results */
struct ot_obj_stat {
	unsigned long nhits;
	unsigned long nmiss;
};

/* control & results per testcase */
struct ot_data {
	struct rw_semaphore start;
	struct completion wait;
	struct completion rcu;
	atomic_t nthreads ____cacheline_aligned_in_smp;
	atomic_t stop ____cacheline_aligned_in_smp;
	struct ot_mem_stat kmalloc;
	struct ot_mem_stat vmalloc;
	struct ot_obj_stat objects;
	u64 duration;
};

/* testcase */
struct ot_test {
	int async; /* synchronous or asynchronous */
	int mode; /* only mode 0 supported */
	int objsz; /* object size */
	int duration; /* ms */
	int delay; /* ms */
	int bulk_normal;
	int bulk_irq;
	unsigned long hrtimer; /* ms */
	const char *name;
	struct ot_data data;
};

/* per-cpu worker */
struct ot_item {
	struct objpool_head *pool; /* pool head */
	struct ot_test *test; /* test parameters */

	void (*worker)(struct ot_item *item, int irq);

	/* hrtimer control */
	ktime_t hrtcycle;
	struct hrtimer hrtimer;

	int bulk[2]; /* for thread and irq */
	int delay;
	u32 niters;

	/* summary per thread */
	struct ot_obj_stat stat[2]; /* thread and irq */
	u64 duration;
};

/*
 * memory leak checking
 */

static void *ot_kzalloc(struct ot_test *test, long size)
{
	void *ptr = kzalloc(size, GFP_KERNEL);

	if (ptr)
		atomic_long_add(size, &test->data.kmalloc.alloc);
	return ptr;
}

static void ot_kfree(struct ot_test *test, void *ptr, long size)
{
	if (!ptr)
		return;
	atomic_long_add(size, &test->data.kmalloc.free);
	kfree(ptr);
}

static void ot_mem_report(struct ot_test *test)
{
	long alloc, free;

	pr_info("memory allocation summary for %s\n", test->name);

	alloc = atomic_long_read(&test->data.kmalloc.alloc);
	free = atomic_long_read(&test->data.kmalloc.free);
	pr_info(" kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);

	alloc = atomic_long_read(&test->data.vmalloc.alloc);
	free = atomic_long_read(&test->data.vmalloc.free);
	pr_info(" vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
}
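
/*
 * Interpreting the report: a nonzero "alloc - free" delta means some
 * allocation accounted through the wrappers above was never freed,
 * i.e. the testcase leaked memory in its setup/teardown path.
 */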

/* user object instance */
struct ot_node {
	void *owner;
	unsigned long data;
	unsigned long refs;
	unsigned long payload[32];
};

/* user objpool manager */
struct ot_context {
	struct objpool_head pool; /* objpool head */
	struct ot_test *test; /* test parameters */
	void *ptr; /* user pool buffer */
	unsigned long size; /* buffer size */
	struct rcu_head rcu;
};

static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);

static int ot_init_data(struct ot_data *data)
{
	memset(data, 0, sizeof(*data));
	init_rwsem(&data->start);
	init_completion(&data->wait);
	init_completion(&data->rcu);
	atomic_set(&data->nthreads, 1);

	return 0;
}

static int ot_init_node(void *nod, void *context)
{
	struct ot_context *sop = context;
	struct ot_node *on = nod;

	on->owner = &sop->pool;
	return 0;
}

static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
{
	struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
	struct ot_test *test = item->test;

	if (atomic_read_acquire(&test->data.stop))
		return HRTIMER_NORESTART;

	/* do bulk testing of object pop/push */
	item->worker(item, 1);

	hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
	return HRTIMER_RESTART;
}
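
/*
 * The hrtimer path drives the same worker callback as the kthread, but
 * with irq == 1, so pops/pushes are also exercised from (hrtimer)
 * interrupt context and accounted separately in stat[1]. The handler
 * keeps re-arming itself every hrtcycle until the stop flag is seen.
 */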

static void ot_start_hrtimer(struct ot_item *item)
{
	if (!item->test->hrtimer)
		return;
	hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
}

static void ot_stop_hrtimer(struct ot_item *item)
{
	if (!item->test->hrtimer)
		return;
	hrtimer_cancel(&item->hrtimer);
}

static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
{
	struct hrtimer *hrt = &item->hrtimer;

	if (!hrtimer)
		return -ENOENT;

	item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
	hrtimer_setup(hrt, ot_hrtimer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	return 0;
}

static int ot_init_cpu_item(struct ot_item *item,
			    struct ot_test *test,
			    struct objpool_head *pool,
			    void (*worker)(struct ot_item *, int))
{
	memset(item, 0, sizeof(*item));
	item->pool = pool;
	item->test = test;
	item->worker = worker;

	item->bulk[0] = test->bulk_normal;
	item->bulk[1] = test->bulk_irq;
	item->delay = test->delay;

	/* initialize hrtimer */
	ot_init_hrtimer(item, item->test->hrtimer);
	return 0;
}

static int ot_thread_worker(void *arg)
{
	struct ot_item *item = arg;
	struct ot_test *test = item->test;
	ktime_t start;

	atomic_inc(&test->data.nthreads);
	down_read(&test->data.start);
	up_read(&test->data.start);
	start = ktime_get();
	ot_start_hrtimer(item);
	do {
		if (atomic_read_acquire(&test->data.stop))
			break;
		/* do bulk testing of object pop/push */
		item->worker(item, 0);
	} while (!kthread_should_stop());
	ot_stop_hrtimer(item);
	item->duration = (u64) ktime_us_delta(ktime_get(), start);
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	return 0;
}
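
/*
 * Start-line synchronization: the control thread holds data.start for
 * write while the workers are spawned; each worker blocks in the
 * down_read() above until up_write() releases them all at once. Note
 * that nthreads starts at 1 (the control thread's own reference), so
 * data.wait completes only after the control thread has dropped its
 * reference and every worker has finished.
 */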

static void ot_perf_report(struct ot_test *test, u64 duration)
{
	struct ot_obj_stat total, normal = {0}, irq = {0};
	int cpu, nthreads = 0;

	pr_info("\n");
	pr_info("Testing summary for %s\n", test->name);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		if (!item->duration)
			continue;
		normal.nhits += item->stat[0].nhits;
		normal.nmiss += item->stat[0].nmiss;
		irq.nhits += item->stat[1].nhits;
		irq.nmiss += item->stat[1].nmiss;
		pr_info("CPU: %d duration: %lluus\n", cpu, item->duration);
		pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits, item->stat[0].nmiss);
		pr_info("\tirq: \t%16lu hits \t%16lu miss\n",
			item->stat[1].nhits, item->stat[1].nmiss);
		pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
			item->stat[0].nhits + item->stat[1].nhits,
			item->stat[0].nmiss + item->stat[1].nmiss);
		nthreads++;
	}

	total.nhits = normal.nhits + irq.nhits;
	total.nmiss = normal.nmiss + irq.nmiss;

	pr_info("ALL: \tnthreads: %d duration: %lluus\n", nthreads, duration);
	pr_info("SUM: \t%16lu hits \t%16lu miss\n",
		total.nhits, total.nmiss);

	test->data.objects = total;
	test->data.duration = duration;
}

/*
 * synchronous test cases for objpool manipulation
 */

/* objpool manipulation for synchronous mode (percpu objpool) */
static struct ot_context *ot_init_sync_m0(struct ot_test *test)
{
	struct ot_context *sop = NULL;
	int max = num_possible_cpus() << 3;
	gfp_t gfp = GFP_KERNEL;

	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
	if (!sop)
		return NULL;
	sop->test = test;
	if (test->objsz < 512)
		gfp = GFP_ATOMIC;

	if (objpool_init(&sop->pool, max, test->objsz,
			 gfp, sop, ot_init_node, NULL)) {
		ot_kfree(test, sop, sizeof(*sop));
		return NULL;
	}
	WARN_ON(max != sop->pool.nr_objs);

	return sop;
}

static void ot_fini_sync(struct ot_context *sop)
{
	objpool_fini(&sop->pool);
	ot_kfree(sop->test, sop, sizeof(*sop));
}

static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_sync_ops[] = {
	{.init = ot_init_sync_m0, .fini = ot_fini_sync},
};
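
/*
 * test->mode indexes this ops table; with only mode 0 implemented the
 * table has a single entry, but the indirection leaves room for other
 * pool-backing variants.
 */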

/*
 * synchronous test cases: performance mode
 */

static void ot_bulk_sync(struct ot_item *item, int irq)
{
	struct ot_node *nods[OT_NR_MAX_BULK];
	int i;

	for (i = 0; i < item->bulk[irq]; i++)
		nods[i] = objpool_pop(item->pool);

	if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
		msleep(item->delay);

	while (i-- > 0) {
		struct ot_node *on = nods[i];
		if (on) {
			on->refs++;
			objpool_push(on, item->pool);
			item->stat[irq].nhits++;
		} else {
			item->stat[irq].nmiss++;
		}
	}
}
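
/*
 * Each pass pops up to bulk[irq] objects and then pushes back the ones
 * it actually got: a successful pop counts as a hit, a NULL return
 * (pool empty) as a miss. The "overrun" testcases set bulk_normal to
 * 16, more than the eight objects reserved per possible CPU, so that
 * concurrent workers routinely drain the pool and the miss path is
 * exercised as well.
 */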

static int ot_start_sync(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for the synchronous testcase */
	sop = g_ot_sync_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab the rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item,
					  cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads are waiting at the start line */
	msleep(20);

	/* in case no threads were created: insufficient memory? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	// sched_set_fifo_low(current);

	/* start the objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield the cpu to worker threads for 'duration' ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell worker threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* wait for all worker threads to finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* clean up the objpool */
	g_ot_sync_ops[test->mode].fini(sop);

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}

/*
 * asynchronous test cases: pool lifecycle controlled by refcount
 */

static void ot_fini_async_rcu(struct rcu_head *rcu)
{
	struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
	struct ot_test *test = sop->test;

	/* here all cpus are aware of the stop event: test->data.stop = 1 */
	WARN_ON(!atomic_read_acquire(&test->data.stop));

	objpool_fini(&sop->pool);
	complete(&test->data.rcu);
}

static void ot_fini_async(struct ot_context *sop)
{
	/* make sure the stop event is acknowledged by all cores */
	call_rcu(&sop->rcu, ot_fini_async_rcu);
}

static int ot_objpool_release(struct objpool_head *head, void *context)
{
	struct ot_context *sop = context;

	WARN_ON(!head || !sop || head != &sop->pool);

	/* do context cleanup if needed */
	if (sop)
		ot_kfree(sop->test, sop, sizeof(*sop));

	return 0;
}

static struct ot_context *ot_init_async_m0(struct ot_test *test)
{
	struct ot_context *sop = NULL;
	int max = num_possible_cpus() << 3;
	gfp_t gfp = GFP_KERNEL;

	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
	if (!sop)
		return NULL;
	sop->test = test;
	if (test->objsz < 512)
		gfp = GFP_ATOMIC;

	if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
			 ot_init_node, ot_objpool_release)) {
		ot_kfree(test, sop, sizeof(*sop));
		return NULL;
	}
	WARN_ON(max != sop->pool.nr_objs);

	return sop;
}

static struct {
	struct ot_context * (*init)(struct ot_test *oc);
	void (*fini)(struct ot_context *sop);
} g_ot_async_ops[] = {
	{.init = ot_init_async_m0, .fini = ot_fini_async},
};

static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
			   int release)
{
	struct ot_context *sop;

	on->refs++;

	if (!release) {
		/* push the object back to the objpool for reuse */
		objpool_push(on, pool);
		return;
	}

	sop = container_of(pool, struct ot_context, pool);
	WARN_ON(sop != pool->context);

	/* remove the node forever and unref the objpool */
	objpool_drop(on, pool);
}
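
/*
 * Async lifecycle: while the test is running, popped objects are simply
 * pushed back for reuse. Once the stop flag is seen, each object is
 * instead dropped via objpool_drop(), which removes it for good and
 * releases one reference on the pool; when the last reference is gone
 * (objpool_fini() from the RCU callback plus the per-object drops),
 * ot_objpool_release() runs and frees the context.
 */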

static void ot_bulk_async(struct ot_item *item, int irq)
{
	struct ot_test *test = item->test;
	struct ot_node *nods[OT_NR_MAX_BULK];
	int i, stop;

	for (i = 0; i < item->bulk[irq]; i++)
		nods[i] = objpool_pop(item->pool);

	if (!irq) {
		if (item->delay || !(++(item->niters) & 0x7FFF))
			msleep(item->delay);
		get_cpu();
	}

	stop = atomic_read_acquire(&test->data.stop);

	/* drop all objects and deref objpool */
	while (i-- > 0) {
		struct ot_node *on = nods[i];

		if (on) {
			on->refs++;
			ot_nod_recycle(on, item->pool, stop);
			item->stat[irq].nhits++;
		} else {
			item->stat[irq].nmiss++;
		}
	}

	if (!irq)
		put_cpu();
}
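
/*
 * Note: the get_cpu()/put_cpu() pair in the thread path disables
 * preemption around the stop-flag read and the recycle loop, presumably
 * so that a whole batch is recycled from one CPU without migration in
 * between; the hrtimer path already runs with preemption disabled.
 */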

static int ot_start_async(struct ot_test *test)
{
	struct ot_context *sop;
	ktime_t start;
	u64 duration;
	unsigned long timeout;
	int cpu;

	/* initialize objpool for the asynchronous testcase */
	sop = g_ot_async_ops[test->mode].init(test);
	if (!sop)
		return -ENOMEM;

	/* grab the rwsem to block testing threads */
	down_write(&test->data.start);

	for_each_possible_cpu(cpu) {
		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
		struct task_struct *work;

		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);

		/* skip offline cpus */
		if (!cpu_online(cpu))
			continue;

		work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d");
		if (IS_ERR(work))
			pr_err("failed to create thread for cpu %d\n", cpu);
	}

	/* wait a while to make sure all threads are waiting at the start line */
	msleep(20);

	/* in case no threads were created: insufficient memory? */
	if (atomic_dec_and_test(&test->data.nthreads))
		complete(&test->data.wait);

	/* start the objpool testing threads */
	start = ktime_get();
	up_write(&test->data.start);

	/* yield the cpu to worker threads for 'duration' ms */
	timeout = msecs_to_jiffies(test->duration);
	schedule_timeout_interruptible(timeout);

	/* tell worker threads to quit */
	atomic_set_release(&test->data.stop, 1);

	/* do async finalization */
	g_ot_async_ops[test->mode].fini(sop);

	/* wait for all worker threads to finish and quit */
	wait_for_completion(&test->data.wait);
	duration = (u64) ktime_us_delta(ktime_get(), start);

	/* make sure the rcu callback has been triggered */
	wait_for_completion(&test->data.rcu);

	/*
	 * now we are sure that the objpool has been finalized, either
	 * by the rcu callback or by the worker threads
	 */

	/* report testing summary and performance results */
	ot_perf_report(test, duration);

	/* report memory allocation summary */
	ot_mem_report(test);

	return 0;
}

/*
 * predefined testcases:
 * synchronous case / overrun case / async case
 *
 * async: synchronous or asynchronous testing
 * mode: only mode 0 supported
 * objsz: object size
 * duration: int, total test time in ms
 * delay: int, delay (in ms) between each iteration
 * bulk_normal: int, repeat times for the thread worker
 * bulk_irq: int, repeat times for the irq consumer
 * hrtimer: unsigned long, hrtimer interval in ms
 * name: char *, tag for the current testcase
 */

#define NODE_COMPACT sizeof(struct ot_node)
#define NODE_VMALLOC (512)

static struct ot_test g_testcases[] = {

	/* sync & normal */
	{0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 1, 0, 0, "sync: percpu objpool from vmalloc"},

	/* sync & hrtimer */
	{0, 0, NODE_COMPACT, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool from vmalloc"},

	/* sync & overrun */
	{0, 0, NODE_COMPACT, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool"},
	{0, 0, NODE_VMALLOC, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool from vmalloc"},

	/* async mode */
	{1, 0, NODE_COMPACT, 1000, 100, 1, 0, 0, "async: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 100, 1, 0, 0, "async: percpu objpool from vmalloc"},

	/* async + hrtimer mode */
	{1, 0, NODE_COMPACT, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool"},
	{1, 0, NODE_VMALLOC, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool from vmalloc"},
};
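
/*
 * The positional initializers above map to the leading fields of
 * struct ot_test: {async, mode, objsz, duration, delay, bulk_normal,
 * bulk_irq, hrtimer, name}. For example, the last entry runs an
 * asynchronous test on 512-byte objects for 1000 ms, popping/pushing
 * four objects per pass in both thread and hrtimer context, with a
 * 4 ms hrtimer period.
 */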

static int __init ot_mod_init(void)
{
	int i;

	/* run all testcases */
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		ot_init_data(&g_testcases[i].data);
		if (g_testcases[i].async)
			ot_start_async(&g_testcases[i]);
		else
			ot_start_sync(&g_testcases[i]);
	}

	/* show tests summary */
	pr_info("\n");
	pr_info("Summary of testcases:\n");
	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
		pr_info(" duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
			g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
			g_testcases[i].data.objects.nmiss, g_testcases[i].name);
	}

	/* fail the load on purpose: all tests have already run */
	return -EAGAIN;
}

static void __exit ot_mod_exit(void)
{
}

module_init(ot_mod_init);
module_exit(ot_mod_exit);

MODULE_DESCRIPTION("Test module for lockless object pool");
MODULE_LICENSE("GPL");