// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 HiSilicon Limited.
 */
565789daaSBarry Song
665789daaSBarry Song #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
765789daaSBarry Song
865789daaSBarry Song #include <linux/debugfs.h>
965789daaSBarry Song #include <linux/delay.h>
1065789daaSBarry Song #include <linux/device.h>
1165789daaSBarry Song #include <linux/dma-mapping.h>
1265789daaSBarry Song #include <linux/kernel.h>
1365789daaSBarry Song #include <linux/kthread.h>
148ddde07aSTian Tao #include <linux/map_benchmark.h>
1565789daaSBarry Song #include <linux/math64.h>
1665789daaSBarry Song #include <linux/module.h>
1765789daaSBarry Song #include <linux/pci.h>
1865789daaSBarry Song #include <linux/platform_device.h>
1965789daaSBarry Song #include <linux/slab.h>
2065789daaSBarry Song #include <linux/timekeeping.h>
2165789daaSBarry Song
2265789daaSBarry Song struct map_benchmark_data {
2365789daaSBarry Song struct map_benchmark bparam;
2465789daaSBarry Song struct device *dev;
2565789daaSBarry Song struct dentry *debugfs;
2665789daaSBarry Song enum dma_data_direction dir;
2765789daaSBarry Song atomic64_t sum_map_100ns;
2865789daaSBarry Song atomic64_t sum_unmap_100ns;
2965789daaSBarry Song atomic64_t sum_sq_map;
3065789daaSBarry Song atomic64_t sum_sq_unmap;
3165789daaSBarry Song atomic64_t loops;
3265789daaSBarry Song };
3365789daaSBarry Song
map_benchmark_thread(void * data)3465789daaSBarry Song static int map_benchmark_thread(void *data)
3565789daaSBarry Song {
3665789daaSBarry Song void *buf;
3765789daaSBarry Song dma_addr_t dma_addr;
3865789daaSBarry Song struct map_benchmark_data *map = data;
39ca947482SXiang Chen int npages = map->bparam.granule;
40ca947482SXiang Chen u64 size = npages * PAGE_SIZE;
4165789daaSBarry Song int ret = 0;
4265789daaSBarry Song
43ca947482SXiang Chen buf = alloc_pages_exact(size, GFP_KERNEL);
4465789daaSBarry Song if (!buf)
4565789daaSBarry Song return -ENOMEM;
4665789daaSBarry Song
4765789daaSBarry Song while (!kthread_should_stop()) {
4865789daaSBarry Song u64 map_100ns, unmap_100ns, map_sq, unmap_sq;
4965789daaSBarry Song ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
5065789daaSBarry Song ktime_t map_delta, unmap_delta;
5165789daaSBarry Song
5265789daaSBarry Song /*
5365789daaSBarry Song * for a non-coherent device, if we don't stain them in the
5465789daaSBarry Song * cache, this will give an underestimate of the real-world
5565789daaSBarry Song * overhead of BIDIRECTIONAL or TO_DEVICE mappings;
5665789daaSBarry Song * 66 means evertything goes well! 66 is lucky.
5765789daaSBarry Song */
5865789daaSBarry Song if (map->dir != DMA_FROM_DEVICE)
59ca947482SXiang Chen memset(buf, 0x66, size);
6065789daaSBarry Song
6165789daaSBarry Song map_stime = ktime_get();
62ca947482SXiang Chen dma_addr = dma_map_single(map->dev, buf, size, map->dir);
6365789daaSBarry Song if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
6465789daaSBarry Song pr_err("dma_map_single failed on %s\n",
6565789daaSBarry Song dev_name(map->dev));
6665789daaSBarry Song ret = -ENOMEM;
6765789daaSBarry Song goto out;
6865789daaSBarry Song }
6965789daaSBarry Song map_etime = ktime_get();
7065789daaSBarry Song map_delta = ktime_sub(map_etime, map_stime);
7165789daaSBarry Song
729dc00b25SBarry Song /* Pretend DMA is transmitting */
739dc00b25SBarry Song ndelay(map->bparam.dma_trans_ns);
749dc00b25SBarry Song
7565789daaSBarry Song unmap_stime = ktime_get();
76ca947482SXiang Chen dma_unmap_single(map->dev, dma_addr, size, map->dir);
7765789daaSBarry Song unmap_etime = ktime_get();
7865789daaSBarry Song unmap_delta = ktime_sub(unmap_etime, unmap_stime);
7965789daaSBarry Song
8065789daaSBarry Song /* calculate sum and sum of squares */
8165789daaSBarry Song
8265789daaSBarry Song map_100ns = div64_ul(map_delta, 100);
8365789daaSBarry Song unmap_100ns = div64_ul(unmap_delta, 100);
8465789daaSBarry Song map_sq = map_100ns * map_100ns;
8565789daaSBarry Song unmap_sq = unmap_100ns * unmap_100ns;
8665789daaSBarry Song
8765789daaSBarry Song atomic64_add(map_100ns, &map->sum_map_100ns);
8865789daaSBarry Song atomic64_add(unmap_100ns, &map->sum_unmap_100ns);
8965789daaSBarry Song atomic64_add(map_sq, &map->sum_sq_map);
9065789daaSBarry Song atomic64_add(unmap_sq, &map->sum_sq_unmap);
9165789daaSBarry Song atomic64_inc(&map->loops);
92*54624acfSYicong Yang
93*54624acfSYicong Yang /*
94*54624acfSYicong Yang * We may test for a long time so periodically check whether
95*54624acfSYicong Yang * we need to schedule to avoid starving the others. Otherwise
96*54624acfSYicong Yang * we may hangup the kernel in a non-preemptible kernel when
97*54624acfSYicong Yang * the test kthreads number >= CPU number, the test kthreads
98*54624acfSYicong Yang * will run endless on every CPU since the thread resposible
99*54624acfSYicong Yang * for notifying the kthread stop (in do_map_benchmark())
100*54624acfSYicong Yang * could not be scheduled.
101*54624acfSYicong Yang *
102*54624acfSYicong Yang * Note this may degrade the test concurrency since the test
103*54624acfSYicong Yang * threads may need to share the CPU time with other load
104*54624acfSYicong Yang * in the system. So it's recommended to run this benchmark
105*54624acfSYicong Yang * on an idle system.
106*54624acfSYicong Yang */
107*54624acfSYicong Yang cond_resched();
10865789daaSBarry Song }
10965789daaSBarry Song
11065789daaSBarry Song out:
111ca947482SXiang Chen free_pages_exact(buf, size);
11265789daaSBarry Song return ret;
11365789daaSBarry Song }
11465789daaSBarry Song
do_map_benchmark(struct map_benchmark_data * map)11565789daaSBarry Song static int do_map_benchmark(struct map_benchmark_data *map)
11665789daaSBarry Song {
11765789daaSBarry Song struct task_struct **tsk;
11865789daaSBarry Song int threads = map->bparam.threads;
11965789daaSBarry Song int node = map->bparam.node;
12065789daaSBarry Song u64 loops;
12165789daaSBarry Song int ret = 0;
12265789daaSBarry Song int i;
12365789daaSBarry Song
12465789daaSBarry Song tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
12565789daaSBarry Song if (!tsk)
12665789daaSBarry Song return -ENOMEM;
12765789daaSBarry Song
12865789daaSBarry Song get_device(map->dev);
12965789daaSBarry Song
13065789daaSBarry Song for (i = 0; i < threads; i++) {
13165789daaSBarry Song tsk[i] = kthread_create_on_node(map_benchmark_thread, map,
13265789daaSBarry Song map->bparam.node, "dma-map-benchmark/%d", i);
13365789daaSBarry Song if (IS_ERR(tsk[i])) {
13465789daaSBarry Song pr_err("create dma_map thread failed\n");
13565789daaSBarry Song ret = PTR_ERR(tsk[i]);
136bb9025f4SFedor Pchelkin while (--i >= 0)
137bb9025f4SFedor Pchelkin kthread_stop(tsk[i]);
13865789daaSBarry Song goto out;
13965789daaSBarry Song }
14065789daaSBarry Song
14165789daaSBarry Song if (node != NUMA_NO_NODE)
142e64746e7SFedor Pchelkin kthread_bind_mask(tsk[i], cpumask_of_node(node));
14365789daaSBarry Song }
14465789daaSBarry Song
14565789daaSBarry Song /* clear the old value in the previous benchmark */
14665789daaSBarry Song atomic64_set(&map->sum_map_100ns, 0);
14765789daaSBarry Song atomic64_set(&map->sum_unmap_100ns, 0);
14865789daaSBarry Song atomic64_set(&map->sum_sq_map, 0);
14965789daaSBarry Song atomic64_set(&map->sum_sq_unmap, 0);
15065789daaSBarry Song atomic64_set(&map->loops, 0);
15165789daaSBarry Song
152d17405d5SBarry Song for (i = 0; i < threads; i++) {
153d17405d5SBarry Song get_task_struct(tsk[i]);
15465789daaSBarry Song wake_up_process(tsk[i]);
155d17405d5SBarry Song }
15665789daaSBarry Song
15765789daaSBarry Song msleep_interruptible(map->bparam.seconds * 1000);
15865789daaSBarry Song
159bb9025f4SFedor Pchelkin /* wait for the completion of all started benchmark threads */
16065789daaSBarry Song for (i = 0; i < threads; i++) {
161bb9025f4SFedor Pchelkin int kthread_ret = kthread_stop_put(tsk[i]);
162bb9025f4SFedor Pchelkin
163bb9025f4SFedor Pchelkin if (kthread_ret)
164bb9025f4SFedor Pchelkin ret = kthread_ret;
165bb9025f4SFedor Pchelkin }
166bb9025f4SFedor Pchelkin
16765789daaSBarry Song if (ret)
16865789daaSBarry Song goto out;
16965789daaSBarry Song
17065789daaSBarry Song loops = atomic64_read(&map->loops);
17165789daaSBarry Song if (likely(loops > 0)) {
17265789daaSBarry Song u64 map_variance, unmap_variance;
17365789daaSBarry Song u64 sum_map = atomic64_read(&map->sum_map_100ns);
17465789daaSBarry Song u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns);
17565789daaSBarry Song u64 sum_sq_map = atomic64_read(&map->sum_sq_map);
17665789daaSBarry Song u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap);
17765789daaSBarry Song
17865789daaSBarry Song /* average latency */
17965789daaSBarry Song map->bparam.avg_map_100ns = div64_u64(sum_map, loops);
18065789daaSBarry Song map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops);
18165789daaSBarry Song
18265789daaSBarry Song /* standard deviation of latency */
18365789daaSBarry Song map_variance = div64_u64(sum_sq_map, loops) -
18465789daaSBarry Song map->bparam.avg_map_100ns *
18565789daaSBarry Song map->bparam.avg_map_100ns;
18665789daaSBarry Song unmap_variance = div64_u64(sum_sq_unmap, loops) -
18765789daaSBarry Song map->bparam.avg_unmap_100ns *
18865789daaSBarry Song map->bparam.avg_unmap_100ns;
18965789daaSBarry Song map->bparam.map_stddev = int_sqrt64(map_variance);
19065789daaSBarry Song map->bparam.unmap_stddev = int_sqrt64(unmap_variance);
19165789daaSBarry Song }
19265789daaSBarry Song
19365789daaSBarry Song out:
19465789daaSBarry Song put_device(map->dev);
19565789daaSBarry Song kfree(tsk);
19665789daaSBarry Song return ret;
19765789daaSBarry Song }
19865789daaSBarry Song
map_benchmark_ioctl(struct file * file,unsigned int cmd,unsigned long arg)19965789daaSBarry Song static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
20065789daaSBarry Song unsigned long arg)
20165789daaSBarry Song {
20265789daaSBarry Song struct map_benchmark_data *map = file->private_data;
20365789daaSBarry Song void __user *argp = (void __user *)arg;
20465789daaSBarry Song u64 old_dma_mask;
20565789daaSBarry Song int ret;
20665789daaSBarry Song
20765789daaSBarry Song if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
20865789daaSBarry Song return -EFAULT;
20965789daaSBarry Song
21065789daaSBarry Song switch (cmd) {
21165789daaSBarry Song case DMA_MAP_BENCHMARK:
21265789daaSBarry Song if (map->bparam.threads == 0 ||
21365789daaSBarry Song map->bparam.threads > DMA_MAP_MAX_THREADS) {
21465789daaSBarry Song pr_err("invalid thread number\n");
21565789daaSBarry Song return -EINVAL;
21665789daaSBarry Song }
21765789daaSBarry Song
21865789daaSBarry Song if (map->bparam.seconds == 0 ||
21965789daaSBarry Song map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
22065789daaSBarry Song pr_err("invalid duration seconds\n");
22165789daaSBarry Song return -EINVAL;
22265789daaSBarry Song }
22365789daaSBarry Song
2249dc00b25SBarry Song if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
2259dc00b25SBarry Song pr_err("invalid transmission delay\n");
2269dc00b25SBarry Song return -EINVAL;
2279dc00b25SBarry Song }
2289dc00b25SBarry Song
22965789daaSBarry Song if (map->bparam.node != NUMA_NO_NODE &&
2301ff05e72SFedor Pchelkin (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES ||
2311ff05e72SFedor Pchelkin !node_possible(map->bparam.node))) {
23265789daaSBarry Song pr_err("invalid numa node\n");
23365789daaSBarry Song return -EINVAL;
23465789daaSBarry Song }
23565789daaSBarry Song
236ca947482SXiang Chen if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
237ca947482SXiang Chen pr_err("invalid granule size\n");
238ca947482SXiang Chen return -EINVAL;
239ca947482SXiang Chen }
240ca947482SXiang Chen
24165789daaSBarry Song switch (map->bparam.dma_dir) {
24265789daaSBarry Song case DMA_MAP_BIDIRECTIONAL:
24365789daaSBarry Song map->dir = DMA_BIDIRECTIONAL;
24465789daaSBarry Song break;
24565789daaSBarry Song case DMA_MAP_FROM_DEVICE:
24665789daaSBarry Song map->dir = DMA_FROM_DEVICE;
24765789daaSBarry Song break;
24865789daaSBarry Song case DMA_MAP_TO_DEVICE:
24965789daaSBarry Song map->dir = DMA_TO_DEVICE;
25065789daaSBarry Song break;
25165789daaSBarry Song default:
25265789daaSBarry Song pr_err("invalid DMA direction\n");
25365789daaSBarry Song return -EINVAL;
25465789daaSBarry Song }
25565789daaSBarry Song
25665789daaSBarry Song old_dma_mask = dma_get_mask(map->dev);
25765789daaSBarry Song
25865789daaSBarry Song ret = dma_set_mask(map->dev,
25965789daaSBarry Song DMA_BIT_MASK(map->bparam.dma_bits));
26065789daaSBarry Song if (ret) {
26165789daaSBarry Song pr_err("failed to set dma_mask on device %s\n",
26265789daaSBarry Song dev_name(map->dev));
26365789daaSBarry Song return -EINVAL;
26465789daaSBarry Song }
26565789daaSBarry Song
26665789daaSBarry Song ret = do_map_benchmark(map);
26765789daaSBarry Song
26865789daaSBarry Song /*
26965789daaSBarry Song * restore the original dma_mask as many devices' dma_mask are
27065789daaSBarry Song * set by architectures, acpi, busses. When we bind them back
27165789daaSBarry Song * to their original drivers, those drivers shouldn't see
27265789daaSBarry Song * dma_mask changed by benchmark
27365789daaSBarry Song */
27465789daaSBarry Song dma_set_mask(map->dev, old_dma_mask);
275f7c9ccaaSFedor Pchelkin
276f7c9ccaaSFedor Pchelkin if (ret)
277f7c9ccaaSFedor Pchelkin return ret;
27865789daaSBarry Song break;
27965789daaSBarry Song default:
28065789daaSBarry Song return -EINVAL;
28165789daaSBarry Song }
28265789daaSBarry Song
28365789daaSBarry Song if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
28465789daaSBarry Song return -EFAULT;
28565789daaSBarry Song
28665789daaSBarry Song return ret;
28765789daaSBarry Song }
28865789daaSBarry Song
28965789daaSBarry Song static const struct file_operations map_benchmark_fops = {
29065789daaSBarry Song .open = simple_open,
29165789daaSBarry Song .unlocked_ioctl = map_benchmark_ioctl,
29265789daaSBarry Song };
29365789daaSBarry Song
map_benchmark_remove_debugfs(void * data)29465789daaSBarry Song static void map_benchmark_remove_debugfs(void *data)
29565789daaSBarry Song {
29665789daaSBarry Song struct map_benchmark_data *map = (struct map_benchmark_data *)data;
29765789daaSBarry Song
29865789daaSBarry Song debugfs_remove(map->debugfs);
29965789daaSBarry Song }
30065789daaSBarry Song
__map_benchmark_probe(struct device * dev)30165789daaSBarry Song static int __map_benchmark_probe(struct device *dev)
30265789daaSBarry Song {
30365789daaSBarry Song struct dentry *entry;
30465789daaSBarry Song struct map_benchmark_data *map;
30565789daaSBarry Song int ret;
30665789daaSBarry Song
30765789daaSBarry Song map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL);
30865789daaSBarry Song if (!map)
30965789daaSBarry Song return -ENOMEM;
31065789daaSBarry Song map->dev = dev;
31165789daaSBarry Song
31265789daaSBarry Song ret = devm_add_action(dev, map_benchmark_remove_debugfs, map);
31365789daaSBarry Song if (ret) {
31465789daaSBarry Song pr_err("Can't add debugfs remove action\n");
31565789daaSBarry Song return ret;
31665789daaSBarry Song }
31765789daaSBarry Song
31865789daaSBarry Song /*
31965789daaSBarry Song * we only permit a device bound with this driver, 2nd probe
32065789daaSBarry Song * will fail
32165789daaSBarry Song */
32265789daaSBarry Song entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map,
32365789daaSBarry Song &map_benchmark_fops);
32465789daaSBarry Song if (IS_ERR(entry))
32565789daaSBarry Song return PTR_ERR(entry);
32665789daaSBarry Song map->debugfs = entry;
32765789daaSBarry Song
32865789daaSBarry Song return 0;
32965789daaSBarry Song }
33065789daaSBarry Song
map_benchmark_platform_probe(struct platform_device * pdev)33165789daaSBarry Song static int map_benchmark_platform_probe(struct platform_device *pdev)
33265789daaSBarry Song {
33365789daaSBarry Song return __map_benchmark_probe(&pdev->dev);
33465789daaSBarry Song }
33565789daaSBarry Song
33665789daaSBarry Song static struct platform_driver map_benchmark_platform_driver = {
33765789daaSBarry Song .driver = {
33865789daaSBarry Song .name = "dma_map_benchmark",
33965789daaSBarry Song },
34065789daaSBarry Song .probe = map_benchmark_platform_probe,
34165789daaSBarry Song };
34265789daaSBarry Song
34365789daaSBarry Song static int
map_benchmark_pci_probe(struct pci_dev * pdev,const struct pci_device_id * id)34465789daaSBarry Song map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
34565789daaSBarry Song {
34665789daaSBarry Song return __map_benchmark_probe(&pdev->dev);
34765789daaSBarry Song }
34865789daaSBarry Song
34965789daaSBarry Song static struct pci_driver map_benchmark_pci_driver = {
35065789daaSBarry Song .name = "dma_map_benchmark",
35165789daaSBarry Song .probe = map_benchmark_pci_probe,
35265789daaSBarry Song };
35365789daaSBarry Song
map_benchmark_init(void)35465789daaSBarry Song static int __init map_benchmark_init(void)
35565789daaSBarry Song {
35665789daaSBarry Song int ret;
35765789daaSBarry Song
35865789daaSBarry Song ret = pci_register_driver(&map_benchmark_pci_driver);
35965789daaSBarry Song if (ret)
36065789daaSBarry Song return ret;
36165789daaSBarry Song
36265789daaSBarry Song ret = platform_driver_register(&map_benchmark_platform_driver);
36365789daaSBarry Song if (ret) {
36465789daaSBarry Song pci_unregister_driver(&map_benchmark_pci_driver);
36565789daaSBarry Song return ret;
36665789daaSBarry Song }
36765789daaSBarry Song
36865789daaSBarry Song return 0;
36965789daaSBarry Song }
37065789daaSBarry Song
map_benchmark_cleanup(void)37165789daaSBarry Song static void __exit map_benchmark_cleanup(void)
37265789daaSBarry Song {
37365789daaSBarry Song platform_driver_unregister(&map_benchmark_platform_driver);
37465789daaSBarry Song pci_unregister_driver(&map_benchmark_pci_driver);
37565789daaSBarry Song }
37665789daaSBarry Song
37765789daaSBarry Song module_init(map_benchmark_init);
37865789daaSBarry Song module_exit(map_benchmark_cleanup);
37965789daaSBarry Song
38065789daaSBarry Song MODULE_AUTHOR("Barry Song <[email protected]>");
38165789daaSBarry Song MODULE_DESCRIPTION("dma_map benchmark driver");
382