/*
 *   BSD LICENSE
 *
 *   Copyright (C) Cavium, Inc 2017.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Cavium, Inc nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "test_perf_common.h"

/* See http://dpdk.org/doc/guides/tools/testeventdev.html for test details */
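
/*
 * One possible invocation of this test (see the guide above for the
 * authoritative option list; the core numbers and --vdev name below are
 * only examples):
 *
 *   sudo build/app/dpdk-test-eventdev --vdev=event_sw0 -- \
 *       --test=perf_queue --plcores=2 --wlcores=3 --stlist=a --fwd_latency
 */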

static inline int
perf_queue_nb_event_queues(struct evt_options *opt)
{
	/* nb_queues = number of producers * number of stages */
	return evt_nr_active_lcores(opt->plcores) * opt->nb_stages;
}
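
/*
 * Queue layout, as implied by the nb_stages stride used in port setup below:
 * each producer owns a chain of nb_stages queues, so for a given queue_id q
 * the stage position is q % nb_stages. E.g. 2 producers and 3 stages give
 * queues 0-2 to producer 0 and queues 3-5 to producer 1.
 */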

static inline __attribute__((always_inline)) void
mark_fwd_latency(struct rte_event *const ev,
		const uint8_t nb_stages)
{
	if (unlikely((ev->queue_id % nb_stages) == 0)) {
		struct perf_elt *const m = ev->event_ptr;

		m->timestamp = rte_get_timer_cycles();
	}
}
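
/*
 * The timestamp written above at the first stage (queue_id % nb_stages == 0)
 * is read back by perf_process_last_stage_latency() at the last stage, so
 * the reported value is the event's forwarding latency across the whole
 * pipeline.
 */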

static inline __attribute__((always_inline)) void
fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
		const uint8_t nb_stages)
{
	ev->queue_id++;
	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
	ev->op = RTE_EVENT_OP_FORWARD;
	ev->event_type = RTE_EVENT_TYPE_CPU;
}
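
/*
 * fwd_event() moves an event one queue further along its chain: the
 * schedule type is re-derived per stage from sched_type_list, and
 * RTE_EVENT_OP_FORWARD tells the scheduler this enqueue continues (and,
 * per eventdev semantics, completes the context of) the dequeued event.
 */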

static int
perf_queue_worker(void *arg, const int enable_fwd_latency)
{
	PERF_WORKER_INIT;
	struct rte_event ev;

	while (t->done == false) {
		uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!event) {
			rte_pause();
			continue;
		}

		/* first queue in pipeline: mark timestamp to compute fwd latency */
		if (enable_fwd_latency)
			mark_fwd_latency(&ev, nb_stages);

		/* last stage in pipeline */
		if (unlikely((ev.queue_id % nb_stages) == laststage)) {
			if (enable_fwd_latency)
				cnt = perf_process_last_stage_latency(pool,
					&ev, w, bufs, sz, cnt);
			else
				cnt = perf_process_last_stage(pool,
					&ev, w, bufs, sz, cnt);
		} else {
			fwd_event(&ev, sched_type_list, nb_stages);
			while (rte_event_enqueue_burst(dev, port, &ev, 1) != 1)
				rte_pause();
		}
	}
	return 0;
}
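
/*
 * The burst variant below is functionally identical but amortizes the
 * per-call enqueue/dequeue overhead over up to BURST_SIZE events, and
 * prefetches the next event's payload while measuring forward latency.
 */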

static int
perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
{
	PERF_WORKER_INIT;
	uint16_t i;
	/* +1 so the prefetch of ev[i + 1] on the last iteration stays
	 * within the array bounds.
	 */
	struct rte_event ev[BURST_SIZE + 1];

	while (t->done == false) {
		uint16_t const nb_rx = rte_event_dequeue_burst(dev, port, ev,
				BURST_SIZE, 0);

		if (!nb_rx) {
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if (enable_fwd_latency) {
				rte_prefetch0(ev[i + 1].event_ptr);
				/* first queue in pipeline.
				 * mark timestamp to compute fwd latency
				 */
				mark_fwd_latency(&ev[i], nb_stages);
			}
			/* last stage in pipeline */
			if (unlikely((ev[i].queue_id % nb_stages) ==
						 laststage)) {
				if (enable_fwd_latency)
					cnt = perf_process_last_stage_latency(
						pool, &ev[i], w, bufs, sz, cnt);
				else
					cnt = perf_process_last_stage(pool,
						&ev[i], w, bufs, sz, cnt);

				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				fwd_event(&ev[i], sched_type_list, nb_stages);
			}
		}

		uint16_t enq;

		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
		while (enq < nb_rx) {
			enq += rte_event_enqueue_burst(dev, port,
							ev + enq, nb_rx - enq);
		}
	}
	return 0;
}
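
/*
 * Note the asymmetry with the single-event worker above: there, a last-stage
 * event is simply not re-enqueued and the next dequeue implicitly releases
 * its scheduling context, whereas here the whole burst is enqueued, so
 * last-stage events must carry RTE_EVENT_OP_RELEASE explicitly.
 */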

static int
worker_wrapper(void *arg)
{
	struct worker_data *w = arg;
	struct evt_options *opt = w->t->opt;

	const bool burst = evt_has_burst_mode(w->dev_id);
	const int fwd_latency = opt->fwd_latency;

	/* allow compiler to optimize */
	if (!burst && !fwd_latency)
		return perf_queue_worker(arg, 0);
	else if (!burst && fwd_latency)
		return perf_queue_worker(arg, 1);
	else if (burst && !fwd_latency)
		return perf_queue_worker_burst(arg, 0);
	else if (burst && fwd_latency)
		return perf_queue_worker_burst(arg, 1);

	rte_panic("invalid worker\n");
}
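
/*
 * evt_has_burst_mode() reflects the RTE_EVENT_DEV_CAP_BURST_MODE device
 * capability; drivers without it gain nothing from burst-sized calls, hence
 * the single-event worker variants. Dispatching on constant 0/1 flags lets
 * the compiler specialize each worker and prune the latency branches.
 */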

static int
perf_queue_launch_lcores(struct evt_test *test, struct evt_options *opt)
{
	return perf_launch_lcores(test, opt, worker_wrapper);
}

static int
perf_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
{
	uint8_t queue;
	int nb_stages = opt->nb_stages;
	int ret;

	const struct rte_event_dev_config config = {
			.nb_event_queues = perf_queue_nb_event_queues(opt),
			.nb_event_ports = perf_nb_event_ports(opt),
			.nb_events_limit = 4096,
			.nb_event_queue_flows = opt->nb_flows,
			.nb_event_port_dequeue_depth = 128,
			.nb_event_port_enqueue_depth = 128,
	};

	ret = rte_event_dev_configure(opt->dev_id, &config);
	if (ret) {
		evt_err("failed to configure eventdev %d", opt->dev_id);
		return ret;
	}

	struct rte_event_queue_conf q_conf = {
			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
			.nb_atomic_flows = opt->nb_flows,
			.nb_atomic_order_sequences = opt->nb_flows,
	};
	/* queue configurations */
	for (queue = 0; queue < perf_queue_nb_event_queues(opt); queue++) {
		q_conf.schedule_type =
			(opt->sched_type_list[queue % nb_stages]);

		if (opt->q_priority) {
			uint8_t stage_pos = queue % nb_stages;
			/* Configure event queues (stage 0 to stage n) with
			 * RTE_EVENT_DEV_PRIORITY_LOWEST to
			 * RTE_EVENT_DEV_PRIORITY_HIGHEST. Guard the division
			 * for single-stage pipelines.
			 */
			uint8_t step = nb_stages > 1 ?
					RTE_EVENT_DEV_PRIORITY_LOWEST /
					(nb_stages - 1) : 0;
			/* Higher prio for the queues closer to last stage */
			q_conf.priority = RTE_EVENT_DEV_PRIORITY_LOWEST -
					(step * stage_pos);
		}
		ret = rte_event_queue_setup(opt->dev_id, queue, &q_conf);
		if (ret) {
			evt_err("failed to setup queue=%d", queue);
			return ret;
		}
	}
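
	/*
	 * Worked example for the q_priority mapping above, assuming the
	 * standard priority range (RTE_EVENT_DEV_PRIORITY_LOWEST = 255,
	 * RTE_EVENT_DEV_PRIORITY_HIGHEST = 0) and nb_stages = 3:
	 * step = 255 / 2 = 127, so stage 0 queues get priority 255,
	 * stage 1 queues get 128 and stage 2 queues get 1, i.e. queues
	 * nearer the pipeline end drain first.
	 */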

	ret = perf_event_dev_port_setup(test, opt, nb_stages /* stride */,
					perf_queue_nb_event_queues(opt));
	if (ret)
		return ret;

	ret = evt_service_setup(opt->dev_id);
	if (ret) {
		evt_err("No service lcore found to run event dev.");
		return ret;
	}

	ret = rte_event_dev_start(opt->dev_id);
	if (ret) {
		evt_err("failed to start eventdev %d", opt->dev_id);
		return ret;
	}

	return 0;
}
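
/*
 * evt_service_setup() matters for software event devices (e.g. event_sw),
 * whose scheduler runs as a DPDK service and so needs a service lcore
 * mapped before rte_event_dev_start(); hardware schedulers typically need
 * no such core.
 */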

static void
perf_queue_opt_dump(struct evt_options *opt)
{
	evt_dump_fwd_latency(opt);
	perf_opt_dump(opt, perf_queue_nb_event_queues(opt));
}

static int
perf_queue_opt_check(struct evt_options *opt)
{
	return perf_opt_check(opt, perf_queue_nb_event_queues(opt));
}

static bool
perf_queue_capability_check(struct evt_options *opt)
{
	struct rte_event_dev_info dev_info;

	rte_event_dev_info_get(opt->dev_id, &dev_info);
	if (dev_info.max_event_queues < perf_queue_nb_event_queues(opt) ||
			dev_info.max_event_ports < perf_nb_event_ports(opt)) {
		evt_err("not enough eventdev queues=%d/%d or ports=%d/%d",
			perf_queue_nb_event_queues(opt),
			dev_info.max_event_queues,
			perf_nb_event_ports(opt), dev_info.max_event_ports);
		return false;
	}

	return true;
}

static const struct evt_test_ops perf_queue = {
	.cap_check          = perf_queue_capability_check,
	.opt_check          = perf_queue_opt_check,
	.opt_dump           = perf_queue_opt_dump,
	.test_setup         = perf_test_setup,
	.mempool_setup      = perf_mempool_setup,
	.eventdev_setup     = perf_queue_eventdev_setup,
	.launch_lcores      = perf_queue_launch_lcores,
	.eventdev_destroy   = perf_eventdev_destroy,
	.mempool_destroy    = perf_mempool_destroy,
	.test_result        = perf_test_result,
	.test_destroy       = perf_test_destroy,
};

EVT_TEST_REGISTER(perf_queue);