/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_compressdev.h>

#include "comp_perf_test_throughput.h"

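/*
 * Test destructor: free the buffers allocated for the verification stage
 * and the benchmark context itself.
 */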
void
cperf_throughput_test_destructor(void *arg)
{
	if (arg) {
		comp_perf_free_memory(
			((struct cperf_benchmark_ctx *)arg)->ver.options,
			&((struct cperf_benchmark_ctx *)arg)->ver.mem);
		rte_free(arg);
	}
}

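/*
 * Allocate a benchmark context for one device/queue-pair pair and reuse
 * the embedded verification context for buffer setup; it is marked silent
 * so only the benchmark results are printed. Returns NULL on failure.
 */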
void *
cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_benchmark_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* verification part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_throughput_test_destructor(ctx);
	return NULL;
}

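/*
 * Run num_iter enqueue/dequeue passes over all buffers for one xform type
 * (compress or decompress) and record the average TSC cycles per iteration
 * in the benchmark context.
 */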
static int
main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1,
			"Unknown burst size\n");
		return -1;
	}

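	/*
	 * A single allocation holds both arrays: the first half is used for
	 * enqueued operations, the second half (deq_ops) for dequeued ones.
	 */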
	ops = rte_zmalloc_socket(NULL,
		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[mem->total_bufs];

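	/*
	 * Direction selects the buffer roles: compression reads the original
	 * (decomp) buffers and writes the comp buffers; decompression is the
	 * reverse, with output segments sized back to the plain-data seg_sz.
	 */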
	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	uint64_t tsc_start, tsc_end, tsc_duration;

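	/*
	 * Time complete iterations only; rte_rdtsc_precise() adds a memory
	 * barrier so the timestamp cannot be reordered around the loop.
	 */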
	num_iter = test_data->num_iter;
	tsc_start = tsc_end = tsc_duration = 0;
	tsc_start = rte_rdtsc_precise();

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

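		/*
		 * Enqueue in bursts and dequeue in the same loop to keep the
		 * queue pair busy; ops the PMD did not accept are carried
		 * over (ops_unused) and retried in the next burst.
		 */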
		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
					ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && !rte_comp_op_bulk_alloc(
						mem->op_pool,
						&ops[ops_unused],
						ops_needed)) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
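				/*
				 * Stash the buffer index in input_chksum;
				 * the field is otherwise unused here since
				 * chksum is RTE_COMP_CHECKSUM_NONE.
				 */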
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_enq = rte_compressdev_enqueue_burst(dev_id,
							mem->qp_id, ops,
							num_ops);
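			/*
			 * A zero-length enqueue burst usually just means the
			 * queue pair is full; treat it as fatal only if the
			 * PMD also reports enqueue errors.
			 */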
			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			num_deq = rte_compressdev_dequeue_burst(dev_id,
						mem->qp_id,
						deq_ops,
						test_data->burst_sz);
			total_deq_ops += num_deq;

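			/*
			 * On the last iteration only, check each op status
			 * and trim the destination mbuf chain to the length
			 * the PMD actually produced.
			 */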
			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_deq = rte_compressdev_dequeue_burst(dev_id,
						mem->qp_id,
						deq_ops,
						test_data->burst_sz);
			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}

	tsc_end = rte_rdtsc_precise();
	tsc_duration = tsc_end - tsc_start;

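	/* Record the average cycle count per iteration for this level. */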
	if (type == RTE_COMP_COMPRESS)
		ctx->comp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;
	else
		ctx->decomp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;

end:
	rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

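/*
 * Per-lcore test runner: verify correctness first, then run the timed
 * compression and decompression loops and print the measured throughput.
 */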
int
cperf_throughput_test_runner(void *test_ctx)
{
	struct cperf_benchmark_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
	int i, ret = EXIT_SUCCESS;

	ctx->ver.mem.lcore_id = lcore;

	/*
	 * Print information about the current compression thread
	 */
	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
		printf("    lcore: %u,"
				" driver name: %s,"
				" device name: %s,"
				" device id: %u,"
				" socket id: %u,"
				" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * Run the verification part first
	 */
	if (cperf_verify_test_runner(&ctx->ver)) {
		ret = EXIT_FAILURE;
		goto end;
	}

	/*
	 * Run each test twice, discarding the first result, which is
	 * measured before the cache is warmed up
	 */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
			ret = EXIT_FAILURE;
			goto end;
		}
	}

	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
			ret = EXIT_FAILURE;
			goto end;
		}
	}

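	/*
	 * comp_tsc_byte is the average cost in TSC cycles per input byte;
	 * dividing the TSC frequency by it yields bytes per second, and
	 * multiplying by 8 / 1e9 converts that to gigabits per second.
	 */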
	ctx->comp_tsc_byte =
			(double)(ctx->comp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;

	ctx->decomp_tsc_byte =
			(double)(ctx->decomp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;

	ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
			1000000000;

	ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
			1000000000;

	if (rte_atomic16_test_and_set(&display_once)) {
		printf("\n%12s%6s%12s%17s%15s%16s\n",
			"lcore id", "Level", "Comp size", "Comp ratio [%]",
			"Comp [Gbps]", "Decomp [Gbps]");
	}

	printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
		ctx->ver.mem.lcore_id,
		test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
		ctx->comp_gbps,
		ctx->decomp_gbps);

end:
	return ret;
}