/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include <rte_bpf_ethdev.h>
#include "bpf_impl.h"

/*
 * information about installed BPF rx/tx callback
 */

struct bpf_eth_cbi {
	/* used by both data & control path */
	uint32_t use;	/* usage counter */
	const struct rte_eth_rxtx_callback *cb;	/* callback handle */
	struct rte_bpf *bpf;
	struct rte_bpf_jit jit;
	/* used by control path only */
	LIST_ENTRY(bpf_eth_cbi) link;
	uint16_t port;
	uint16_t queue;
} __rte_cache_aligned;

/*
 * Odd number means that callback is used by datapath.
 * Even number means that callback is not used by datapath.
 */
#define BPF_ETH_CBI_INUSE	1
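
/*
 * Illustrative sequence for the 'use' counter above (a sketch, assuming a
 * single datapath writer per callback, which is how the counter is used
 * below):
 *   use == 0: callback is idle;
 *   use == 1: datapath entered the callback (bpf_eth_cbi_inuse);
 *   use == 2: datapath left the callback (bpf_eth_cbi_unuse);
 * The control path samples 'use' and, if the value is odd, busy-waits
 * until it changes (see bpf_eth_cbi_wait).
 */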

/*
 * List to manage RX/TX installed callbacks.
 */
LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);

enum {
	BPF_ETH_RX,
	BPF_ETH_TX,
	BPF_ETH_NUM,
};

/*
 * information about all installed BPF rx/tx callbacks
 */
struct bpf_eth_cbh {
	rte_spinlock_t lock;
	struct bpf_eth_cbi_list list;
	uint32_t type;
};

static struct bpf_eth_cbh rx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_RX,
};

static struct bpf_eth_cbh tx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_TX,
};

/*
 * Marks given callback as used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
{
	cbi->use++;
	/* make sure no store/load reordering could happen */
	rte_smp_mb();
}

/*
 * Marks given callback as not used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
{
	/* make sure all previous loads are completed */
	rte_smp_rmb();
	cbi->use++;
}

/*
 * Waits till datapath finishes using given callback.
 */
static void
bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
{
	uint32_t nuse, puse;

	/* make sure all previous loads and stores are completed */
	rte_smp_mb();

	puse = cbi->use;

	/* in use, busy wait till current RX/TX iteration is finished */
	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
		do {
			rte_pause();
			rte_compiler_barrier();
			nuse = cbi->use;
		} while (nuse == puse);
	}
}

static void
bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
{
	bc->bpf = NULL;
	memset(&bc->jit, 0, sizeof(bc->jit));
}

static struct bpf_eth_cbi *
bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	LIST_FOREACH(cbi, &cbh->list, link) {
		if (cbi->port == port && cbi->queue == queue)
			break;
	}
	return cbi;
}

static struct bpf_eth_cbi *
bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	/* return an existing one */
	cbi = bpf_eth_cbh_find(cbh, port, queue);
	if (cbi != NULL)
		return cbi;

	cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
	if (cbi != NULL) {
		cbi->port = port;
		cbi->queue = queue;
		LIST_INSERT_HEAD(&cbh->list, cbi, link);
	}
	return cbi;
}

/*
 * BPF packet processing routines.
 */

static inline uint32_t
apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
	uint32_t drop)
{
	uint32_t i, j, k;
	struct rte_mbuf *dr[num];

	for (i = 0, j = 0, k = 0; i != num; i++) {

		/* filter matches */
		if (rc[i] != 0)
			mb[j++] = mb[i];
		/* no match */
		else
			dr[k++] = mb[i];
	}

	if (drop != 0) {
		/* free filtered out mbufs */
		for (i = 0; i != k; i++)
			rte_pktmbuf_free(dr[i]);
	} else {
		/* copy filtered out mbufs beyond good ones */
		for (i = 0; i != k; i++)
			mb[j + i] = dr[i];
	}

	return j;
}
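
/*
 * A small worked example of apply_filter() (illustrative only):
 * with num == 4 and rc == {1, 0, 7, 0}, mb[0] and mb[2] are kept at the
 * front of the array and 2 is returned; mb[1] and mb[3] are either freed
 * (drop != 0) or moved to positions 2 and 3 (drop == 0).
 */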

static inline uint32_t
pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint32_t i;
	void *dp[num];
	uint64_t rc[num];

	for (i = 0; i != num; i++)
		dp[i] = rte_pktmbuf_mtod(mb[i], void *);

	rte_bpf_exec_burst(bpf, dp, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	void *dp;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		dp = rte_pktmbuf_mtod(mb[i], void *);
		rc[i] = jit->func(dp);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

static inline uint32_t
pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint64_t rc[num];

	rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		rc[i] = jit->func(mb[i]);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

/*
 * RX/TX callbacks for raw data BPF.
 */

static uint16_t
bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;

	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

/*
 * RX/TX callbacks for mbuf.
 */

static uint16_t
bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static rte_rx_callback_fn
select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_rx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_rx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_rx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_rx_callback_mb_vm;

	return NULL;
}

static rte_tx_callback_fn
select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_tx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_tx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_tx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_tx_callback_mb_vm;

	return NULL;
}

/*
 * Helper function to perform BPF unload for given port/queue.
 * We have to introduce extra complexity (and a possible slowdown) here,
 * as right now there is no safe generic way to remove an RX/TX callback
 * while IO is active.
 * Note that we still don't free the memory allocated for the callback
 * handle itself; again, right now there is no safe way to do that without
 * stopping RX/TX on the given port/queue first.
 */
static void
bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
{
	/* mark this cbi as empty */
	bc->cb = NULL;
	rte_smp_mb();

	/* make sure datapath doesn't use bpf anymore, then destroy bpf */
	bpf_eth_cbi_wait(bc);
	rte_bpf_destroy(bc->bpf);
	bpf_eth_cbi_cleanup(bc);
}

static void
bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *bc;

	bc = bpf_eth_cbh_find(cbh, port, queue);
	if (bc == NULL || bc->cb == NULL)
		return;

	if (cbh->type == BPF_ETH_RX)
		rte_eth_remove_rx_callback(port, queue, bc->cb);
	else
		rte_eth_remove_tx_callback(port, queue, bc->cb);

	bpf_eth_cbi_unload(bc);
}

void
rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

void
rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

static int
bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbi *bc;
	struct rte_bpf *bpf;
	rte_rx_callback_fn frx;
	rte_tx_callback_fn ftx;
	struct rte_bpf_jit jit;

	frx = NULL;
	ftx = NULL;

	if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
			queue >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	if (cbh->type == BPF_ETH_RX)
		frx = select_rx_callback(prm->prog_arg.type, flags);
	else
		ftx = select_tx_callback(prm->prog_arg.type, flags);

	if (frx == NULL && ftx == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
			__func__, port, queue);
		return -EINVAL;
	}

	bpf = rte_bpf_elf_load(prm, fname, sname);
	if (bpf == NULL)
		return -rte_errno;

	rte_bpf_get_jit(bpf, &jit);

	if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
			__func__, port, queue);
		rte_bpf_destroy(bpf);
		return -ENOTSUP;
	}

	/* setup/update global callback info */
	bc = bpf_eth_cbh_add(cbh, port, queue);
	if (bc == NULL) {
		/* free the loaded program, as no callback info tracks it */
		rte_bpf_destroy(bpf);
		return -ENOMEM;
	}

	/* remove old one, if any */
	if (bc->cb != NULL)
		bpf_eth_unload(cbh, port, queue);

	bc->bpf = bpf;
	bc->jit = jit;

	if (cbh->type == BPF_ETH_RX)
		bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
	else
		bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);

	if (bc->cb == NULL) {
		rc = -rte_errno;
		rte_bpf_destroy(bpf);
		bpf_eth_cbi_cleanup(bc);
	} else
		rc = 0;

	return rc;
}

int
rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}

int
rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}
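
/*
 * Typical usage from an application (a minimal sketch, not part of this
 * library; the port/queue identifiers, object file name and section name
 * are illustrative, and the rte_bpf_prm initialisation depends on the
 * program being loaded):
 *
 *	struct rte_bpf_prm prm = {
 *		.prog_arg = {
 *			.type = RTE_BPF_ARG_PTR,
 *			.size = RTE_MBUF_DEFAULT_BUF_SIZE,
 *		},
 *	};
 *
 *	rc = rte_bpf_eth_rx_elf_load(port_id, queue_id, &prm,
 *		"./filter.o", ".text", RTE_BPF_ETH_F_JIT);
 *	...
 *	rte_bpf_eth_rx_unload(port_id, queue_id);
 */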