/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Microsoft Corporation
 *
 * Based on bpf_convert_filter() in the Linux kernel sources
 * and filter2xdp.
 *
 * Licensed as BSD with permission of the original authors.
 * Copyright (C) 2017 Tobias Klauser
 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 */

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <rte_common.h>
#include <rte_bpf.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_errno.h>

/* Workaround name conflicts with libpcap */
#define bpf_validate(f, len) bpf_validate_libpcap(f, len)
#include <pcap/pcap.h>
#include <pcap/bpf.h>
#undef bpf_validate

#include "bpf_impl.h"
#include "bpf_def.h"

#ifndef BPF_MAXINSNS
#define BPF_MAXINSNS 4096
#endif

/*
 * Linux socket filter uses negative absolute offsets to
 * reference ancillary data.
 */
#define SKF_AD_OFF (-0x1000)
#define SKF_AD_PROTOCOL 0
#define SKF_AD_PKTTYPE 4
#define SKF_AD_IFINDEX 8
#define SKF_AD_NLATTR 12
#define SKF_AD_NLATTR_NEST 16
#define SKF_AD_MARK 20
#define SKF_AD_QUEUE 24
#define SKF_AD_HATYPE 28
#define SKF_AD_RXHASH 32
#define SKF_AD_CPU 36
#define SKF_AD_ALU_XOR_X 40
#define SKF_AD_VLAN_TAG 44
#define SKF_AD_VLAN_TAG_PRESENT 48
#define SKF_AD_PAY_OFFSET 52
#define SKF_AD_RANDOM 56
#define SKF_AD_VLAN_TPID 60
#define SKF_AD_MAX 64

/* ArgX, context and stack frame pointer register positions. Note,
 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
 * calls in BPF_CALL instruction.
 */
#define BPF_REG_ARG1 EBPF_REG_1
#define BPF_REG_ARG2 EBPF_REG_2
#define BPF_REG_ARG3 EBPF_REG_3
#define BPF_REG_ARG4 EBPF_REG_4
#define BPF_REG_ARG5 EBPF_REG_5
#define BPF_REG_CTX EBPF_REG_6
#define BPF_REG_FP EBPF_REG_10

/* Additional register mappings for converted user programs. */
#define BPF_REG_A EBPF_REG_0
#define BPF_REG_X EBPF_REG_7
#define BPF_REG_TMP EBPF_REG_8

/* Helper macros for filter block array initializers. */

/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */

#define EBPF_ALU64_REG(OP, DST, SRC) \
	((struct ebpf_insn) { \
		.code = EBPF_ALU64 | BPF_OP(OP) | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

#define BPF_ALU32_REG(OP, DST, SRC) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | BPF_OP(OP) | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */

#define BPF_ALU32_IMM(OP, DST, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
		.dst_reg = DST, \
		.src_reg = 0, \
		.off = 0, \
		.imm = IMM })

/* Short form of mov, dst_reg = src_reg */

#define BPF_MOV64_REG(DST, SRC) \
	((struct ebpf_insn) { \
		.code = EBPF_ALU64 | EBPF_MOV | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

#define BPF_MOV32_REG(DST, SRC) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | EBPF_MOV | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

/* Short form of mov, dst_reg = imm32 */

#define BPF_MOV32_IMM(DST, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | EBPF_MOV | BPF_K, \
		.dst_reg = DST, \
		.src_reg = 0, \
		.off = 0, \
		.imm = IMM })

/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */

#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | EBPF_MOV | BPF_SRC(TYPE), \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = IMM })

/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */

#define BPF_LD_ABS(SIZE, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
		.dst_reg = 0, \
		.src_reg = 0, \
		.off = 0, \
		.imm = IMM })

/* Memory load, dst_reg = *(uint *) (src_reg + off16) */

#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
	((struct ebpf_insn) { \
		.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = OFF, \
		.imm = 0 })

/* Memory store, *(uint *) (dst_reg + off16) = src_reg */

#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
	((struct ebpf_insn) { \
		.code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = OFF, \
		.imm = 0 })

/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */

#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
	((struct ebpf_insn) { \
		.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
		.dst_reg = DST, \
		.src_reg = 0, \
		.off = OFF, \
		.imm = IMM })

/* Raw code statement block */

#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
	((struct ebpf_insn) { \
		.code = CODE, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = OFF, \
		.imm = IMM })

/* Program exit */

#define BPF_EXIT_INSN() \
	((struct ebpf_insn) { \
		.code = BPF_JMP | EBPF_EXIT, \
		.dst_reg = 0, \
		.src_reg = 0, \
		.off = 0, \
		.imm = 0 })

/*
 * Placeholder to convert BPF extensions like length and VLAN tag,
 * if and when DPDK BPF supports them. For now any ancillary-data
 * load is rejected.
 */
static bool convert_bpf_load(const struct bpf_insn *fp,
			     struct ebpf_insn **new_insnp __rte_unused)
{
	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
	case SKF_AD_OFF + SKF_AD_PKTTYPE:
	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
	case SKF_AD_OFF + SKF_AD_MARK:
	case SKF_AD_OFF + SKF_AD_RXHASH:
	case SKF_AD_OFF + SKF_AD_QUEUE:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* Linux has special negative offsets to access meta-data. */
		RTE_BPF_LOG(ERR,
			    "rte_bpf_convert: socket offset %d not supported\n",
			    fp->k - SKF_AD_OFF);
		return true;
	default:
		return false;
	}
}

static int bpf_convert_filter(const struct bpf_insn *prog, size_t len,
			      struct ebpf_insn *new_prog, uint32_t *new_len)
{
	unsigned int pass = 0;
	size_t new_flen = 0, target, i;
	struct ebpf_insn *new_insn;
	const struct bpf_insn *fp;
	int *addrs = NULL;
	uint8_t bpf_src;

	if (len > BPF_MAXINSNS) {
		RTE_BPF_LOG(ERR, "%s: cBPF program too long (%zu insns)\n",
			    __func__, len);
		return -EINVAL;
	}

	/* On the second pass (new_prog != NULL), allocate a map from
	 * cBPF instruction index to eBPF offset, used to resolve jumps.
	 */
	if (new_prog) {
		addrs = calloc(len, sizeof(*addrs));
		if (addrs == NULL)
			return -ENOMEM;
	}

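	/*
	 * The translation may need more than one pass: jump offsets depend
	 * on where each converted instruction finally lands, so the loop
	 * below repeats until the emitted length stops changing.
	 */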
do_pass:
	new_insn = new_prog;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_insn) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In the eBPF case this is done by the compiler; here we need
		 * to do it ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	} else {
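		/* Length-only pass: still account for the three
		 * prologue instructions emitted above.
		 */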
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct ebpf_insn tmp_insns[6] = { };
		struct ebpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - new_prog;

		switch (fp->code) {
		/* Absolute loads are how classic BPF accesses skb */
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
			if (convert_bpf_load(fp, &insn))
				goto err;

			*insn = BPF_RAW_INSN(fp->code, 0, 0, 0, fp->k);
			break;

		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_X:
			/* For cBPF, don't cause floating point exception */
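			/* Emit: if (X == 0) { A = 0; return A; } so a
			 * divide or modulo by zero ends the filter with
			 * return value 0 instead of faulting.
			 */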
			*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
			*insn++ = BPF_JMP_IMM(EBPF_JNE, BPF_REG_X, 0, 2);
			*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
			*insn++ = BPF_EXIT_INSN();
			/* fallthrough */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* All arithmetic insns map as-is. */
			insn->code = fp->code;
			insn->dst_reg = BPF_REG_A;
			bpf_src = BPF_SRC(fp->code);
			insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			insn->off = 0;
			insn->imm = fp->k;
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP \
	do { \
		if (target >= len) \
			goto err; \
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
		/* Adjust pc relative offset for 2nd or 3rd insn. */ \
		insn->off -= insn - tmp_insns; \
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | EBPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as EBPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K) {
				*insn++ = BPF_MOV32_RAW(BPF_K, EBPF_REG_0,
							0, fp->k);
			}
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
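		/* cBPF scratch word M[k] lives in its own 32-bit slot below
		 * the eBPF frame pointer: FP - (BPF_MEMWORDS - k) * 4.
		 */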
		case BPF_ST:
		case BPF_STX:
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = mbuf->len or X = mbuf->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			/* BPF_ABS/BPF_IND implicitly expect mbuf ptr in R6 */

			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct rte_mbuf, pkt_len));
			break;

		/* Unknown instruction. */
		default:
			RTE_BPF_LOG(ERR, "%s: Unknown instruction!: %#x\n",
				    __func__, fp->code);
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - new_prog;
		return 0;
	}

	pass++;
	if ((ptrdiff_t)new_flen != new_insn - new_prog) {
		new_flen = new_insn - new_prog;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	free(addrs);
	assert(*new_len == new_flen);

	return 0;
err:
	free(addrs);
	return -EINVAL;
}

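/*
 * Typical usage (illustrative sketch only, error handling omitted;
 * everything other than the pcap/rte_bpf calls is hypothetical):
 *
 *	struct bpf_program fcode;
 *	pcap_t *pc = pcap_open_dead(DLT_EN10MB, 65535);
 *
 *	pcap_compile(pc, &fcode, "tcp port 80", 1, PCAP_NETMASK_UNKNOWN);
 *	struct rte_bpf_prm *prm = rte_bpf_convert(&fcode);
 *	struct rte_bpf *bpf = rte_bpf_load(prm);
 *	... then run the loaded program against each mbuf ...
 */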
struct rte_bpf_prm *
rte_bpf_convert(const struct bpf_program *prog)
{
	struct rte_bpf_prm *prm = NULL;
	struct ebpf_insn *ebpf = NULL;
	uint32_t ebpf_len = 0;
	int ret;

	if (prog == NULL) {
		RTE_BPF_LOG(ERR, "%s: NULL program\n", __func__);
		rte_errno = EINVAL;
		return NULL;
	}

	/* 1st pass: calculate the eBPF program length */
	ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, NULL, &ebpf_len);
	if (ret < 0) {
		RTE_BPF_LOG(ERR, "%s: cannot get eBPF length\n", __func__);
		rte_errno = -ret;
		return NULL;
	}

	RTE_BPF_LOG(DEBUG, "%s: prog len cBPF=%u -> eBPF=%u\n",
		    __func__, prog->bf_len, ebpf_len);

	prm = rte_zmalloc("bpf_filter",
			  sizeof(*prm) + ebpf_len * sizeof(*ebpf), 0);
	if (prm == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	/* The eBPF instructions in this case are right after the header */
	ebpf = (void *)(prm + 1);

	/* 2nd pass: remap cBPF to eBPF instructions */
	ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, ebpf, &ebpf_len);
	if (ret < 0) {
		RTE_BPF_LOG(ERR, "%s: cannot convert cBPF to eBPF\n", __func__);
		rte_free(prm);
		rte_errno = -ret;
		return NULL;
	}

	prm->ins = ebpf;
	prm->nb_ins = ebpf_len;

	/* Classic BPF programs use mbufs */
	prm->prog_arg.type = RTE_BPF_ARG_PTR_MBUF;
	prm->prog_arg.size = sizeof(struct rte_mbuf);

	return prm;
}
