/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 6WIND S.A.
 */

#include <sys/queue.h>
#include <stdint.h>
#include <limits.h>

#include <rte_common.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_tailq.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_bitops.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"

struct mbuf_dynfield_elt {
	struct rte_mbuf_dynfield params;
	size_t offset;
};
TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);

static struct rte_tailq_elem mbuf_dynfield_tailq = {
	.name = "RTE_MBUF_DYNFIELD",
};
EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);

struct mbuf_dynflag_elt {
	struct rte_mbuf_dynflag params;
	unsigned int bitnum;
};
TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);

static struct rte_tailq_elem mbuf_dynflag_tailq = {
	.name = "RTE_MBUF_DYNFLAG",
};
EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);

struct mbuf_dyn_shm {
	/**
	 * For each mbuf byte, free_space[i] != 0 if space is free.
	 * The value is the size of the biggest aligned element that
	 * can fit in the zone.
	 */
	uint8_t free_space[sizeof(struct rte_mbuf)];
	/** Bitfield of available flags. */
	uint64_t free_flags;
};
static struct mbuf_dyn_shm *shm;

/* Set the value of free_space[] according to the size and alignment of
 * the free areas. This helps to select the best place when reserving a
 * dynamic field. Assume tailq is locked.
 */
static void
process_score(void)
{
	size_t off, align, size, i;

	/* first, erase previous info */
	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
		if (shm->free_space[i])
			shm->free_space[i] = 1;
	}

	off = 0;
	while (off < sizeof(struct rte_mbuf)) {
		/* get the size of the free zone */
		for (size = 0; (off + size) < sizeof(struct rte_mbuf) &&
			     shm->free_space[off + size]; size++)
			;
		if (size == 0) {
			off++;
			continue;
		}

		/* get the alignment of biggest object that can fit in
		 * the zone at this offset.
		 */
		for (align = 1;
		     (off % (align << 1)) == 0 && (align << 1) <= size;
		     align <<= 1)
			;

		/* save it in free_space[] */
		for (i = off; i < off + align; i++)
			shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);

		off += align;
	}
}
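
/*
 * Worked example (illustrative only, hypothetical layout): assume the sole
 * free zone is 6 bytes long and starts at offset 8.
 *
 *   pass 1: off = 8,  size = 6, align = 4  ->  free_space[8..11]  = 4
 *   pass 2: off = 12, size = 2, align = 2  ->  free_space[12..13] = 2
 *
 * A later 2-byte reservation will therefore prefer offset 12 (score 2)
 * over offset 8 (score 4), keeping the well-aligned bytes available for
 * larger fields.
 */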

/* Mark the area occupied by a mbuf field as available in the shm. */
#define mark_free(field)					\
	memset(&shm->free_space[offsetof(struct rte_mbuf, field)], \
		1, sizeof(((struct rte_mbuf *)0)->field))
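
/*
 * For instance, the mark_free(dynfield1) call in init_shared_mem() below
 * expands to:
 *
 *   memset(&shm->free_space[offsetof(struct rte_mbuf, dynfield1)],
 *          1, sizeof(((struct rte_mbuf *)0)->dynfield1));
 *
 * i.e. every byte covered by the rte_mbuf dynfield1 area is flagged free.
 */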

/* Allocate and initialize the shared memory. Assume tailq is locked */
static int
init_shared_mem(void)
{
	const struct rte_memzone *mz;
	uint64_t mask;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
						sizeof(struct mbuf_dyn_shm),
						SOCKET_ID_ANY, 0,
						RTE_CACHE_LINE_SIZE);
	} else {
		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
	}
	if (mz == NULL)
		return -1;

	shm = mz->addr;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* init free_space, keep it sync'd with
		 * rte_mbuf_dynfield_copy().
		 */
		memset(shm, 0, sizeof(*shm));
		mark_free(dynfield1);

		/* init free_flags */
		for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
			shm->free_flags |= mask;

		process_score();
	}

	return 0;
}

/* check if this offset can be used */
static int
check_offset(size_t offset, size_t size, size_t align)
{
	size_t i;

	if ((offset & (align - 1)) != 0)
		return -1;
	if (offset + size > sizeof(struct rte_mbuf))
		return -1;

	for (i = 0; i < size; i++) {
		if (!shm->free_space[i + offset])
			return -1;
	}

	return 0;
}

/* assume tailq is locked */
static struct mbuf_dynfield_elt *
__mbuf_dynfield_lookup(const char *name)
{
	struct mbuf_dynfield_list *mbuf_dynfield_list;
	struct mbuf_dynfield_elt *mbuf_dynfield;
	struct rte_tailq_entry *te;

	mbuf_dynfield_list = RTE_TAILQ_CAST(
		mbuf_dynfield_tailq.head, mbuf_dynfield_list);

	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
		if (strcmp(name, mbuf_dynfield->params.name) == 0)
			break;
	}

	if (te == NULL || mbuf_dynfield == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return mbuf_dynfield;
}

int
rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
{
	struct mbuf_dynfield_elt *mbuf_dynfield;

	rte_mcfg_tailq_read_lock();
	if (shm == NULL && init_shared_mem() < 0)
		mbuf_dynfield = NULL;
	else
		mbuf_dynfield = __mbuf_dynfield_lookup(name);
	rte_mcfg_tailq_read_unlock();

	if (mbuf_dynfield == NULL)
		return -1;

	if (params != NULL)
		memcpy(params, &mbuf_dynfield->params, sizeof(*params));

	return mbuf_dynfield->offset;
}
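
/*
 * Consumer-side sketch (illustrative; the field name and the mbuf pointer
 * "m" are hypothetical): a component that did not register the field can
 * resolve its offset by name and then access it through the
 * RTE_MBUF_DYNFIELD() accessor from rte_mbuf_dyn.h:
 *
 *   int offset = rte_mbuf_dynfield_lookup("example_app_seqn", NULL);
 *   if (offset < 0)
 *       return -rte_errno;   // e.g. ENOENT if never registered
 *   uint32_t *seqn = RTE_MBUF_DYNFIELD(m, offset, uint32_t *);
 *   *seqn = 42;
 */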

static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
		const struct rte_mbuf_dynfield *params2)
{
	if (strcmp(params1->name, params2->name))
		return -1;
	if (params1->size != params2->size)
		return -1;
	if (params1->align != params2->align)
		return -1;
	if (params1->flags != params2->flags)
		return -1;
	return 0;
}

/* assume tailq is locked */
static int
__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
				size_t req)
{
	struct mbuf_dynfield_list *mbuf_dynfield_list;
	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
	struct rte_tailq_entry *te = NULL;
	unsigned int best_zone = UINT_MAX;
	size_t i, offset;
	int ret;

	if (shm == NULL && init_shared_mem() < 0)
		return -1;

	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
	if (mbuf_dynfield != NULL) {
		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
			rte_errno = EEXIST;
			return -1;
		}
		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
			rte_errno = EEXIST;
			return -1;
		}
		return mbuf_dynfield->offset;
	}

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		rte_errno = EPERM;
		return -1;
	}

	if (req == SIZE_MAX) {
		/* Find the best place to put this field: we search the
		 * lowest value of shm->free_space[offset]: the zones
		 * containing room for larger fields are kept for later.
		 */
		for (offset = 0;
		     offset < sizeof(struct rte_mbuf);
		     offset++) {
			if (check_offset(offset, params->size,
						params->align) == 0 &&
					shm->free_space[offset] < best_zone) {
				best_zone = shm->free_space[offset];
				req = offset;
			}
		}
		if (req == SIZE_MAX) {
			rte_errno = ENOENT;
			return -1;
		}
	} else {
		if (check_offset(req, params->size, params->align) < 0) {
			rte_errno = EBUSY;
			return -1;
		}
	}

	offset = req;
	mbuf_dynfield_list = RTE_TAILQ_CAST(
		mbuf_dynfield_tailq.head, mbuf_dynfield_list);

	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
	if (mbuf_dynfield == NULL) {
		rte_free(te);
		rte_errno = ENOMEM;
		return -1;
	}

	ret = strlcpy(mbuf_dynfield->params.name, params->name,
		sizeof(mbuf_dynfield->params.name));
	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
		rte_errno = ENAMETOOLONG;
		rte_free(mbuf_dynfield);
		rte_free(te);
		return -1;
	}
	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
	mbuf_dynfield->offset = offset;
	te->data = mbuf_dynfield;

	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);

	for (i = offset; i < offset + params->size; i++)
		shm->free_space[i] = 0;
	process_score();

	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zd\n",
		params->name, params->size, params->align, params->flags,
		offset);

	return offset;
}

int
rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
				size_t req)
{
	int ret;

	if (params->size >= sizeof(struct rte_mbuf)) {
		rte_errno = EINVAL;
		return -1;
	}
	if (!rte_is_power_of_2(params->align)) {
		rte_errno = EINVAL;
		return -1;
	}
	if (params->flags != 0) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_mcfg_tailq_write_lock();
	ret = __rte_mbuf_dynfield_register_offset(params, req);
	rte_mcfg_tailq_write_unlock();

	return ret;
}

int
rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
{
	return rte_mbuf_dynfield_register_offset(params, SIZE_MAX);
}
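
/*
 * Producer-side sketch (illustrative; the field name, "m" and "value" are
 * hypothetical): a PMD or application that owns a per-packet value
 * registers it once, then keeps the returned offset for the fast path:
 *
 *   static const struct rte_mbuf_dynfield desc = {
 *       .name = "example_pmd_metadata",
 *       .size = sizeof(uint64_t),
 *       .align = __alignof__(uint64_t),
 *   };
 *   int offset = rte_mbuf_dynfield_register(&desc);
 *   if (offset < 0)
 *       return -rte_errno;   // rte_errno tells why (no room, secondary, ...)
 *   *RTE_MBUF_DYNFIELD(m, offset, uint64_t *) = value;
 *
 * Registering the same name twice with identical parameters returns the
 * same offset, so independent components can share a field safely.
 */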

/* assume tailq is locked */
static struct mbuf_dynflag_elt *
__mbuf_dynflag_lookup(const char *name)
{
	struct mbuf_dynflag_list *mbuf_dynflag_list;
	struct mbuf_dynflag_elt *mbuf_dynflag;
	struct rte_tailq_entry *te;

	mbuf_dynflag_list = RTE_TAILQ_CAST(
		mbuf_dynflag_tailq.head, mbuf_dynflag_list);

	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
		if (strncmp(name, mbuf_dynflag->params.name,
				RTE_MBUF_DYN_NAMESIZE) == 0)
			break;
	}

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return mbuf_dynflag;
}

int
rte_mbuf_dynflag_lookup(const char *name,
	struct rte_mbuf_dynflag *params)
{
	struct mbuf_dynflag_elt *mbuf_dynflag;

	rte_mcfg_tailq_read_lock();
	if (shm == NULL && init_shared_mem() < 0)
		mbuf_dynflag = NULL;
	else
		mbuf_dynflag = __mbuf_dynflag_lookup(name);
	rte_mcfg_tailq_read_unlock();

	if (mbuf_dynflag == NULL)
		return -1;

	if (params != NULL)
		memcpy(params, &mbuf_dynflag->params, sizeof(*params));

	return mbuf_dynflag->bitnum;
}

static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
		const struct rte_mbuf_dynflag *params2)
{
	if (strcmp(params1->name, params2->name))
		return -1;
	if (params1->flags != params2->flags)
		return -1;
	return 0;
}

/* assume tailq is locked */
static int
__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
				unsigned int req)
{
	struct mbuf_dynflag_list *mbuf_dynflag_list;
	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
	struct rte_tailq_entry *te = NULL;
	unsigned int bitnum;
	int ret;

	if (shm == NULL && init_shared_mem() < 0)
		return -1;

	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
	if (mbuf_dynflag != NULL) {
		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
			rte_errno = EEXIST;
			return -1;
		}
		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
			rte_errno = EEXIST;
			return -1;
		}
		return mbuf_dynflag->bitnum;
	}

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		rte_errno = EPERM;
		return -1;
	}

	if (req == UINT_MAX) {
		if (shm->free_flags == 0) {
			rte_errno = ENOENT;
			return -1;
		}
		bitnum = rte_bsf64(shm->free_flags);
	} else {
		if ((shm->free_flags & (1ULL << req)) == 0) {
			rte_errno = EBUSY;
			return -1;
		}
		bitnum = req;
	}

	mbuf_dynflag_list = RTE_TAILQ_CAST(
		mbuf_dynflag_tailq.head, mbuf_dynflag_list);

	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
	if (mbuf_dynflag == NULL) {
		rte_free(te);
		rte_errno = ENOMEM;
		return -1;
	}

	ret = strlcpy(mbuf_dynflag->params.name, params->name,
		sizeof(mbuf_dynflag->params.name));
	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
		rte_free(mbuf_dynflag);
		rte_free(te);
		rte_errno = ENAMETOOLONG;
		return -1;
	}
	mbuf_dynflag->bitnum = bitnum;
	te->data = mbuf_dynflag;

	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);

	shm->free_flags &= ~(1ULL << bitnum);

	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
		params->name, params->flags, bitnum);

	return bitnum;
}

int
rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
				unsigned int req)
{
	int ret;

	if (req >= RTE_SIZEOF_FIELD(struct rte_mbuf, ol_flags) * CHAR_BIT &&
			req != UINT_MAX) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_mcfg_tailq_write_lock();
	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
	rte_mcfg_tailq_write_unlock();

	return ret;
}

int
rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
{
	return rte_mbuf_dynflag_register_bitnum(params, UINT_MAX);
}
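
/*
 * Usage sketch for dynamic flags (illustrative; the flag name and "m" are
 * hypothetical): the return value is a bit number in mbuf->ol_flags, so
 * callers usually convert it to a mask once and OR it in on the fast path:
 *
 *   static const struct rte_mbuf_dynflag desc = {
 *       .name = "example_flag_do_mark",
 *   };
 *   int bitnum = rte_mbuf_dynflag_register(&desc);
 *   if (bitnum < 0)
 *       return -rte_errno;
 *   uint64_t mark_flag = RTE_BIT64(bitnum);
 *   ...
 *   m->ol_flags |= mark_flag;
 */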

void rte_mbuf_dyn_dump(FILE *out)
{
	struct mbuf_dynfield_list *mbuf_dynfield_list;
	struct mbuf_dynfield_elt *dynfield;
	struct mbuf_dynflag_list *mbuf_dynflag_list;
	struct mbuf_dynflag_elt *dynflag;
	struct rte_tailq_entry *te;
	size_t i;

	rte_mcfg_tailq_write_lock();
	/* nothing to dump (and shm must not be dereferenced) if the shared
	 * memory cannot be initialized, e.g. in a secondary process started
	 * before any registration happened in the primary.
	 */
	if (shm == NULL && init_shared_mem() < 0) {
		rte_mcfg_tailq_write_unlock();
		return;
	}
	fprintf(out, "Reserved fields:\n");
	mbuf_dynfield_list = RTE_TAILQ_CAST(
		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
		dynfield = (struct mbuf_dynfield_elt *)te->data;
		fprintf(out, " name=%s offset=%zd size=%zd align=%zd flags=%x\n",
			dynfield->params.name, dynfield->offset,
			dynfield->params.size, dynfield->params.align,
			dynfield->params.flags);
	}
	fprintf(out, "Reserved flags:\n");
	mbuf_dynflag_list = RTE_TAILQ_CAST(
		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
		dynflag = (struct mbuf_dynflag_elt *)te->data;
		fprintf(out, " name=%s bitnum=%u flags=%x\n",
			dynflag->params.name, dynflag->bitnum,
			dynflag->params.flags);
	}
	fprintf(out, "Free space in mbuf (0 = occupied, value = free zone alignment):\n");
	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
		if ((i % 8) == 0)
			fprintf(out, " %4.4zx: ", i);
		fprintf(out, "%2.2x%s", shm->free_space[i],
			(i % 8 != 7) ? " " : "\n");
	}
	fprintf(out, "Free bit in mbuf->ol_flags (0 = occupied, 1 = free):\n");
	for (i = 0; i < sizeof(uint64_t) * CHAR_BIT; i++) {
		if ((i % 8) == 0)
			fprintf(out, " %4.4zx: ", i);
		fprintf(out, "%1.1x%s", (shm->free_flags & (1ULL << i)) ? 1 : 0,
			(i % 8 != 7) ? " " : "\n");
	}

	rte_mcfg_tailq_write_unlock();
}

static int
rte_mbuf_dyn_timestamp_register(int *field_offset, uint64_t *flag,
		const char *direction, const char *flag_name)
{
	static const struct rte_mbuf_dynfield field_desc = {
		.name = RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
		.size = sizeof(rte_mbuf_timestamp_t),
		.align = __alignof__(rte_mbuf_timestamp_t),
	};
	struct rte_mbuf_dynflag flag_desc = {};
	int offset;

	offset = rte_mbuf_dynfield_register(&field_desc);
	if (offset < 0) {
		RTE_LOG(ERR, MBUF,
			"Failed to register mbuf field for timestamp\n");
		return -1;
	}
	if (field_offset != NULL)
		*field_offset = offset;

	strlcpy(flag_desc.name, flag_name, sizeof(flag_desc.name));
	offset = rte_mbuf_dynflag_register(&flag_desc);
	if (offset < 0) {
		RTE_LOG(ERR, MBUF,
			"Failed to register mbuf flag for %s timestamp\n",
			direction);
		return -1;
	}
	if (flag != NULL)
		*flag = RTE_BIT64(offset);

	return 0;
}

int
rte_mbuf_dyn_rx_timestamp_register(int *field_offset, uint64_t *rx_flag)
{
	return rte_mbuf_dyn_timestamp_register(field_offset, rx_flag,
			"Rx", RTE_MBUF_DYNFLAG_RX_TIMESTAMP_NAME);
}

int
rte_mbuf_dyn_tx_timestamp_register(int *field_offset, uint64_t *tx_flag)
{
	return rte_mbuf_dyn_timestamp_register(field_offset, tx_flag,
			"Tx", RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME);
}
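
/*
 * Driver-side sketch (illustrative): a PMD that timestamps received packets
 * registers once at configure time, caches both values, then fills the
 * field and sets the flag per packet:
 *
 *   static int ts_offset;
 *   static uint64_t ts_rx_flag;
 *
 *   if (rte_mbuf_dyn_rx_timestamp_register(&ts_offset, &ts_rx_flag) < 0)
 *       return -rte_errno;
 *   ...
 *   *RTE_MBUF_DYNFIELD(m, ts_offset, rte_mbuf_timestamp_t *) = hw_ts;
 *   m->ol_flags |= ts_rx_flag;
 *
 * Here "m" and "hw_ts" stand for the received mbuf and the hardware clock
 * value; both names are placeholders.
 */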