1 /*
2 * Copyright (C) 2016-2018 Vincenzo Maffione
3 * Copyright (C) 2015 Stefano Garzarella
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD$
28 */
29
30 /*
31 * common headers
32 */
33 #if defined(__FreeBSD__)
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/types.h>
38 #include <sys/selinfo.h>
39 #include <sys/socket.h>
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <machine/bus.h>
43
44 #define usleep_range(_1, _2) \
45 pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
46
47 #elif defined(linux)
48 #include <bsd_glue.h>
49 #include <linux/file.h>
50 #include <linux/eventfd.h>
51 #endif
52
53 #include <net/netmap.h>
54 #include <dev/netmap/netmap_kern.h>
55 #include <net/netmap_virt.h>
56 #include <dev/netmap/netmap_mem2.h>
57
58 /* Support for eventfd-based notifications. */
59 #if defined(linux)
60 #define SYNC_KLOOP_POLL
61 #endif
62
63 /* Write kring pointers (hwcur, hwtail) to the CSB.
64 * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
65 static inline void
66 sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
67 uint32_t hwtail)
68 {
69 /* Issue a first store-store barrier to make sure that writes to the
70 * netmap ring are not reordered after the updates of ktoa->hwcur and ktoa->hwtail. */
71 nm_stst_barrier();
72
73 /*
74 * The same scheme used in nm_sync_kloop_appl_write() applies here.
75 * We allow the application to read a value of hwcur more recent than the value of
76 * hwtail, since this still results in a consistent view of the
77 * ring state (hwcur can never wrap past hwtail, since hwcur must stay
78 * behind head).
79 *
80 * The following memory barrier scheme is used to make this happen:
81 *
82 * Application Kernel
83 *
84 * STORE(hwcur) LOAD(hwtail)
85 * wmb() <-------------> rmb()
86 * STORE(hwtail) LOAD(hwcur)
87 */
88 CSB_WRITE(ptr, hwcur, hwcur);
89 nm_stst_barrier();
90 CSB_WRITE(ptr, hwtail, hwtail);
91 }
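
/*
 * For reference, a minimal sketch of the application/guest-side counterpart
 * (the role played by nm_sync_kloop_appl_read(), referred to above as
 * ptnetmap_guest_read_kring_csb()): hwtail is loaded before hwcur, with
 * load-load barriers, so that it pairs with the store order used above.
 * Illustrative only, not the authoritative helper from net/netmap.h.
 */
#if 0
static inline void
example_appl_read_ktoa(struct nm_csb_ktoa *ktoa, uint32_t *hwtail,
		uint32_t *hwcur)
{
	/* Mirror of the kernel store order: load hwtail first ... */
	*hwtail = ktoa->hwtail;
	nm_ldld_barrier();
	/* ... then hwcur, which therefore can only be more recent. */
	*hwcur = ktoa->hwcur;
	/* Keep these loads ahead of any subsequent netmap ring access. */
	nm_ldld_barrier();
}
#endif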
92
93 /* Read kring pointers (head, cur, sync_flags) from the CSB.
94 * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
95 static inline void
96 sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
97 struct netmap_ring *shadow_ring,
98 uint32_t num_slots)
99 {
100 /*
101 * A load-load barrier here pairs with the application's store ordering, so
102 * that the value of head we load is never more recent than the value of cur
103 * (see the explanation in sync_kloop_kernel_write()).
104 */
105 CSB_READ(ptr, head, shadow_ring->head);
106 nm_ldld_barrier();
107 CSB_READ(ptr, cur, shadow_ring->cur);
108 CSB_READ(ptr, sync_flags, shadow_ring->flags);
109
110 /* Make sure that loads from atok->head and atok->cur are not delayed
111 * after the loads from the netmap ring. */
112 nm_ldld_barrier();
113 }
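
/*
 * Symmetrically, a sketch of the application/guest-side writer this routine
 * pairs with (the role of nm_sync_kloop_appl_write(), referred to above as
 * ptnetmap_guest_write_kring_csb()): cur is stored before head, separated by
 * store-store barriers. Illustrative only, not the authoritative helper.
 */
#if 0
static inline void
example_appl_write_atok(struct nm_csb_atok *atok, uint32_t cur, uint32_t head)
{
	/* Let the writes to the netmap ring slots become visible first. */
	nm_stst_barrier();
	atok->cur = cur;
	nm_stst_barrier();
	/* head is published last, so the update of head never overtakes
	 * the update of cur, as assumed by sync_kloop_kernel_read(). */
	atok->head = head;
}
#endif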
114
115 /* Enable or disable application --> kernel kicks. */
116 static inline void
117 csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
118 {
119 CSB_WRITE(csb_ktoa, kern_need_kick, val);
120 }
121
122 #ifdef SYNC_KLOOP_POLL
123 /* Are application interrupts enabled or disabled? */
124 static inline uint32_t
125 csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
126 {
127 uint32_t v;
128
129 CSB_READ(csb_atok, appl_need_kick, v);
130
131 return v;
132 }
133 #endif /* SYNC_KLOOP_POLL */
134
135 static inline void
136 sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
137 {
138 nm_prinf("%s, kring %s, hwcur %d, rhead %d, "
139 "rcur %d, rtail %d, hwtail %d",
140 title, kring->name, kring->nr_hwcur, kring->rhead,
141 kring->rcur, kring->rtail, kring->nr_hwtail);
142 }
143
144 /* Arguments for netmap_sync_kloop_tx_ring() and
145 * netmap_sync_kloop_rx_ring().
146 */
147 struct sync_kloop_ring_args {
148 struct netmap_kring *kring;
149 struct nm_csb_atok *csb_atok;
150 struct nm_csb_ktoa *csb_ktoa;
151 #ifdef SYNC_KLOOP_POLL
152 struct eventfd_ctx *irq_ctx;
153 #endif /* SYNC_KLOOP_POLL */
154 /* Are we busy waiting rather than using a schedule() loop? */
155 bool busy_wait;
156 /* Are we processing in the context of VM exit? */
157 bool direct;
158 };
159
160 static void
161 netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
162 {
163 struct netmap_kring *kring = a->kring;
164 struct nm_csb_atok *csb_atok = a->csb_atok;
165 struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
166 struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
167 bool more_txspace = false;
168 uint32_t num_slots;
169 int batch;
170
171 if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
172 return;
173 }
174
175 num_slots = kring->nkr_num_slots;
176
177 /* Disable application --> kernel notifications. */
178 if (!a->direct) {
179 csb_ktoa_kick_enable(csb_ktoa, 0);
180 }
181 /* Copy the application kring pointers from the CSB */
182 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
183
184 for (;;) {
185 batch = shadow_ring.head - kring->nr_hwcur;
186 if (batch < 0)
187 batch += num_slots;
188
189 #ifdef PTN_TX_BATCH_LIM
190 if (batch > PTN_TX_BATCH_LIM(num_slots)) {
191 /* If application moves ahead too fast, let's cut the move so
192 * that we don't exceed our batch limit. */
193 uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
194
195 if (head_lim >= num_slots)
196 head_lim -= num_slots;
197 nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
198 head_lim);
199 shadow_ring.head = head_lim;
200 batch = PTN_TX_BATCH_LIM(num_slots);
201 }
202 #endif /* PTN_TX_BATCH_LIM */
203
204 if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
205 shadow_ring.flags |= NAF_FORCE_RECLAIM;
206 }
207
208 /* Netmap prologue */
209 shadow_ring.tail = kring->rtail;
210 if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
211 /* Reinit ring and enable notifications. */
212 netmap_ring_reinit(kring);
213 if (!a->busy_wait) {
214 csb_ktoa_kick_enable(csb_ktoa, 1);
215 }
216 break;
217 }
218
219 if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
220 sync_kloop_kring_dump("pre txsync", kring);
221 }
222
223 if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
224 if (!a->busy_wait) {
225 /* Reenable notifications. */
226 csb_ktoa_kick_enable(csb_ktoa, 1);
227 }
228 nm_prerr("txsync() failed");
229 break;
230 }
231
232 /*
233 * Finalize
234 * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
235 * do the nm_sync_finalize.
236 */
237 sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
238 kring->nr_hwtail);
239 if (kring->rtail != kring->nr_hwtail) {
240 /* Some more room available in the parent adapter. */
241 kring->rtail = kring->nr_hwtail;
242 more_txspace = true;
243 }
244
245 if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
246 sync_kloop_kring_dump("post txsync", kring);
247 }
248
249 /* Interrupt the application if needed. */
250 #ifdef SYNC_KLOOP_POLL
251 if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
252 /* We could disable kernel --> application kicks here,
253 * to avoid spurious interrupts. */
254 eventfd_signal(a->irq_ctx, 1);
255 more_txspace = false;
256 }
257 #endif /* SYNC_KLOOP_POLL */
258
259 /* Read CSB to see if there is more work to do. */
260 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
261 if (shadow_ring.head == kring->rhead) {
262 if (a->busy_wait) {
263 break;
264 }
265 /*
266 * No more packets to transmit. We enable notifications and
267 * go to sleep, waiting for a kick from the application when
268 * new slots are ready for transmission.
269 */
270 /* Reenable notifications. */
271 csb_ktoa_kick_enable(csb_ktoa, 1);
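/*
 * Example of the race closed by the double check below: the application
 * may publish a new head and sample kern_need_kick as zero right before
 * the store above becomes visible, and therefore send no kick. The
 * store-load barrier followed by a re-read of the CSB guarantees that
 * such a head update is seen here instead of being slept over.
 */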
272 /* Double check, with store-load memory barrier. */
273 nm_stld_barrier();
274 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
275 if (shadow_ring.head != kring->rhead) {
276 /* We won the race: there are more packets to
277 * transmit. Disable notifications and do another cycle. */
278 csb_ktoa_kick_enable(csb_ktoa, 0);
279 continue;
280 }
281 break;
282 }
283
284 if (nm_kr_txempty(kring)) {
285 /* No more available TX slots. We stop and wait for a notification
286 * from the backend (netmap_tx_irq). */
287 nm_prdis(1, "TX ring");
288 break;
289 }
290 }
291
292 nm_kr_put(kring);
293
294 #ifdef SYNC_KLOOP_POLL
295 if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
296 eventfd_signal(a->irq_ctx, 1);
297 }
298 #endif /* SYNC_KLOOP_POLL */
299 }
300
301 /* RX cycles without receiving any packets */
302 #define SYNC_LOOP_RX_DRY_CYCLES_MAX 2
303
304 static inline int
305 sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
306 {
307 return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
308 kring->nkr_num_slots - 1));
309 }
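
/*
 * Worked example for sync_kloop_norxslots(): with nkr_num_slots = 256 and
 * g_head = 10, there is no space left for reception once nr_hwtail has
 * reached nm_prev(10, 255) = 9, i.e. the slot right before the application
 * head; filling any further slot would overrun buffers still owned by the
 * application.
 */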
310
311 static void
312 netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
313 {
314
315 struct netmap_kring *kring = a->kring;
316 struct nm_csb_atok *csb_atok = a->csb_atok;
317 struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
318 struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
319 int dry_cycles = 0;
320 bool some_recvd = false;
321 uint32_t num_slots;
322
323 if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
324 return;
325 }
326
327 num_slots = kring->nkr_num_slots;
328
332 /* Disable notifications. */
333 if (!a->direct) {
334 csb_ktoa_kick_enable(csb_ktoa, 0);
335 }
336 /* Copy the application kring pointers from the CSB */
337 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
338
339 for (;;) {
340 uint32_t hwtail;
341
342 /* Netmap prologue */
343 shadow_ring.tail = kring->rtail;
344 if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
345 /* Reinit ring and enable notifications. */
346 netmap_ring_reinit(kring);
347 if (!a->busy_wait) {
348 csb_ktoa_kick_enable(csb_ktoa, 1);
349 }
350 break;
351 }
352
353 if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
354 sync_kloop_kring_dump("pre rxsync", kring);
355 }
356
357 if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
358 if (!a->busy_wait) {
359 /* Reenable notifications. */
360 csb_ktoa_kick_enable(csb_ktoa, 1);
361 }
362 nm_prerr("rxsync() failed");
363 break;
364 }
365
366 /*
367 * Finalize
368 * Copy kernel hwcur and hwtail into the CSB for the application sync()
369 */
370 hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
371 sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
372 if (kring->rtail != hwtail) {
373 kring->rtail = hwtail;
374 some_recvd = true;
375 dry_cycles = 0;
376 } else {
377 dry_cycles++;
378 }
379
380 if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
381 sync_kloop_kring_dump("post rxsync", kring);
382 }
383
384 #ifdef SYNC_KLOOP_POLL
385 /* Interrupt the application if needed. */
386 if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
387 /* We could disable kernel --> application kicks here,
388 * to avoid spurious interrupts. */
389 eventfd_signal(a->irq_ctx, 1);
390 some_recvd = false;
391 }
392 #endif /* SYNC_KLOOP_POLL */
393
394 /* Read CSB to see if there is more work to do. */
395 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
396 if (sync_kloop_norxslots(kring, shadow_ring.head)) {
397 if (a->busy_wait) {
398 break;
399 }
400 /*
401 * No more slots available for reception. We enable notifications and
402 * go to sleep, waiting for a kick from the application when new receive
403 * slots are available.
404 */
405 /* Reenable notifications. */
406 csb_ktoa_kick_enable(csb_ktoa, 1);
407 /* Double check, with store-load memory barrier. */
408 nm_stld_barrier();
409 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
410 if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
411 /* We won the race: more slots are available. Disable
412 * notifications and do another cycle. */
413 csb_ktoa_kick_enable(csb_ktoa, 0);
414 continue;
415 }
416 break;
417 }
418
419 hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
420 if (unlikely(hwtail == kring->rhead ||
421 dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
422 /* No more packets to be read from the backend. We stop and
423 * wait for a notification from the backend (netmap_rx_irq). */
424 nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
425 hwtail, kring->rhead, dry_cycles);
426 break;
427 }
428 }
429
430 nm_kr_put(kring);
431
432 #ifdef SYNC_KLOOP_POLL
433 /* Interrupt the application if needed. */
434 if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
435 eventfd_signal(a->irq_ctx, 1);
436 }
437 #endif /* SYNC_KLOOP_POLL */
438 }
439
440 #ifdef SYNC_KLOOP_POLL
441 struct sync_kloop_poll_ctx;
442 struct sync_kloop_poll_entry {
443 /* Support for receiving notifications from
444 * a netmap ring or from the application. */
445 struct file *filp;
446 wait_queue_t wait;
447 wait_queue_head_t *wqh;
448
449 /* Support for sending notifications to the application. */
450 struct eventfd_ctx *irq_ctx;
451 struct file *irq_filp;
452
453 /* Arguments for the ring processing function. Useful
454 * in case of custom wake-up function. */
455 struct sync_kloop_ring_args *args;
456 struct sync_kloop_poll_ctx *parent;
457
458 };
459
460 struct sync_kloop_poll_ctx {
461 poll_table wait_table;
462 unsigned int next_entry;
463 int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *);
464 unsigned int num_entries;
465 unsigned int num_tx_rings;
466 unsigned int num_rings;
467 /* First num_tx_rings entries are for the TX kicks.
468 * Then the RX kicks entries follow. The last two
469 * entries are for TX irq, and RX irq. */
470 struct sync_kloop_poll_entry entries[0];
471 };
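
/*
 * Example layout of the entries[] array above: with 2 TX rings and 3 RX
 * rings, entries[0..1] hold the TX kick eventfds, entries[2..4] the RX kick
 * eventfds, entries[5] the TX irq notification and entries[6] the RX irq
 * notification, for a total of num_rings + 2 = 7 entries.
 */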
472
473 static void
474 sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
475 poll_table *pt)
476 {
477 struct sync_kloop_poll_ctx *poll_ctx =
478 container_of(pt, struct sync_kloop_poll_ctx, wait_table);
479 struct sync_kloop_poll_entry *entry = poll_ctx->entries +
480 poll_ctx->next_entry;
481
482 BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
483 entry->wqh = wqh;
484 entry->filp = file;
485 /* Use the default wake up function. */
486 if (poll_ctx->next_wake_fun == NULL) {
487 init_waitqueue_entry(&entry->wait, current);
488 } else {
489 init_waitqueue_func_entry(&entry->wait,
490 poll_ctx->next_wake_fun);
491 }
492 add_wait_queue(wqh, &entry->wait);
493 }
494
495 static int
496 sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
497 int wake_flags, void *key)
498 {
499 struct sync_kloop_poll_entry *entry =
500 container_of(wait, struct sync_kloop_poll_entry, wait);
501
502 netmap_sync_kloop_tx_ring(entry->args);
503
504 return 0;
505 }
506
507 static int
508 sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
509 int wake_flags, void *key)
510 {
511 struct sync_kloop_poll_entry *entry =
512 container_of(wait, struct sync_kloop_poll_entry, wait);
513 struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
514 int i;
515
516 for (i = 0; i < poll_ctx->num_tx_rings; i++) {
517 struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;
518
519 if (irq_ctx) {
520 eventfd_signal(irq_ctx, 1);
521 }
522 }
523
524 return 0;
525 }
526
527 static int
528 sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
529 int wake_flags, void *key)
530 {
531 struct sync_kloop_poll_entry *entry =
532 container_of(wait, struct sync_kloop_poll_entry, wait);
533
534 netmap_sync_kloop_rx_ring(entry->args);
535
536 return 0;
537 }
538
539 static int
540 sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
541 int wake_flags, void *key)
542 {
543 struct sync_kloop_poll_entry *entry =
544 container_of(wait, struct sync_kloop_poll_entry, wait);
545 struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
546 int i;
547
548 for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) {
549 struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;
550
551 if (irq_ctx) {
552 eventfd_signal(irq_ctx, 1);
553 }
554 }
555
556 return 0;
557 }
558 #endif /* SYNC_KLOOP_POLL */
559
560 int
561 netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
562 {
563 struct nmreq_sync_kloop_start *req =
564 (struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
565 struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
566 #ifdef SYNC_KLOOP_POLL
567 struct sync_kloop_poll_ctx *poll_ctx = NULL;
568 #endif /* SYNC_KLOOP_POLL */
569 int num_rx_rings, num_tx_rings, num_rings;
570 struct sync_kloop_ring_args *args = NULL;
571 uint32_t sleep_us = req->sleep_us;
572 struct nm_csb_atok* csb_atok_base;
573 struct nm_csb_ktoa* csb_ktoa_base;
574 struct netmap_adapter *na;
575 struct nmreq_option *opt;
576 bool na_could_sleep = false;
577 bool busy_wait = true;
578 bool direct_tx = false;
579 bool direct_rx = false;
580 int err = 0;
581 int i;
582
583 if (sleep_us > 1000000) {
584 /* We do not accept sleeping for more than a second. */
585 return EINVAL;
586 }
587
588 if (priv->np_nifp == NULL) {
589 return ENXIO;
590 }
591 mb(); /* make sure following reads are not from cache */
592
593 na = priv->np_na;
594 if (!nm_netmap_on(na)) {
595 return ENXIO;
596 }
597
598 NMG_LOCK();
599 /* Make sure the application is working in CSB mode. */
600 if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
601 NMG_UNLOCK();
602 nm_prerr("sync-kloop on %s requires "
603 "NETMAP_REQ_OPT_CSB option", na->name);
604 return EINVAL;
605 }
606
607 csb_atok_base = priv->np_csb_atok_base;
608 csb_ktoa_base = priv->np_csb_ktoa_base;
609
610 /* Make sure that no kloop is currently running. */
611 if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
612 err = EBUSY;
613 }
614 priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
615 NMG_UNLOCK();
616 if (err) {
617 return err;
618 }
619
620 num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
621 num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
622 num_rings = num_tx_rings + num_rx_rings;
623
624 args = nm_os_malloc(num_rings * sizeof(args[0]));
625 if (!args) {
626 err = ENOMEM;
627 goto out;
628 }
629
630 /* Prepare the arguments for netmap_sync_kloop_tx_ring()
631 * and netmap_sync_kloop_rx_ring(). */
632 for (i = 0; i < num_tx_rings; i++) {
633 struct sync_kloop_ring_args *a = args + i;
634
635 a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
636 a->csb_atok = csb_atok_base + i;
637 a->csb_ktoa = csb_ktoa_base + i;
638 a->busy_wait = busy_wait;
639 a->direct = direct_tx;
640 }
641 for (i = 0; i < num_rx_rings; i++) {
642 struct sync_kloop_ring_args *a = args + num_tx_rings + i;
643
644 a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
645 a->csb_atok = csb_atok_base + num_tx_rings + i;
646 a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
647 a->busy_wait = busy_wait;
648 a->direct = direct_rx;
649 }
650
651 /* Validate notification options. */
652 opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_MODE);
653 if (opt != NULL) {
654 struct nmreq_opt_sync_kloop_mode *mode_opt =
655 (struct nmreq_opt_sync_kloop_mode *)opt;
656
657 direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX);
658 direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX);
659 if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX |
660 NM_OPT_SYNC_KLOOP_DIRECT_RX)) {
661 opt->nro_status = err = EINVAL;
662 goto out;
663 }
664 opt->nro_status = 0;
665 }
666 opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
667 if (opt != NULL) {
668 if (opt->nro_size != sizeof(*eventfds_opt) +
669 sizeof(eventfds_opt->eventfds[0]) * num_rings) {
670 /* Option size not consistent with the number of
671 * entries. */
672 opt->nro_status = err = EINVAL;
673 goto out;
674 }
675 #ifdef SYNC_KLOOP_POLL
676 eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
677 opt->nro_status = 0;
678
679 /* Check if some ioeventfd entry is not defined, and force sleep
680 * synchronization in that case. */
681 busy_wait = false;
682 for (i = 0; i < num_rings; i++) {
683 if (eventfds_opt->eventfds[i].ioeventfd < 0) {
684 busy_wait = true;
685 break;
686 }
687 }
688
689 if (busy_wait && (direct_tx || direct_rx)) {
690 /* For direct processing we need all the
691 * ioeventfds to be valid. */
692 opt->nro_status = err = EINVAL;
693 goto out;
694 }
695
696 /* We need 2 poll entries for TX and RX notifications coming
697 * from the netmap adapter, plus one entry per ring for the
698 * notifications coming from the application. */
699 poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
700 (num_rings + 2) * sizeof(poll_ctx->entries[0]));
701 init_poll_funcptr(&poll_ctx->wait_table,
702 sync_kloop_poll_table_queue_proc);
703 poll_ctx->num_entries = 2 + num_rings;
704 poll_ctx->num_tx_rings = num_tx_rings;
705 poll_ctx->num_rings = num_rings;
706 poll_ctx->next_entry = 0;
707 poll_ctx->next_wake_fun = NULL;
708
709 if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) {
710 /* In direct mode, VALE txsync is called from
711 * wake-up context, where it is not possible
712 * to sleep.
713 */
714 na->na_flags &= ~NAF_BDG_MAYSLEEP;
715 na_could_sleep = true;
716 }
717
718 for (i = 0; i < num_rings + 2; i++) {
719 poll_ctx->entries[i].args = args + i;
720 poll_ctx->entries[i].parent = poll_ctx;
721 }
722
723 /* Poll for notifications coming from the applications through
724 * eventfds. */
725 for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) {
726 struct eventfd_ctx *irq = NULL;
727 struct file *filp = NULL;
728 unsigned long mask;
729 bool tx_ring = (i < num_tx_rings);
730
731 if (eventfds_opt->eventfds[i].irqfd >= 0) {
732 filp = eventfd_fget(
733 eventfds_opt->eventfds[i].irqfd);
734 if (IS_ERR(filp)) {
735 err = PTR_ERR(filp);
736 goto out;
737 }
738 irq = eventfd_ctx_fileget(filp);
739 if (IS_ERR(irq)) {
740 err = PTR_ERR(irq);
741 goto out;
742 }
743 }
744 poll_ctx->entries[i].irq_filp = filp;
745 poll_ctx->entries[i].irq_ctx = irq;
746 poll_ctx->entries[i].args->busy_wait = busy_wait;
747 /* Don't let netmap_sync_kloop_*x_ring() use
748 * IRQs in direct mode. */
749 poll_ctx->entries[i].args->irq_ctx =
750 ((tx_ring && direct_tx) ||
751 (!tx_ring && direct_rx)) ? NULL :
752 poll_ctx->entries[i].irq_ctx;
753 poll_ctx->entries[i].args->direct =
754 (tx_ring ? direct_tx : direct_rx);
755
756 if (!busy_wait) {
757 filp = eventfd_fget(
758 eventfds_opt->eventfds[i].ioeventfd);
759 if (IS_ERR(filp)) {
760 err = PTR_ERR(filp);
761 goto out;
762 }
763 if (tx_ring && direct_tx) {
764 /* Override the wake up function
765 * so that it can directly call
766 * netmap_sync_kloop_tx_ring().
767 */
768 poll_ctx->next_wake_fun =
769 sync_kloop_tx_kick_wake_fun;
770 } else if (!tx_ring && direct_rx) {
771 /* Same for direct RX. */
772 poll_ctx->next_wake_fun =
773 sync_kloop_rx_kick_wake_fun;
774 } else {
775 poll_ctx->next_wake_fun = NULL;
776 }
777 mask = filp->f_op->poll(filp,
778 &poll_ctx->wait_table);
779 if (mask & POLLERR) {
780 err = EINVAL;
781 goto out;
782 }
783 }
784 }
785
786 /* Poll for notifications coming from the netmap rings bound to
787 * this file descriptor. */
788 if (!busy_wait) {
789 NMG_LOCK();
790 /* In direct mode, override the wake up function so
791 * that it can forward the netmap_tx_irq() to the
792 * guest. */
793 poll_ctx->next_wake_fun = direct_tx ?
794 sync_kloop_tx_irq_wake_fun : NULL;
795 poll_wait(priv->np_filp, priv->np_si[NR_TX],
796 &poll_ctx->wait_table);
797 poll_ctx->next_entry++;
798
799 poll_ctx->next_wake_fun = direct_rx ?
800 sync_kloop_rx_irq_wake_fun : NULL;
801 poll_wait(priv->np_filp, priv->np_si[NR_RX],
802 &poll_ctx->wait_table);
803 poll_ctx->next_entry++;
804 NMG_UNLOCK();
805 }
806 #else /* SYNC_KLOOP_POLL */
807 opt->nro_status = EOPNOTSUPP;
808 goto out;
809 #endif /* SYNC_KLOOP_POLL */
810 }
811
812 nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, "
813 "na_could_sleep %u", busy_wait, direct_tx, direct_rx,
814 na_could_sleep);
815
816 /* Main loop. */
817 for (;;) {
818 if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
819 break;
820 }
821
822 #ifdef SYNC_KLOOP_POLL
823 if (!busy_wait) {
824 /* It is important to set the task state as
825 * interruptible before processing any TX/RX ring,
826 * so that if a notification on ring Y comes after
827 * we have processed ring Y, but before we call
828 * schedule(), we don't miss it. This is true because
829 * the wake up function will change the task state,
830 * and therefore the schedule_timeout() call below
831 * will observe the change.
832 */
833 set_current_state(TASK_INTERRUPTIBLE);
834 }
835 #endif /* SYNC_KLOOP_POLL */
836
837 /* Process all the TX rings bound to this file descriptor. */
838 for (i = 0; !direct_tx && i < num_tx_rings; i++) {
839 struct sync_kloop_ring_args *a = args + i;
840 netmap_sync_kloop_tx_ring(a);
841 }
842
843 /* Process all the RX rings bound to this file descriptor. */
844 for (i = 0; !direct_rx && i < num_rx_rings; i++) {
845 struct sync_kloop_ring_args *a = args + num_tx_rings + i;
846 netmap_sync_kloop_rx_ring(a);
847 }
848
849 if (busy_wait) {
850 /* Default synchronization method: sleep for a while. */
851 usleep_range(sleep_us, sleep_us);
852 }
853 #ifdef SYNC_KLOOP_POLL
854 else {
855 /* Yield to the scheduler waiting for a notification
856 * to come either from netmap or the application. */
857 schedule_timeout(msecs_to_jiffies(3000));
858 }
859 #endif /* SYNC_KLOOP_POLL */
860 }
861 out:
862 #ifdef SYNC_KLOOP_POLL
863 if (poll_ctx) {
864 /* Stop polling from netmap and the eventfds, and deallocate
865 * the poll context. */
866 if (!busy_wait) {
867 __set_current_state(TASK_RUNNING);
868 }
869 for (i = 0; i < poll_ctx->next_entry; i++) {
870 struct sync_kloop_poll_entry *entry =
871 poll_ctx->entries + i;
872
873 if (entry->wqh)
874 remove_wait_queue(entry->wqh, &entry->wait);
875 /* We took a reference to the eventfd files (via
876 * eventfd_fget()), but not to the netmap file descriptor,
877 * so only the former must be released here. */
878 if (entry->filp && entry->filp != priv->np_filp)
879 fput(entry->filp);
880 if (entry->irq_ctx)
881 eventfd_ctx_put(entry->irq_ctx);
882 if (entry->irq_filp)
883 fput(entry->irq_filp);
884 }
885 nm_os_free(poll_ctx);
886 poll_ctx = NULL;
887 }
888 #endif /* SYNC_KLOOP_POLL */
889
890 if (args) {
891 nm_os_free(args);
892 args = NULL;
893 }
894
895 /* Reset the kloop state. */
896 NMG_LOCK();
897 priv->np_kloop_state = 0;
898 if (na_could_sleep) {
899 na->na_flags |= NAF_BDG_MAYSLEEP;
900 }
901 NMG_UNLOCK();
902
903 return err;
904 }
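
/*
 * A minimal user-space sketch of how this loop is entered, assuming a netmap
 * file descriptor already bound in CSB mode (NETMAP_REQ_REGISTER with a
 * NETMAP_REQ_OPT_CSB option) and the usual user-space headers
 * (net/netmap_user.h, sys/ioctl.h). No eventfd option is attached, so the
 * kloop falls back to the sleep-based path with the requested sleep_us.
 * Error handling is omitted; this is only an illustration, not the canonical
 * usage (real users typically attach NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS).
 */
#if 0
static int
example_start_kloop(int netmap_fd)
{
	struct nmreq_sync_kloop_start req;
	struct nmreq_header hdr;

	memset(&hdr, 0, sizeof(hdr));
	memset(&req, 0, sizeof(req));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_START;
	hdr.nr_body = (uintptr_t)&req;
	req.sleep_us = 500;	/* sleep 500 us between ring scans */

	/* Blocks in the kernel until another thread requests a stop. */
	return ioctl(netmap_fd, NIOCCTRL, &hdr);
}
#endif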
905
906 int
907 netmap_sync_kloop_stop(struct netmap_priv_d *priv)
908 {
909 struct netmap_adapter *na;
910 bool running = true;
911 int err = 0;
912
913 if (priv->np_nifp == NULL) {
914 return ENXIO;
915 }
916 mb(); /* make sure following reads are not from cache */
917
918 na = priv->np_na;
919 if (!nm_netmap_on(na)) {
920 return ENXIO;
921 }
922
923 /* Set the kloop stopping flag. */
924 NMG_LOCK();
925 priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
926 NMG_UNLOCK();
927
928 /* Send a notification to the kloop, in case it is blocked in
929 * schedule_timeout(). We can use either RX or TX, because the
930 * kloop is waiting on both. */
931 nm_os_selwakeup(priv->np_si[NR_RX]);
932
933 /* Wait for the kloop to actually terminate. */
934 while (running) {
935 usleep_range(1000, 1500);
936 NMG_LOCK();
937 running = (NM_ACCESS_ONCE(priv->np_kloop_state)
938 & NM_SYNC_KLOOP_RUNNING);
939 NMG_UNLOCK();
940 }
941
942 return err;
943 }
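
/*
 * The matching user-space stop request, issued from a thread other than the
 * one blocked in netmap_sync_kloop() (sketch only, error handling omitted,
 * same header assumptions as the start sketch above).
 */
#if 0
static int
example_stop_kloop(int netmap_fd)
{
	struct nmreq_header hdr;

	memset(&hdr, 0, sizeof(hdr));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_STOP;

	return ioctl(netmap_fd, NIOCCTRL, &hdr);
}
#endif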
944
945 #ifdef WITH_PTNETMAP
946 /*
947 * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
948 * These routines are reused across the different operating systems supported
949 * by netmap.
950 */
951
952 /*
953 * Reconcile host and guest views of the transmit ring.
954 *
955 * Guest user wants to transmit packets up to the one before ring->head,
956 * and guest kernel knows tx_ring->hwcur is the first packet unsent
957 * by the host kernel.
958 *
959 * We push out as many packets as possible, and possibly
960 * reclaim buffers from previously completed transmission.
961 *
962 * Notifications from the host are enabled only if the guest user would
963 * block (no space in the ring).
964 */
965 bool
966 netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
967 struct netmap_kring *kring, int flags)
968 {
969 bool notify = false;
970
971 /* Disable notifications */
972 atok->appl_need_kick = 0;
973
974 /*
975 * First part: tell the host to process the new packets,
976 * updating the CSB.
977 */
978 kring->nr_hwcur = ktoa->hwcur;
979 nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
980
981 /* Ask for a kick from the guest to the host if needed. */
982 if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
983 && NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
984 (flags & NAF_FORCE_RECLAIM)) {
985 atok->sync_flags = flags;
986 notify = true;
987 }
988
989 /*
990 * Second part: reclaim buffers for completed transmissions.
991 */
992 if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) {
993 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
994 &kring->nr_hwcur);
995 }
996
997 /*
998 * No more room in the ring for new transmissions. The user thread will
999 * go to sleep and we need to be notified by the host when more free
1000 * space is available.
1001 */
1002 if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
1003 /* Reenable notifications. */
1004 atok->appl_need_kick = 1;
1005 /* Double check, with store-load memory barrier. */
1006 nm_stld_barrier();
1007 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
1008 &kring->nr_hwcur);
1009 /* If there is new free space, disable notifications */
1010 if (unlikely(!nm_kr_wouldblock(kring))) {
1011 atok->appl_need_kick = 0;
1012 }
1013 }
1014
1015 nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
1016 kring->name, atok->head, atok->cur, ktoa->hwtail,
1017 kring->rhead, kring->rcur, kring->nr_hwtail);
1018
1019 return notify;
1020 }
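
/*
 * Typical caller, sketched: a ptnet guest driver wraps this routine in its
 * nm_txsync callback and rings the host doorbell when a notification is
 * requested. struct example_ring_state, example_ring_state() and
 * example_kick_host() are hypothetical placeholders for the driver's own CSB
 * bookkeeping and register write, not real APIs.
 */
#if 0
static int
example_ptnet_txsync(struct netmap_kring *kring, int flags)
{
	struct example_ring_state *rs = example_ring_state(kring);

	if (netmap_pt_guest_txsync(rs->atok, rs->ktoa, kring, flags)) {
		example_kick_host(rs);	/* hypothetical doorbell write */
	}

	return 0;
}
#endif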
1021
1022 /*
1023 * Reconcile host and guest view of the receive ring.
1024 *
1025 * Update hwcur/hwtail from host (reading from CSB).
1026 *
1027 * If guest user has released buffers up to the one before ring->head, we
1028 * also give them to the host.
1029 *
1030 * Notifications from the host are enabled only if the guest user would
1031 * block (no more completed slots in the ring).
1032 */
1033 bool
1034 netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
1035 struct netmap_kring *kring, int flags)
1036 {
1037 bool notify = false;
1038
1039 /* Disable notifications */
1040 atok->appl_need_kick = 0;
1041
1042 /*
1043 * First part: import newly received packets, by updating the kring
1044 * hwtail to the hwtail known from the host (read from the CSB).
1045 * This also updates the kring hwcur.
1046 */
1047 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
1048 kring->nr_kflags &= ~NKR_PENDINTR;
1049
1050 /*
1051 * Second part: tell the host about the slots that guest user has
1052 * released, by updating cur and head in the CSB.
1053 */
1054 if (kring->rhead != kring->nr_hwcur) {
1055 nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
1056 }
1057
1058 /*
1059 * No more completed RX slots. The user thread will go to sleep and
1060 * we need to be notified by the host when more RX slots have been
1061 * completed.
1062 */
1063 if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
1064 /* Reenable notifications. */
1065 atok->appl_need_kick = 1;
1066 /* Double check, with store-load memory barrier. */
1067 nm_stld_barrier();
1068 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
1069 &kring->nr_hwcur);
1070 /* If there are new slots, disable notifications. */
1071 if (!nm_kr_wouldblock(kring)) {
1072 atok->appl_need_kick = 0;
1073 }
1074 }
1075
1076 /* Ask for a kick from the guest to the host if needed. */
1077 if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
1078 && NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
1079 atok->sync_flags = flags;
1080 notify = true;
1081 }
1082
1083 nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
1084 kring->name, atok->head, atok->cur, ktoa->hwtail,
1085 kring->rhead, kring->rcur, kring->nr_hwtail);
1086
1087 return notify;
1088 }
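
/*
 * The RX path is hooked up the same way, using the same hypothetical
 * helpers as in the TX sketch above.
 */
#if 0
static int
example_ptnet_rxsync(struct netmap_kring *kring, int flags)
{
	struct example_ring_state *rs = example_ring_state(kring);

	if (netmap_pt_guest_rxsync(rs->atok, rs->ktoa, kring, flags)) {
		example_kick_host(rs);	/* hypothetical doorbell write */
	}

	return 0;
}
#endif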
1089
1090 /*
1091 * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
1092 */
1093 int
1094 ptnet_nm_krings_create(struct netmap_adapter *na)
1095 {
1096 struct netmap_pt_guest_adapter *ptna =
1097 (struct netmap_pt_guest_adapter *)na; /* Upcast. */
1098 struct netmap_adapter *na_nm = &ptna->hwup.up;
1099 struct netmap_adapter *na_dr = &ptna->dr.up;
1100 int ret;
1101
1102 if (ptna->backend_users) {
1103 return 0;
1104 }
1105
1106 /* Create krings on the public netmap adapter. */
1107 ret = netmap_hw_krings_create(na_nm);
1108 if (ret) {
1109 return ret;
1110 }
1111
1112 /* Copy krings into the netmap adapter private to the driver. */
1113 na_dr->tx_rings = na_nm->tx_rings;
1114 na_dr->rx_rings = na_nm->rx_rings;
1115
1116 return 0;
1117 }
1118
1119 void
1120 ptnet_nm_krings_delete(struct netmap_adapter *na)
1121 {
1122 struct netmap_pt_guest_adapter *ptna =
1123 (struct netmap_pt_guest_adapter *)na; /* Upcast. */
1124 struct netmap_adapter *na_nm = &ptna->hwup.up;
1125 struct netmap_adapter *na_dr = &ptna->dr.up;
1126
1127 if (ptna->backend_users) {
1128 return;
1129 }
1130
1131 na_dr->tx_rings = NULL;
1132 na_dr->rx_rings = NULL;
1133
1134 netmap_hw_krings_delete(na_nm);
1135 }
1136
1137 void
1138 ptnet_nm_dtor(struct netmap_adapter *na)
1139 {
1140 struct netmap_pt_guest_adapter *ptna =
1141 (struct netmap_pt_guest_adapter *)na;
1142
1143 netmap_mem_put(ptna->dr.up.nm_mem);
1144 memset(&ptna->dr, 0, sizeof(ptna->dr));
1145 netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
1146 }
1147
1148 int
1149 netmap_pt_guest_attach(struct netmap_adapter *arg,
1150 unsigned int nifp_offset, unsigned int memid)
1151 {
1152 struct netmap_pt_guest_adapter *ptna;
1153 struct ifnet *ifp = arg ? arg->ifp : NULL;
1154 int error;
1155
1156 /* get allocator */
1157 arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
1158 if (arg->nm_mem == NULL)
1159 return ENOMEM;
1160 arg->na_flags |= NAF_MEM_OWNER;
1161 error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
1162 if (error)
1163 return error;
1164
1165 /* get the netmap_pt_guest_adapter */
1166 ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
1167
1168 /* Initialize a separate pass-through netmap adapter that is going to
1169 * be used by the ptnet driver only, and so never exposed to netmap
1170 * applications. We only need a subset of the available fields. */
1171 memset(&ptna->dr, 0, sizeof(ptna->dr));
1172 ptna->dr.up.ifp = ifp;
1173 ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
1174 ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
1175
1176 ptna->backend_users = 0;
1177
1178 return 0;
1179 }
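
/*
 * Sketch of how a ptnet guest driver is expected to call this helper at
 * attach time, after filling a struct netmap_adapter with its ring counts
 * and callbacks. struct example_softc, example_read_nifp_offset() and
 * example_read_memid() stand for however the driver stores its state and
 * reads those two values from the paravirtualized device (e.g. device
 * registers); they are placeholders, not real APIs.
 */
#if 0
static int
example_ptnet_netmap_attach(struct example_softc *sc)
{
	struct netmap_adapter na;

	memset(&na, 0, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_rings = sc->num_tx_rings;
	na.num_rx_rings = sc->num_rx_rings;
	na.num_tx_desc = sc->num_tx_slots;
	na.num_rx_desc = sc->num_rx_slots;
	na.nm_txsync = example_ptnet_txsync;
	na.nm_rxsync = example_ptnet_rxsync;
	na.nm_krings_create = ptnet_nm_krings_create;
	na.nm_krings_delete = ptnet_nm_krings_delete;
	na.nm_dtor = ptnet_nm_dtor;

	return netmap_pt_guest_attach(&na, example_read_nifp_offset(sc),
			example_read_memid(sc));
}
#endif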
1180
1181 #endif /* WITH_PTNETMAP */
1182