1 /*- 2 * Copyright (c) 2021 The FreeBSD Foundation 3 * Copyright (c) 2022 Bjoern A. Zeeb 4 * 5 * This software was developed by Björn Zeeb under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/kernel.h> 34 #include <sys/sysctl.h> 35 36 #include <linux/bitops.h> 37 #include <linux/list.h> 38 #include <linux/netdevice.h> 39 40 MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat"); 41 42 #define NAPI_LOCK_INIT(_ndev) \ 43 mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF) 44 #define NAPI_LOCK_DESTROY(_ndev) mtx_destroy(&(_ndev)->napi_mtx) 45 #define NAPI_LOCK_ASSERT(_ndev) mtx_assert(&(_ndev)->napi_mtx, MA_OWNED) 46 #define NAPI_LOCK(_ndev) mtx_lock(&(_ndev)->napi_mtx) 47 #define NAPI_UNLOCK(_ndev) mtx_unlock(&(_ndev)->napi_mtx) 48 49 /* -------------------------------------------------------------------------- */ 50 51 #define LKPI_NAPI_FLAGS \ 52 "\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN" 53 54 /* #define NAPI_DEBUG */ 55 #ifdef NAPI_DEBUG 56 static int debug_napi; 57 SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN, 58 &debug_napi, 0, "NAPI debug level"); 59 60 #define DNAPI_TODO 0x01 61 #define DNAPI_IMPROVE 0x02 62 #define DNAPI_TRACE 0x10 63 #define DNAPI_TRACE_TASK 0x20 64 #define DNAPI_DIRECT_DISPATCH 0x1000 65 66 #define NAPI_TRACE(_n) if (debug_napi & DNAPI_TRACE) \ 67 printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n", __func__, __LINE__, \ 68 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 69 (int)(_n)->state, LKPI_NAPI_FLAGS) 70 #define NAPI_TRACE2D(_n, _d) if (debug_napi & DNAPI_TRACE) \ 71 printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n", __func__, __LINE__, \ 72 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 73 (int)(_n)->state, LKPI_NAPI_FLAGS, _d) 74 #define NAPI_TRACE_TASK(_n, _p, _c) if (debug_napi & DNAPI_TRACE_TASK) \ 75 printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d count %d " \ 76 "rx_count %d\n", __func__, __LINE__, \ 77 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 78 (int)(_n)->state, LKPI_NAPI_FLAGS, _p, _c, (_n)->rx_count) 79 #define NAPI_TODO() if (debug_napi & DNAPI_TODO) \ 80 printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__, ticks) 81 #define NAPI_IMPROVE() if (debug_napi & DNAPI_IMPROVE) \ 82 printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__, ticks) 83 84 #define NAPI_DIRECT_DISPATCH() ((debug_napi & DNAPI_DIRECT_DISPATCH) != 0) 85 #else 86 #define NAPI_TRACE(_n) do { } while(0) 87 #define NAPI_TRACE2D(_n, _d) do { } while(0) 88 #define NAPI_TRACE_TASK(_n, _p, _c) do { } while(0) 89 #define NAPI_TODO() do { } while(0) 90 #define NAPI_IMPROVE() do { } while(0) 91 92 #define NAPI_DIRECT_DISPATCH() (0) 93 #endif 94 95 /* -------------------------------------------------------------------------- */ 96 97 /* 98 * Check if a poll is running or can run and and if the latter 99 * make us as running. That way we ensure that only one poll 100 * can only ever run at the same time. Returns true if no poll 101 * was scheduled yet. 102 */ 103 bool 104 linuxkpi_napi_schedule_prep(struct napi_struct *napi) 105 { 106 unsigned long old, new; 107 108 NAPI_TRACE(napi); 109 110 /* Can can only update/return if all flags agree. */ 111 do { 112 old = READ_ONCE(napi->state); 113 114 /* If we are stopping, cannot run again. */ 115 if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) { 116 NAPI_TRACE(napi); 117 return (false); 118 } 119 120 new = old; 121 /* We were already scheduled. Need to try again? */ 122 if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0) 123 new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN); 124 new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED); 125 126 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 127 128 NAPI_TRACE(napi); 129 return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0); 130 } 131 132 static void 133 lkpi___napi_schedule_dd(struct napi_struct *napi) 134 { 135 unsigned long old, new; 136 int rc; 137 138 rc = 0; 139 again: 140 NAPI_TRACE2D(napi, rc); 141 if (napi->poll != NULL) 142 rc = napi->poll(napi, napi->budget); 143 napi->rx_count += rc; 144 145 /* Check if interrupts are still disabled, more work to do. */ 146 /* Bandaid for now. */ 147 if (rc >= napi->budget) 148 goto again; 149 150 /* Bandaid for now. */ 151 if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state)) 152 goto again; 153 154 do { 155 new = old = READ_ONCE(napi->state); 156 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new); 157 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new); 158 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 159 160 NAPI_TRACE2D(napi, rc); 161 } 162 163 void 164 linuxkpi___napi_schedule(struct napi_struct *napi) 165 { 166 int rc; 167 168 NAPI_TRACE(napi); 169 if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state)) { 170 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state); 171 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state); 172 NAPI_TRACE(napi); 173 return; 174 } 175 176 if (NAPI_DIRECT_DISPATCH()) { 177 lkpi___napi_schedule_dd(napi); 178 } else { 179 rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task); 180 NAPI_TRACE2D(napi, rc); 181 if (rc != 0) { 182 /* Should we assert EPIPE? */ 183 return; 184 } 185 } 186 } 187 188 bool 189 linuxkpi_napi_schedule(struct napi_struct *napi) 190 { 191 192 NAPI_TRACE(napi); 193 194 /* 195 * iwlwifi calls this sequence instead of napi_schedule() 196 * to be able to test the prep result. 197 */ 198 if (napi_schedule_prep(napi)) { 199 __napi_schedule(napi); 200 return (true); 201 } 202 203 return (false); 204 } 205 206 void 207 linuxkpi_napi_reschedule(struct napi_struct *napi) 208 { 209 210 NAPI_TRACE(napi); 211 212 /* Not sure what is different to napi_schedule yet. */ 213 if (napi_schedule_prep(napi)) 214 __napi_schedule(napi); 215 } 216 217 bool 218 linuxkpi_napi_complete_done(struct napi_struct *napi, int ret) 219 { 220 unsigned long old, new; 221 222 NAPI_TRACE(napi); 223 if (NAPI_DIRECT_DISPATCH()) 224 return (true); 225 226 do { 227 new = old = READ_ONCE(napi->state); 228 229 /* 230 * If we lost a race before, we need to re-schedule. 231 * Leave IS_SCHEDULED set essentially doing "_prep". 232 */ 233 if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) 234 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new); 235 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new); 236 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 237 238 NAPI_TRACE(napi); 239 240 /* Someone tried to schedule while poll was running. Re-sched. */ 241 if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) { 242 __napi_schedule(napi); 243 return (false); 244 } 245 246 return (true); 247 } 248 249 bool 250 linuxkpi_napi_complete(struct napi_struct *napi) 251 { 252 253 NAPI_TRACE(napi); 254 return (napi_complete_done(napi, 0)); 255 } 256 257 void 258 linuxkpi_napi_disable(struct napi_struct *napi) 259 { 260 NAPI_TRACE(napi); 261 set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state); 262 while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state)) 263 pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK); 264 clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state); 265 } 266 267 void 268 linuxkpi_napi_enable(struct napi_struct *napi) 269 { 270 271 NAPI_TRACE(napi); 272 KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state), 273 ("%s: enabling napi %p already scheduled\n", __func__, napi)); 274 mb(); 275 /* Let us be scheduled. */ 276 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state); 277 } 278 279 void 280 linuxkpi_napi_synchronize(struct napi_struct *napi) 281 { 282 NAPI_TRACE(napi); 283 #if defined(SMP) 284 /* Check & sleep while a napi is scheduled. */ 285 while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state)) 286 pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK); 287 #else 288 mb(); 289 #endif 290 } 291 292 /* -------------------------------------------------------------------------- */ 293 294 static void 295 lkpi_napi_task(void *ctx, int pending) 296 { 297 struct napi_struct *napi; 298 int count; 299 300 KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n", 301 __func__, ctx, pending)); 302 napi = ctx; 303 KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n", 304 __func__, napi)); 305 306 NAPI_TRACE_TASK(napi, pending, napi->budget); 307 count = napi->poll(napi, napi->budget); 308 napi->rx_count += count; 309 NAPI_TRACE_TASK(napi, pending, count); 310 311 /* 312 * We must not check against count < pending here. There are situations 313 * when a driver may "poll" and we may not have any work to do and that 314 * would make us re-schedule ourseless for ever. 315 */ 316 if (count >= napi->budget) { 317 /* 318 * Have to re-schedule ourselves. napi_complete() was not run 319 * in this case which means we are still SCHEDULED. 320 * In order to queue another task we have to directly call 321 * __napi_schedule() without _prep() in the way. 322 */ 323 __napi_schedule(napi); 324 } 325 } 326 327 /* -------------------------------------------------------------------------- */ 328 329 void 330 linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi, 331 int(*napi_poll)(struct napi_struct *, int)) 332 { 333 334 napi->dev = ndev; 335 napi->poll = napi_poll; 336 napi->budget = NAPI_POLL_WEIGHT; 337 338 INIT_LIST_HEAD(&napi->rx_list); 339 napi->rx_count = 0; 340 341 TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi); 342 343 NAPI_LOCK(ndev); 344 TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry); 345 NAPI_UNLOCK(ndev); 346 347 /* Anything else to do on the ndev? */ 348 clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 349 } 350 351 static void 352 lkpi_netif_napi_del_locked(struct napi_struct *napi) 353 { 354 struct net_device *ndev; 355 356 ndev = napi->dev; 357 NAPI_LOCK_ASSERT(ndev); 358 359 set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 360 TAILQ_REMOVE(&ndev->napi_head, napi, entry); 361 while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0) 362 taskqueue_drain(ndev->napi_tq, &napi->napi_task); 363 } 364 365 void 366 linuxkpi_netif_napi_del(struct napi_struct *napi) 367 { 368 struct net_device *ndev; 369 370 ndev = napi->dev; 371 NAPI_LOCK(ndev); 372 lkpi_netif_napi_del_locked(napi); 373 NAPI_UNLOCK(ndev); 374 } 375 376 /* -------------------------------------------------------------------------- */ 377 378 void 379 linuxkpi_init_dummy_netdev(struct net_device *ndev) 380 { 381 382 memset(ndev, 0, sizeof(*ndev)); 383 384 ndev->reg_state = NETREG_DUMMY; 385 NAPI_LOCK_INIT(ndev); 386 TAILQ_INIT(&ndev->napi_head); 387 /* Anything else? */ 388 389 ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK, 390 taskqueue_thread_enqueue, &ndev->napi_tq); 391 /* One thread for now. */ 392 (void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT, 393 "ndev napi taskq"); 394 } 395 396 struct net_device * 397 linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags, 398 void(*setup_func)(struct net_device *)) 399 { 400 struct net_device *ndev; 401 402 ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT); 403 if (ndev == NULL) 404 return (ndev); 405 406 /* Always first as it zeros! */ 407 linuxkpi_init_dummy_netdev(ndev); 408 409 strlcpy(ndev->name, name, sizeof(*ndev->name)); 410 411 /* This needs extending as we support more. */ 412 413 setup_func(ndev); 414 415 return (ndev); 416 } 417 418 void 419 linuxkpi_free_netdev(struct net_device *ndev) 420 { 421 struct napi_struct *napi, *temp; 422 423 NAPI_LOCK(ndev); 424 TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) { 425 lkpi_netif_napi_del_locked(napi); 426 } 427 NAPI_UNLOCK(ndev); 428 429 taskqueue_free(ndev->napi_tq); 430 ndev->napi_tq = NULL; 431 NAPI_LOCK_DESTROY(ndev); 432 433 /* This needs extending as we support more. */ 434 435 free(ndev, M_NETDEV); 436 } 437