/*
 * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 *
 * Functions and macros to manipulate netmap structures and packets
 * in userspace. See netmap(4) for more information.
 *
 * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
 *		(so simple, we could just do it manually)
 *
 * From there:
 *	struct netmap_ring *NETMAP_TXRING(nifp, index)
 *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *		we can access ring->cur, ring->head, ring->tail, etc.
 *
 *	ring->slot[i] gives us the i-th slot (we can access
 *		directly len, flags, buf_idx)
 *
 *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
 *		the buffer numbered x
 *
 * All ring indexes (head, cur, tail) should always move forward.
 * To compute the next index in a circular ring you can use
 *	i = nm_ring_next(ring, i);
 *
 * To ease porting apps from pcap to netmap we supply a few functions
 * that can be called to open, close, read and write on netmap in a way
 * similar to libpcap. Note that the read/write functions depend on
 * an ioctl()/select()/poll() being issued to refill rings or push
 * packets out.
 *
 * In order to use these, include #define NETMAP_WITH_LIBS
 * in the source file that invokes these functions.
63 */ 64 65 #ifndef _NET_NETMAP_USER_H_ 66 #define _NET_NETMAP_USER_H_ 67 68 #define NETMAP_DEVICE_NAME "/dev/netmap" 69 #ifdef __CYGWIN__ 70 /* 71 * we can compile userspace apps with either cygwin or msvc, 72 * and we use _WIN32 to identify windows specific code 73 */ 74 #ifndef _WIN32 75 #define _WIN32 76 #endif /* _WIN32 */ 77 78 #endif /* __CYGWIN__ */ 79 80 #ifdef _WIN32 81 #undef NETMAP_DEVICE_NAME 82 #define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" 83 #include <windows.h> 84 #include <WinDef.h> 85 #include <sys/cygwin.h> 86 //#include <netioapi.h> 87 //#include <winsock.h> 88 //#define IFNAMSIZ 256 89 #endif 90 91 #include <stdint.h> 92 #include <sys/socket.h> /* apple needs sockaddr */ 93 #include <net/if.h> /* IFNAMSIZ */ 94 95 #ifndef likely 96 #define likely(x) __builtin_expect(!!(x), 1) 97 #define unlikely(x) __builtin_expect(!!(x), 0) 98 #endif /* likely and unlikely */ 99 100 #include "netmap.h" 101 102 /* helper macro */ 103 #define _NETMAP_OFFSET(type, ptr, offset) \ 104 ((type)(void *)((char *)(ptr) + (offset))) 105 106 #define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) 107 108 #define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 109 nifp, (nifp)->ring_ofs[index] ) 110 111 #define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ 112 nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) 113 114 #define NETMAP_BUF(ring, index) \ 115 ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) 116 117 #define NETMAP_BUF_IDX(ring, buf) \ 118 ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ 119 (ring)->nr_buf_size ) 120 121 122 static inline uint32_t 123 nm_ring_next(struct netmap_ring *r, uint32_t i) 124 { 125 return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); 126 } 127 128 129 /* 130 * Return 1 if we have pending transmissions in the tx ring. 
131 * When everything is complete ring->head = ring->tail + 1 (modulo ring size) 132 */ 133 static inline int 134 nm_tx_pending(struct netmap_ring *r) 135 { 136 return nm_ring_next(r, r->tail) != r->head; 137 } 138 139 140 static inline uint32_t 141 nm_ring_space(struct netmap_ring *ring) 142 { 143 int ret = ring->tail - ring->cur; 144 if (ret < 0) 145 ret += ring->num_slots; 146 return ret; 147 } 148 149 150 #ifdef NETMAP_WITH_LIBS 151 /* 152 * Support for simple I/O libraries. 153 * Include other system headers required for compiling this. 154 */ 155 156 #ifndef HAVE_NETMAP_WITH_LIBS 157 #define HAVE_NETMAP_WITH_LIBS 158 159 #include <stdio.h> 160 #include <sys/time.h> 161 #include <sys/mman.h> 162 #include <string.h> /* memset */ 163 #include <sys/ioctl.h> 164 #include <sys/errno.h> /* EINVAL */ 165 #include <fcntl.h> /* O_RDWR */ 166 #include <unistd.h> /* close() */ 167 #include <signal.h> 168 #include <stdlib.h> 169 170 #ifndef ND /* debug macros */ 171 /* debug support */ 172 #define ND(_fmt, ...) do {} while(0) 173 #define D(_fmt, ...) \ 174 do { \ 175 struct timeval _t0; \ 176 gettimeofday(&_t0, NULL); \ 177 fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \ 178 (int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \ 179 __FUNCTION__, __LINE__, ##__VA_ARGS__); \ 180 } while (0) 181 182 /* Rate limited version of "D", lps indicates how many per second */ 183 #define RD(lps, format, ...) 
\ 184 do { \ 185 static int __t0, __cnt; \ 186 struct timeval __xxts; \ 187 gettimeofday(&__xxts, NULL); \ 188 if (__t0 != __xxts.tv_sec) { \ 189 __t0 = __xxts.tv_sec; \ 190 __cnt = 0; \ 191 } \ 192 if (__cnt++ < lps) { \ 193 D(format, ##__VA_ARGS__); \ 194 } \ 195 } while (0) 196 #endif 197 198 struct nm_pkthdr { /* same as pcap_pkthdr */ 199 struct timeval ts; 200 uint32_t caplen; 201 uint32_t len; 202 }; 203 204 struct nm_stat { /* same as pcap_stat */ 205 u_int ps_recv; 206 u_int ps_drop; 207 u_int ps_ifdrop; 208 #ifdef WIN32 209 u_int bs_capt; 210 #endif /* WIN32 */ 211 }; 212 213 #define NM_ERRBUF_SIZE 512 214 215 struct nm_desc { 216 struct nm_desc *self; /* point to self if netmap. */ 217 int fd; 218 void *mem; 219 uint32_t memsize; 220 int done_mmap; /* set if mem is the result of mmap */ 221 struct netmap_if * const nifp; 222 uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; 223 uint16_t first_rx_ring, last_rx_ring, cur_rx_ring; 224 struct nmreq req; /* also contains the nr_name = ifname */ 225 struct nm_pkthdr hdr; 226 227 /* 228 * The memory contains netmap_if, rings and then buffers. 229 * Given a pointer (e.g. to nm_inject) we can compare with 230 * mem/buf_start/buf_end to tell if it is a buffer or 231 * some other descriptor in our region. 232 * We also store a pointer to some ring as it helps in the 233 * translation from buffer indexes to addresses. 234 */ 235 struct netmap_ring * const some_ring; 236 void * const buf_start; 237 void * const buf_end; 238 /* parameters from pcap_open_live */ 239 int snaplen; 240 int promisc; 241 int to_ms; 242 char *errbuf; 243 244 /* save flags so we can restore them on close */ 245 uint32_t if_flags; 246 uint32_t if_reqcap; 247 uint32_t if_curcap; 248 249 struct nm_stat st; 250 char msg[NM_ERRBUF_SIZE]; 251 }; 252 253 /* 254 * when the descriptor is open correctly, d->self == d 255 * Eventually we should also use some magic number. 
256 */ 257 #define P2NMD(p) ((struct nm_desc *)(p)) 258 #define IS_NETMAP_DESC(d) ((d) && P2NMD(d)->self == P2NMD(d)) 259 #define NETMAP_FD(d) (P2NMD(d)->fd) 260 261 262 /* 263 * this is a slightly optimized copy routine which rounds 264 * to multiple of 64 bytes and is often faster than dealing 265 * with other odd sizes. We assume there is enough room 266 * in the source and destination buffers. 267 * 268 * XXX only for multiples of 64 bytes, non overlapped. 269 */ 270 static inline void 271 nm_pkt_copy(const void *_src, void *_dst, int l) 272 { 273 const uint64_t *src = (const uint64_t *)_src; 274 uint64_t *dst = (uint64_t *)_dst; 275 276 if (unlikely(l >= 1024)) { 277 memcpy(dst, src, l); 278 return; 279 } 280 for (; likely(l > 0); l-=64) { 281 *dst++ = *src++; 282 *dst++ = *src++; 283 *dst++ = *src++; 284 *dst++ = *src++; 285 *dst++ = *src++; 286 *dst++ = *src++; 287 *dst++ = *src++; 288 *dst++ = *src++; 289 } 290 } 291 292 293 /* 294 * The callback, invoked on each received packet. Same as libpcap 295 */ 296 typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); 297 298 /* 299 *--- the pcap-like API --- 300 * 301 * nm_open() opens a file descriptor, binds to a port and maps memory. 302 * 303 * ifname (netmap:foo or vale:foo) is the port name 304 * a suffix can indicate the follwing: 305 * ^ bind the host (sw) ring pair 306 * * bind host and NIC ring pairs (transparent) 307 * -NN bind individual NIC ring pair 308 * {NN bind master side of pipe NN 309 * }NN bind slave side of pipe NN 310 * a suffix starting with / and the following flags, 311 * in any order: 312 * x exclusive access 313 * z zero copy monitor 314 * t monitor tx side 315 * r monitor rx side 316 * R bind only RX ring(s) 317 * T bind only TX ring(s) 318 * 319 * req provides the initial values of nmreq before parsing ifname. 
320 * Remember that the ifname parsing will override the ring 321 * number in nm_ringid, and part of nm_flags; 322 * flags special functions, normally 0 323 * indicates which fields of *arg are significant 324 * arg special functions, normally NULL 325 * if passed a netmap_desc with mem != NULL, 326 * use that memory instead of mmap. 327 */ 328 329 static struct nm_desc *nm_open(const char *ifname, const struct nmreq *req, 330 uint64_t flags, const struct nm_desc *arg); 331 332 /* 333 * nm_open can import some fields from the parent descriptor. 334 * These flags control which ones. 335 * Also in flags you can specify NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL, 336 * which set the initial value for these flags. 337 * Note that the 16 low bits of the flags are reserved for data 338 * that may go into the nmreq. 339 */ 340 enum { 341 NM_OPEN_NO_MMAP = 0x040000, /* reuse mmap from parent */ 342 NM_OPEN_IFNAME = 0x080000, /* nr_name, nr_ringid, nr_flags */ 343 NM_OPEN_ARG1 = 0x100000, 344 NM_OPEN_ARG2 = 0x200000, 345 NM_OPEN_ARG3 = 0x400000, 346 NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ 347 }; 348 349 350 /* 351 * nm_close() closes and restores the port to its previous state 352 */ 353 354 static int nm_close(struct nm_desc *); 355 356 /* 357 * nm_mmap() do mmap or inherit from parent if the nr_arg2 358 * (memory block) matches. 
359 */ 360 361 static int nm_mmap(struct nm_desc *, const struct nm_desc *); 362 363 /* 364 * nm_inject() is the same as pcap_inject() 365 * nm_dispatch() is the same as pcap_dispatch() 366 * nm_nextpkt() is the same as pcap_next() 367 */ 368 369 static int nm_inject(struct nm_desc *, const void *, size_t); 370 static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); 371 static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); 372 373 #ifdef _WIN32 374 375 intptr_t _get_osfhandle(int); /* defined in io.h in windows */ 376 377 /* 378 * In windows we do not have yet native poll support, so we keep track 379 * of file descriptors associated to netmap ports to emulate poll on 380 * them and fall back on regular poll on other file descriptors. 381 */ 382 struct win_netmap_fd_list { 383 struct win_netmap_fd_list *next; 384 int win_netmap_fd; 385 HANDLE win_netmap_handle; 386 }; 387 388 /* 389 * list head containing all the netmap opened fd and their 390 * windows HANDLE counterparts 391 */ 392 static struct win_netmap_fd_list *win_netmap_fd_list_head; 393 394 static void 395 win_insert_fd_record(int fd) 396 { 397 struct win_netmap_fd_list *curr; 398 399 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 400 if (fd == curr->win_netmap_fd) { 401 return; 402 } 403 } 404 curr = calloc(1, sizeof(*curr)); 405 curr->next = win_netmap_fd_list_head; 406 curr->win_netmap_fd = fd; 407 curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); 408 win_netmap_fd_list_head = curr; 409 } 410 411 void 412 win_remove_fd_record(int fd) 413 { 414 struct win_netmap_fd_list *curr = win_netmap_fd_list_head; 415 struct win_netmap_fd_list *prev = NULL; 416 for (; curr ; prev = curr, curr = curr->next) { 417 if (fd != curr->win_netmap_fd) 418 continue; 419 /* found the entry */ 420 if (prev == NULL) { /* we are freeing the first entry */ 421 win_netmap_fd_list_head = curr->next; 422 } else { 423 prev->next = curr->next; 424 } 425 free(curr); 426 break; 427 } 428 } 429 
430 431 HANDLE 432 win_get_netmap_handle(int fd) 433 { 434 struct win_netmap_fd_list *curr; 435 436 for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { 437 if (fd == curr->win_netmap_fd) { 438 return curr->win_netmap_handle; 439 } 440 } 441 return NULL; 442 } 443 444 /* 445 * we need to wrap ioctl and mmap, at least for the netmap file descriptors 446 */ 447 448 /* 449 * use this function only from netmap_user.h internal functions 450 * same as ioctl, returns 0 on success and -1 on error 451 */ 452 static int 453 win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) 454 { 455 DWORD bReturn = 0, szIn, szOut; 456 BOOL ioctlReturnStatus; 457 void *inParam = arg, *outParam = arg; 458 459 switch (ctlCode) { 460 case NETMAP_POLL: 461 szIn = sizeof(POLL_REQUEST_DATA); 462 szOut = sizeof(POLL_REQUEST_DATA); 463 break; 464 case NETMAP_MMAP: 465 szIn = 0; 466 szOut = sizeof(void*); 467 inParam = NULL; /* nothing on input */ 468 break; 469 case NIOCTXSYNC: 470 case NIOCRXSYNC: 471 szIn = 0; 472 szOut = 0; 473 break; 474 case NIOCREGIF: 475 szIn = sizeof(struct nmreq); 476 szOut = sizeof(struct nmreq); 477 break; 478 case NIOCCONFIG: 479 D("unsupported NIOCCONFIG!"); 480 return -1; 481 482 default: /* a regular ioctl */ 483 D("invalid ioctl %x on netmap fd", ctlCode); 484 return -1; 485 } 486 487 ioctlReturnStatus = DeviceIoControl(h, 488 ctlCode, inParam, szIn, 489 outParam, szOut, 490 &bReturn, NULL); 491 // XXX note windows returns 0 on error or async call, 1 on success 492 // we could call GetLastError() to figure out what happened 493 return ioctlReturnStatus ? 
0 : -1; 494 } 495 496 /* 497 * this function is what must be called from user-space programs 498 * same as ioctl, returns 0 on success and -1 on error 499 */ 500 static int 501 win_nm_ioctl(int fd, int32_t ctlCode, void *arg) 502 { 503 HANDLE h = win_get_netmap_handle(fd); 504 505 if (h == NULL) { 506 return ioctl(fd, ctlCode, arg); 507 } else { 508 return win_nm_ioctl_internal(h, ctlCode, arg); 509 } 510 } 511 512 #define ioctl win_nm_ioctl /* from now on, within this file ... */ 513 514 /* 515 * We cannot use the native mmap on windows 516 * The only parameter used is "fd", the other ones are just declared to 517 * make this signature comparable to the FreeBSD/Linux one 518 */ 519 static void * 520 win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) 521 { 522 HANDLE h = win_get_netmap_handle(fd); 523 524 if (h == NULL) { 525 return mmap(addr, length, prot, flags, fd, offset); 526 } else { 527 MEMORY_ENTRY ret; 528 529 return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
530 NULL : ret.pUsermodeVirtualAddress; 531 } 532 } 533 534 #define mmap win32_mmap_emulated 535 536 #include <sys/poll.h> /* XXX needed to use the structure pollfd */ 537 538 static int 539 win_nm_poll(struct pollfd *fds, int nfds, int timeout) 540 { 541 HANDLE h; 542 543 if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; 544 return poll(fds, nfds, timeout); 545 } else { 546 POLL_REQUEST_DATA prd; 547 548 prd.timeout = timeout; 549 prd.events = fds->events; 550 551 win_nm_ioctl_internal(h, NETMAP_POLL, &prd); 552 if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { 553 return -1; 554 } 555 return 1; 556 } 557 } 558 559 #define poll win_nm_poll 560 561 static int 562 win_nm_open(char* pathname, int flags){ 563 564 if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0){ 565 int fd = open(NETMAP_DEVICE_NAME, O_RDWR); 566 if (fd < 0) { 567 return -1; 568 } 569 570 win_insert_fd_record(fd); 571 return fd; 572 } 573 else { 574 575 return open(pathname, flags); 576 } 577 } 578 579 #define open win_nm_open 580 581 static int 582 win_nm_close(int fd){ 583 if (fd != -1){ 584 close(fd); 585 if (win_get_netmap_handle(fd) != NULL){ 586 win_remove_fd_record(fd); 587 } 588 } 589 return 0; 590 } 591 592 #define close win_nm_close 593 594 #endif /* _WIN32 */ 595 596 /* 597 * Try to open, return descriptor if successful, NULL otherwise. 598 * An invalid netmap name will return errno = 0; 599 * You can pass a pointer to a pre-filled nm_desc to add special 600 * parameters. Flags is used as follows 601 * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap 602 * if the nr_arg2 (memory block) matches. 
603 * NM_OPEN_ARG1 use req.nr_arg1 from arg 604 * NM_OPEN_ARG2 use req.nr_arg2 from arg 605 * NM_OPEN_RING_CFG user ring config from arg 606 */ 607 static struct nm_desc * 608 nm_open(const char *ifname, const struct nmreq *req, 609 uint64_t new_flags, const struct nm_desc *arg) 610 { 611 struct nm_desc *d = NULL; 612 const struct nm_desc *parent = arg; 613 u_int namelen; 614 uint32_t nr_ringid = 0, nr_flags, nr_reg; 615 const char *port = NULL; 616 #define MAXERRMSG 80 617 char errmsg[MAXERRMSG] = ""; 618 enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state; 619 long num; 620 621 if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { 622 errno = 0; /* name not recognised, not an error */ 623 return NULL; 624 } 625 if (ifname[0] == 'n') 626 ifname += 7; 627 /* scan for a separator */ 628 for (port = ifname; *port && !index("-*^{}/", *port); port++) 629 ; 630 namelen = port - ifname; 631 if (namelen >= sizeof(d->req.nr_name)) { 632 snprintf(errmsg, MAXERRMSG, "name too long"); 633 goto fail; 634 } 635 p_state = P_START; 636 nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ 637 while (*port) { 638 switch (p_state) { 639 case P_START: 640 switch (*port) { 641 case '^': /* only SW ring */ 642 nr_flags = NR_REG_SW; 643 p_state = P_RNGSFXOK; 644 break; 645 case '*': /* NIC and SW */ 646 nr_flags = NR_REG_NIC_SW; 647 p_state = P_RNGSFXOK; 648 break; 649 case '-': /* one NIC ring pair */ 650 nr_flags = NR_REG_ONE_NIC; 651 p_state = P_GETNUM; 652 break; 653 case '{': /* pipe (master endpoint) */ 654 nr_flags = NR_REG_PIPE_MASTER; 655 p_state = P_GETNUM; 656 break; 657 case '}': /* pipe (slave endoint) */ 658 nr_flags = NR_REG_PIPE_SLAVE; 659 p_state = P_GETNUM; 660 break; 661 case '/': /* start of flags */ 662 p_state = P_FLAGS; 663 break; 664 default: 665 snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); 666 goto fail; 667 } 668 port++; 669 break; 670 case P_RNGSFXOK: 671 switch (*port) { 672 case '/': 673 p_state = P_FLAGS; 674 
break; 675 default: 676 snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); 677 goto fail; 678 } 679 port++; 680 break; 681 case P_GETNUM: 682 num = strtol(port, (char **)&port, 10); 683 if (num < 0 || num >= NETMAP_RING_MASK) { 684 snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", 685 num, NETMAP_RING_MASK); 686 goto fail; 687 } 688 nr_ringid = num & NETMAP_RING_MASK; 689 p_state = P_RNGSFXOK; 690 break; 691 case P_FLAGS: 692 case P_FLAGSOK: 693 switch (*port) { 694 case 'x': 695 nr_flags |= NR_EXCLUSIVE; 696 break; 697 case 'z': 698 nr_flags |= NR_ZCOPY_MON; 699 break; 700 case 't': 701 nr_flags |= NR_MONITOR_TX; 702 break; 703 case 'r': 704 nr_flags |= NR_MONITOR_RX; 705 break; 706 case 'R': 707 nr_flags |= NR_RX_RINGS_ONLY; 708 break; 709 case 'T': 710 nr_flags |= NR_TX_RINGS_ONLY; 711 break; 712 default: 713 snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); 714 goto fail; 715 } 716 port++; 717 p_state = P_FLAGSOK; 718 break; 719 } 720 } 721 if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { 722 snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); 723 goto fail; 724 } 725 ND("flags: %s %s %s %s", 726 (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", 727 (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", 728 (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", 729 (nr_flags & NR_MONITOR_RX) ? 
"MONITOR_RX" : ""); 730 d = (struct nm_desc *)calloc(1, sizeof(*d)); 731 if (d == NULL) { 732 snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); 733 errno = ENOMEM; 734 return NULL; 735 } 736 d->self = d; /* set this early so nm_close() works */ 737 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 738 if (d->fd < 0) { 739 snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); 740 goto fail; 741 } 742 743 if (req) 744 d->req = *req; 745 d->req.nr_version = NETMAP_API; 746 d->req.nr_ringid &= ~NETMAP_RING_MASK; 747 748 /* these fields are overridden by ifname and flags processing */ 749 d->req.nr_ringid |= nr_ringid; 750 d->req.nr_flags |= nr_flags; 751 memcpy(d->req.nr_name, ifname, namelen); 752 d->req.nr_name[namelen] = '\0'; 753 /* optionally import info from parent */ 754 if (IS_NETMAP_DESC(parent) && new_flags) { 755 if (new_flags & NM_OPEN_ARG1) 756 D("overriding ARG1 %d", parent->req.nr_arg1); 757 d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? 758 parent->req.nr_arg1 : 4; 759 if (new_flags & NM_OPEN_ARG2) 760 D("overriding ARG2 %d", parent->req.nr_arg2); 761 d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ? 762 parent->req.nr_arg2 : 0; 763 if (new_flags & NM_OPEN_ARG3) 764 D("overriding ARG3 %d", parent->req.nr_arg3); 765 d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ? 
766 parent->req.nr_arg3 : 0; 767 if (new_flags & NM_OPEN_RING_CFG) { 768 D("overriding RING_CFG"); 769 d->req.nr_tx_slots = parent->req.nr_tx_slots; 770 d->req.nr_rx_slots = parent->req.nr_rx_slots; 771 d->req.nr_tx_rings = parent->req.nr_tx_rings; 772 d->req.nr_rx_rings = parent->req.nr_rx_rings; 773 } 774 if (new_flags & NM_OPEN_IFNAME) { 775 D("overriding ifname %s ringid 0x%x flags 0x%x", 776 parent->req.nr_name, parent->req.nr_ringid, 777 parent->req.nr_flags); 778 memcpy(d->req.nr_name, parent->req.nr_name, 779 sizeof(d->req.nr_name)); 780 d->req.nr_ringid = parent->req.nr_ringid; 781 d->req.nr_flags = parent->req.nr_flags; 782 } 783 } 784 /* add the *XPOLL flags */ 785 d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); 786 787 if (ioctl(d->fd, NIOCREGIF, &d->req)) { 788 snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); 789 goto fail; 790 } 791 792 /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ 793 if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { 794 snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); 795 goto fail; 796 } 797 798 nr_reg = d->req.nr_flags & NR_REG_MASK; 799 800 if (nr_reg == NR_REG_SW) { /* host stack */ 801 d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; 802 d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; 803 } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ 804 d->first_tx_ring = 0; 805 d->first_rx_ring = 0; 806 d->last_tx_ring = d->req.nr_tx_rings - 1; 807 d->last_rx_ring = d->req.nr_rx_rings - 1; 808 } else if (nr_reg == NR_REG_NIC_SW) { 809 d->first_tx_ring = 0; 810 d->first_rx_ring = 0; 811 d->last_tx_ring = d->req.nr_tx_rings; 812 d->last_rx_ring = d->req.nr_rx_rings; 813 } else if (nr_reg == NR_REG_ONE_NIC) { 814 /* XXX check validity */ 815 d->first_tx_ring = d->last_tx_ring = 816 d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; 817 } else { /* pipes */ 818 d->first_tx_ring = 
d->last_tx_ring = 0; 819 d->first_rx_ring = d->last_rx_ring = 0; 820 } 821 822 #ifdef DEBUG_NETMAP_USER 823 { /* debugging code */ 824 int i; 825 826 D("%s tx %d .. %d %d rx %d .. %d %d", ifname, 827 d->first_tx_ring, d->last_tx_ring, d->req.nr_tx_rings, 828 d->first_rx_ring, d->last_rx_ring, d->req.nr_rx_rings); 829 for (i = 0; i <= d->req.nr_tx_rings; i++) { 830 struct netmap_ring *r = NETMAP_TXRING(d->nifp, i); 831 D("TX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 832 } 833 for (i = 0; i <= d->req.nr_rx_rings; i++) { 834 struct netmap_ring *r = NETMAP_RXRING(d->nifp, i); 835 D("RX%d %p h %d c %d t %d", i, r, r->head, r->cur, r->tail); 836 } 837 } 838 #endif /* debugging */ 839 840 d->cur_tx_ring = d->first_tx_ring; 841 d->cur_rx_ring = d->first_rx_ring; 842 return d; 843 844 fail: 845 nm_close(d); 846 if (errmsg[0]) 847 D("%s %s", errmsg, ifname); 848 if (errno == 0) 849 errno = EINVAL; 850 return NULL; 851 } 852 853 854 static int 855 nm_close(struct nm_desc *d) 856 { 857 /* 858 * ugly trick to avoid unused warnings 859 */ 860 static void *__xxzt[] __attribute__ ((unused)) = 861 { (void *)nm_open, (void *)nm_inject, 862 (void *)nm_dispatch, (void *)nm_nextpkt } ; 863 864 if (d == NULL || d->self != d) 865 return EINVAL; 866 if (d->done_mmap && d->mem) 867 munmap(d->mem, d->memsize); 868 if (d->fd != -1){ 869 close(d->fd); 870 } 871 872 bzero(d, sizeof(*d)); 873 free(d); 874 return 0; 875 } 876 877 878 static int 879 nm_mmap(struct nm_desc *d, const struct nm_desc *parent) 880 { 881 //XXX TODO: check if mmap is already done 882 883 if (IS_NETMAP_DESC(parent) && parent->mem && 884 parent->req.nr_arg2 == d->req.nr_arg2) { 885 /* do not mmap, inherit from parent */ 886 D("do not mmap, inherit from parent"); 887 d->memsize = parent->memsize; 888 d->mem = parent->mem; 889 } else { 890 /* XXX TODO: check if memsize is too large (or there is overflow) */ 891 d->memsize = d->req.nr_memsize; 892 d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, 
893 d->fd, 0); 894 if (d->mem == MAP_FAILED) { 895 goto fail; 896 } 897 d->done_mmap = 1; 898 } 899 { 900 struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); 901 struct netmap_ring *r = NETMAP_RXRING(nifp, ); 902 903 *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; 904 *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; 905 *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); 906 *(void **)(uintptr_t)&d->buf_end = 907 (char *)d->mem + d->memsize; 908 } 909 910 return 0; 911 912 fail: 913 return EINVAL; 914 } 915 916 /* 917 * Same prototype as pcap_inject(), only need to cast. 918 */ 919 static int 920 nm_inject(struct nm_desc *d, const void *buf, size_t size) 921 { 922 u_int c, n = d->last_tx_ring - d->first_tx_ring + 1; 923 924 for (c = 0; c < n ; c++) { 925 /* compute current ring to use */ 926 struct netmap_ring *ring; 927 uint32_t i, idx; 928 uint32_t ri = d->cur_tx_ring + c; 929 930 if (ri > d->last_tx_ring) 931 ri = d->first_tx_ring; 932 ring = NETMAP_TXRING(d->nifp, ri); 933 if (nm_ring_empty(ring)) { 934 continue; 935 } 936 i = ring->cur; 937 idx = ring->slot[i].buf_idx; 938 ring->slot[i].len = size; 939 nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); 940 d->cur_tx_ring = ri; 941 ring->head = ring->cur = nm_ring_next(ring, i); 942 return size; 943 } 944 return 0; /* fail */ 945 } 946 947 948 /* 949 * Same prototype as pcap_dispatch(), only need to cast. 
950 */ 951 static int 952 nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) 953 { 954 int n = d->last_rx_ring - d->first_rx_ring + 1; 955 int c, got = 0, ri = d->cur_rx_ring; 956 957 if (cnt == 0) 958 cnt = -1; 959 /* cnt == -1 means infinite, but rings have a finite amount 960 * of buffers and the int is large enough that we never wrap, 961 * so we can omit checking for -1 962 */ 963 for (c=0; c < n && cnt != got; c++) { 964 /* compute current ring to use */ 965 struct netmap_ring *ring; 966 967 ri = d->cur_rx_ring + c; 968 if (ri > d->last_rx_ring) 969 ri = d->first_rx_ring; 970 ring = NETMAP_RXRING(d->nifp, ri); 971 for ( ; !nm_ring_empty(ring) && cnt != got; got++) { 972 u_int i = ring->cur; 973 u_int idx = ring->slot[i].buf_idx; 974 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 975 976 // __builtin_prefetch(buf); 977 d->hdr.len = d->hdr.caplen = ring->slot[i].len; 978 d->hdr.ts = ring->ts; 979 cb(arg, &d->hdr, buf); 980 ring->head = ring->cur = nm_ring_next(ring, i); 981 } 982 } 983 d->cur_rx_ring = ri; 984 return got; 985 } 986 987 static u_char * 988 nm_nextpkt(struct nm_desc *d, struct nm_pkthdr *hdr) 989 { 990 int ri = d->cur_rx_ring; 991 992 do { 993 /* compute current ring to use */ 994 struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri); 995 if (!nm_ring_empty(ring)) { 996 u_int i = ring->cur; 997 u_int idx = ring->slot[i].buf_idx; 998 u_char *buf = (u_char *)NETMAP_BUF(ring, idx); 999 1000 // __builtin_prefetch(buf); 1001 hdr->ts = ring->ts; 1002 hdr->len = hdr->caplen = ring->slot[i].len; 1003 ring->cur = nm_ring_next(ring, i); 1004 /* we could postpone advancing head if we want 1005 * to hold the buffer. This can be supported in 1006 * the future. 
1007 */ 1008 ring->head = ring->cur; 1009 d->cur_rx_ring = ri; 1010 return buf; 1011 } 1012 ri++; 1013 if (ri > d->last_rx_ring) 1014 ri = d->first_rx_ring; 1015 } while (ri != d->cur_rx_ring); 1016 return NULL; /* nothing found */ 1017 } 1018 1019 #endif /* !HAVE_NETMAP_WITH_LIBS */ 1020 1021 #endif /* NETMAP_WITH_LIBS */ 1022 1023 #endif /* _NET_NETMAP_USER_H_ */ 1024