1 /* 2 * Inspired by opendp/dpdk-nginx's ans_module.c. 3 * License of opendp: 4 * 5 BSD LICENSE 6 Copyright(c) 2015-2017 Ansyun [email protected]. All rights reserved. 7 All rights reserved. 8 Redistribution and use in source and binary forms, with or without 9 modification, are permitted provided that the following conditions 10 are met: 11 12 Redistributions of source code must retain the above copyright 13 notice, this list of conditions and the following disclaimer. 14 Redistributions in binary form must reproduce the above copyright 15 notice, this list of conditions and the following disclaimer in 16 the documentation and/or other materials provided with the 17 distribution. 18 Neither the name of Ansyun [email protected] nor the names of its 19 contributors may be used to endorse or promote products derived 20 from this software without specific prior written permission. 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 Author: JiaKai ([email protected]) and Bluestar ([email protected]) 33 */ 34 35 /* 36 * Copyright (C) 2017 THL A29 Limited, a Tencent company. 37 * All rights reserved. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions are met: 41 * 42 * 1. Redistributions of source code must retain the above copyright notice, this 43 * list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright notice, 45 * this list of conditions and the following disclaimer in the documentation 46 * and/or other materials provided with the distribution. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 50 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 51 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 52 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 53 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 54 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 55 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 56 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 57 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 58 * 59 */ 60 61 #include <stdio.h> 62 #include <stdint.h> 63 #include <string.h> 64 #include <stdlib.h> 65 #include <stdarg.h> 66 #include <errno.h> 67 #include <netinet/in.h> 68 #include <assert.h> 69 #include <unistd.h> 70 #include <sys/types.h> 71 #include <sys/socket.h> 72 #include <arpa/inet.h> 73 #include <sys/time.h> 74 75 #include <ngx_auto_config.h> 76 #include "ff_api.h" 77 78 #define _GNU_SOURCE 79 #define __USE_GNU 80 81 #include <unistd.h> 82 #include <sched.h> 83 #include <sys/types.h> 84 #include <fcntl.h> 85 #include <sys/syscall.h> 86 #include <dlfcn.h> 87 #include <limits.h> 88 89 #ifndef likely 90 #define likely(x) __builtin_expect((x),1) 91 #endif 92 93 #ifndef unlikely 94 #define unlikely(x) __builtin_expect((x),0) 95 #endif 96 97 static int (*real_close)(int); 98 static int (*real_socket)(int, int, int); 99 static int (*real_bind)(int, const struct sockaddr*, socklen_t); 100 static int (*real_connect)(int, const struct sockaddr*, socklen_t); 101 static int (*real_listen)(int, int); 102 103 static int (*real_getsockopt)(int, int, int, void *, socklen_t*); 104 static int (*real_setsockopt)(int, int, int, const void *, socklen_t); 105 106 static int (*real_accept)(int, struct sockaddr *, socklen_t *); 107 static int (*real_accept4)(int, struct sockaddr *, socklen_t *, int); 108 static ssize_t (*real_recv)(int, void *, size_t, int); 109 static ssize_t (*real_send)(int, const void *, size_t, int); 110 static ssize_t (*real_sendto)(int, const void *, size_t, int, 111 const struct sockaddr*, socklen_t); 112 static ssize_t (*real_sendmsg)(int, const struct msghdr*, int); 113 static ssize_t (*real_recvmsg)(int, struct msghdr *, int); 114 static ssize_t (*real_writev)(int, const struct iovec *, int); 115 static ssize_t (*real_readv)(int, const struct iovec *, int); 116 117 static ssize_t (*real_read)(int, void *, size_t); 118 static ssize_t (*real_write)(int, const void *, size_t); 119 120 static int (*real_shutdown)(int, int); 121 122 static int (*real_ioctl)(int, int, void *); 123 124 static int (*real_gettimeofday)(struct timeval *tv, struct timezone *tz); 125 126 static int (*real_getpeername)(int sockfd, struct sockaddr * name, socklen_t *namelen); 127 static int (*real_getsockname)(int s, struct sockaddr *name, socklen_t *namelen); 128 129 static __thread int inited; 130 131 #define SYSCALL(func) \ 132 ({ \ 133 if (unlikely(!real_##func)) { \ 134 real_##func = dlsym(RTLD_NEXT, #func); \ 135 } \ 136 real_##func; \ 137 }) 138 139 extern intptr_t ngx_max_sockets; 140 141 /*- 142 * Make sockfd assigned by the fstack plus the value of maximum kernel socket. 143 * so we can tell them apart according to different scopes. 144 * Solve the condominium ownership at Application Layer and obtain more freedom. 145 * fstack tried to do this by 'fd_reserve', unfortunately, it doesn't work well. 146 */ 147 static inline int convert_fstack_fd(int sockfd) { 148 return sockfd + ngx_max_sockets; 149 } 150 151 /* Restore socket fd. */ 152 static inline int restore_fstack_fd(int sockfd) { 153 if(sockfd <= ngx_max_sockets) { 154 return sockfd; 155 } 156 157 return sockfd - ngx_max_sockets; 158 } 159 160 /* Tell whether a 'sockfd' belongs to fstack. */ 161 int is_fstack_fd(int sockfd) { 162 if (unlikely(inited == 0)) { 163 return 0; 164 } 165 166 return sockfd >= ngx_max_sockets; 167 } 168 169 // proc_type, 1: primary, 0: secondary. 170 int 171 ff_mod_init(const char *conf, int proc_id, int proc_type) { 172 int rc, i; 173 int ff_argc = 4; 174 175 char **ff_argv = malloc(sizeof(char *)*ff_argc); 176 for (i = 0; i < ff_argc; i++) { 177 ff_argv[i] = malloc(sizeof(char)*PATH_MAX); 178 } 179 180 sprintf(ff_argv[0], "nginx"); 181 sprintf(ff_argv[1], "--conf=%s", conf); 182 sprintf(ff_argv[2], "--proc-id=%d", proc_id); 183 if (proc_type == 1) { 184 sprintf(ff_argv[3], "--proc-type=primary"); 185 } else { 186 sprintf(ff_argv[3], "--proc-type=secondary"); 187 } 188 189 rc = ff_init(ff_argc, ff_argv); 190 if (rc == 0) { 191 /* Ensure that the socket we converted 192 does not exceed the maximum value of 'int' */ 193 194 if(ngx_max_sockets + (unsigned)ff_getmaxfd() > INT_MAX) 195 { 196 rc = -1; 197 } 198 199 inited = 1; 200 } 201 202 for (i = 0; i < ff_argc; i++) { 203 free(ff_argv[i]); 204 } 205 206 free(ff_argv); 207 208 return rc; 209 } 210 211 /*- 212 * Verify whether the socket is supported by fstack or not. 213 */ 214 int 215 fstack_territory(int domain, int type, int protocol) 216 { 217 /* Remove creation flags */ 218 type &= ~SOCK_CLOEXEC; 219 type &= ~SOCK_NONBLOCK; 220 type &= ~SOCK_FSTACK; 221 222 if ((AF_INET != domain && AF_INET6 != domain) || (SOCK_STREAM != type && SOCK_DGRAM != type)) { 223 return 0; 224 } 225 226 return 1; 227 } 228 229 int 230 socket(int domain, int type, int protocol) 231 { 232 int sock; 233 if (unlikely(inited == 0)) { 234 return SYSCALL(socket)(domain, type, protocol); 235 } 236 237 if (unlikely(fstack_territory(domain, type, protocol) == 0)) { 238 return SYSCALL(socket)(domain, type, protocol); 239 } 240 241 if (unlikely((type & SOCK_FSTACK) == 0)) { 242 return SYSCALL(socket)(domain, type, protocol); 243 } 244 245 type &= ~SOCK_FSTACK; 246 sock = ff_socket(domain, type, protocol); 247 248 if (sock != -1) { 249 sock = convert_fstack_fd(sock); 250 } 251 252 return sock; 253 } 254 255 int 256 bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen) 257 { 258 if(is_fstack_fd(sockfd)){ 259 sockfd = restore_fstack_fd(sockfd); 260 return ff_bind(sockfd, (struct linux_sockaddr *)addr, addrlen); 261 } 262 263 return SYSCALL(bind)(sockfd, addr, addrlen); 264 } 265 266 int 267 connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) 268 { 269 if(is_fstack_fd(sockfd)){ 270 sockfd = restore_fstack_fd(sockfd); 271 return ff_connect(sockfd, (struct linux_sockaddr *)addr, addrlen); 272 } 273 274 return SYSCALL(connect)(sockfd, addr, addrlen); 275 } 276 277 int 278 getpeername(int sockfd, struct sockaddr * name, 279 socklen_t *namelen) 280 { 281 if(is_fstack_fd(sockfd)){ 282 sockfd = restore_fstack_fd(sockfd); 283 return ff_getpeername(sockfd, 284 (struct linux_sockaddr *)name, namelen); 285 } 286 287 return SYSCALL(getpeername)(sockfd, name, namelen); 288 } 289 290 int 291 getsockname(int sockfd, struct sockaddr *name, 292 socklen_t *namelen) 293 { 294 if(is_fstack_fd(sockfd)){ 295 sockfd = restore_fstack_fd(sockfd); 296 return ff_getsockname(sockfd, 297 (struct linux_sockaddr *)name, namelen); 298 } 299 300 return SYSCALL(getsockname)(sockfd, name, namelen); 301 } 302 303 ssize_t 304 send(int sockfd, const void *buf, size_t len, int flags) 305 { 306 if(is_fstack_fd(sockfd)){ 307 sockfd = restore_fstack_fd(sockfd); 308 return ff_send(sockfd, buf, len, flags); 309 } 310 311 return SYSCALL(send)(sockfd, buf, len, flags); 312 } 313 314 ssize_t 315 sendto(int sockfd, const void *buf, size_t len, int flags, 316 const struct sockaddr *dest_addr, socklen_t addrlen) 317 { 318 if(is_fstack_fd(sockfd)){ 319 sockfd = restore_fstack_fd(sockfd); 320 return ff_sendto(sockfd, buf, len, flags, 321 (struct linux_sockaddr *)dest_addr, addrlen); 322 } 323 324 return SYSCALL(sendto)(sockfd, buf, len, flags, dest_addr, addrlen); 325 } 326 327 ssize_t 328 sendmsg(int sockfd, const struct msghdr *msg, int flags) 329 { 330 if(is_fstack_fd(sockfd)){ 331 sockfd = restore_fstack_fd(sockfd); 332 return ff_sendmsg(sockfd, msg, flags); 333 } 334 335 return SYSCALL(sendmsg)(sockfd, msg, flags); 336 } 337 338 ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags) 339 { 340 if(is_fstack_fd(sockfd)){ 341 sockfd = restore_fstack_fd(sockfd); 342 return ff_recvmsg(sockfd, msg, flags); 343 } 344 345 return SYSCALL(recvmsg)(sockfd, msg, flags); 346 } 347 348 ssize_t 349 recv(int sockfd, void *buf, size_t len, int flags) 350 { 351 if(is_fstack_fd(sockfd)){ 352 sockfd = restore_fstack_fd(sockfd); 353 return ff_recv(sockfd, buf, len, flags); 354 } 355 356 return SYSCALL(recv)(sockfd, buf, len, flags); 357 } 358 359 ssize_t 360 __recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags) 361 { 362 /* 363 if (n > buflen) 364 __chk_fail (); 365 */ 366 return recv (fd, buf, n, flags); 367 } 368 369 int 370 listen(int sockfd, int backlog) 371 { 372 if(is_fstack_fd(sockfd)){ 373 sockfd = restore_fstack_fd(sockfd); 374 return ff_listen(sockfd, backlog); 375 } 376 377 return SYSCALL(listen)(sockfd, backlog); 378 } 379 380 int 381 getsockopt(int sockfd, int level, int optname, 382 void *optval, socklen_t *optlen) 383 { 384 if(is_fstack_fd(sockfd)){ 385 sockfd = restore_fstack_fd(sockfd); 386 return ff_getsockopt(sockfd, level, optname, optval, optlen); 387 } 388 389 return SYSCALL(getsockopt)(sockfd, level, optname, optval, optlen); 390 } 391 392 int 393 setsockopt (int sockfd, int level, int optname, 394 const void *optval, socklen_t optlen) 395 { 396 if(is_fstack_fd(sockfd)){ 397 sockfd = restore_fstack_fd(sockfd); 398 return ff_setsockopt(sockfd, level, optname, optval, optlen); 399 } 400 401 return SYSCALL(setsockopt)(sockfd, level, optname, optval, optlen); 402 } 403 404 int 405 accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) 406 { 407 int rc; 408 if(is_fstack_fd(sockfd)){ 409 sockfd = restore_fstack_fd(sockfd); 410 rc = ff_accept(sockfd, (struct linux_sockaddr *)addr, addrlen); 411 if (rc != -1) { 412 rc = convert_fstack_fd(rc); 413 } 414 415 return rc; 416 } 417 418 return SYSCALL(accept)(sockfd, addr, addrlen); 419 } 420 421 int 422 accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) 423 { 424 int rc; 425 if(is_fstack_fd(sockfd)){ 426 sockfd = restore_fstack_fd(sockfd); 427 rc = ff_accept(sockfd, (struct linux_sockaddr *)addr, addrlen); 428 if (rc != -1) { 429 rc = convert_fstack_fd(rc); 430 } 431 432 return rc; 433 } 434 435 return SYSCALL(accept4)(sockfd, addr, addrlen, flags); 436 } 437 438 int 439 close(int sockfd) 440 { 441 if(is_fstack_fd(sockfd)){ 442 sockfd = restore_fstack_fd(sockfd); 443 return ff_close(sockfd); 444 } 445 446 return SYSCALL(close)(sockfd); 447 } 448 449 int 450 shutdown(int sockfd, int how) 451 { 452 if(is_fstack_fd(sockfd)){ 453 sockfd = restore_fstack_fd(sockfd); 454 return ff_shutdown(sockfd, how); 455 } 456 457 return SYSCALL(shutdown)(sockfd, how); 458 } 459 460 ssize_t 461 writev(int sockfd, const struct iovec *iov, int iovcnt) 462 { 463 if(is_fstack_fd(sockfd)){ 464 sockfd = restore_fstack_fd(sockfd); 465 return ff_writev(sockfd, iov, iovcnt); 466 } 467 468 return SYSCALL(writev)(sockfd, iov, iovcnt); 469 } 470 471 ssize_t 472 readv(int sockfd, const struct iovec *iov, int iovcnt) 473 { 474 if(is_fstack_fd(sockfd)){ 475 sockfd = restore_fstack_fd(sockfd); 476 return ff_readv(sockfd, iov, iovcnt); 477 } 478 479 return SYSCALL(readv)(sockfd, iov, iovcnt); 480 } 481 482 ssize_t 483 read(int sockfd, void *buf, size_t count) 484 { 485 if(is_fstack_fd(sockfd)){ 486 sockfd = restore_fstack_fd(sockfd); 487 return ff_read(sockfd, buf, count); 488 } 489 490 return SYSCALL(read)(sockfd, buf, count); 491 } 492 493 ssize_t 494 write(int sockfd, const void *buf, size_t count) 495 { 496 if(is_fstack_fd(sockfd)){ 497 sockfd = restore_fstack_fd(sockfd); 498 return ff_write(sockfd, buf, count); 499 } 500 501 return SYSCALL(write)(sockfd, buf, count); 502 } 503 504 int 505 ioctl(int sockfd, int request, void *p) 506 { 507 if(is_fstack_fd(sockfd)){ 508 sockfd = restore_fstack_fd(sockfd); 509 return ff_ioctl(sockfd, request, p); 510 } 511 512 return SYSCALL(ioctl)(sockfd, request, p); 513 } 514 515 int 516 kqueue(void) 517 { 518 return ff_kqueue(); 519 } 520 521 int 522 kevent(int kq, const struct kevent *changelist, int nchanges, 523 struct kevent *eventlist, int nevents, const struct timespec *timeout) 524 { 525 struct kevent *kev; 526 int i = 0; 527 for(i = 0; i < nchanges; i++) { 528 kev = (struct kevent *)&changelist[i]; 529 switch (kev->filter) { 530 531 case EVFILT_READ: 532 case EVFILT_WRITE: 533 case EVFILT_VNODE: 534 kev->ident = restore_fstack_fd(kev->ident); 535 break; 536 case EVFILT_AIO: 537 case EVFILT_PROC: 538 case EVFILT_SIGNAL: 539 case EVFILT_TIMER: 540 case EVFILT_USER: 541 default: 542 break; 543 } 544 } 545 return ff_kevent(kq, changelist, nchanges, eventlist, nevents, timeout); 546 } 547 548 int 549 gettimeofday(struct timeval *tv, struct timezone *tz) 550 { 551 if (unlikely(inited == 0)) { 552 return SYSCALL(gettimeofday)(tv, tz); 553 } 554 555 return ff_gettimeofday(tv, tz); 556 } 557