1 /*
2 * Inspired by opendp/dpdk-nginx's ans_module.c.
3 * License of opendp:
4 *
5 BSD LICENSE
6 Copyright(c) 2015-2017 Ansyun [email protected]. All rights reserved.
7 All rights reserved.
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in
16 the documentation and/or other materials provided with the
17 distribution.
18 Neither the name of Ansyun [email protected] nor the names of its
19 contributors may be used to endorse or promote products derived
20 from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 Author: JiaKai ([email protected]) and Bluestar ([email protected])
33 */
34
35 /*
36 * Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
37 * All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions are met:
41 *
42 * 1. Redistributions of source code must retain the above copyright notice, this
43 * list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright notice,
45 * this list of conditions and the following disclaimer in the documentation
46 * and/or other materials provided with the distribution.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
50 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
51 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
52 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
53 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
54 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
55 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
56 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
57 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 *
59 */
60
61 #include <stdio.h>
62 #include <stdint.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <stdarg.h>
66 #include <errno.h>
67 #include <netinet/in.h>
68 #include <assert.h>
69 #include <unistd.h>
70 #include <sys/types.h>
71 #include <sys/socket.h>
72 #include <arpa/inet.h>
73 #include <sys/time.h>
74
75 #include <ngx_auto_config.h>
76 #include "ff_api.h"
77
78 #define _GNU_SOURCE
79 #define __USE_GNU
80
81 #include <unistd.h>
82 #include <sched.h>
83 #include <sys/types.h>
84 #include <fcntl.h>
85 #include <sys/syscall.h>
86 #include <dlfcn.h>
87 #include <limits.h>
88
/*
 * Branch-prediction hints.  Both macros evaluate to the value of (x); they
 * are only defined here when no earlier header has provided them.
 */
#if !defined(likely)
#  define likely(x)   __builtin_expect((x), 1)
#endif

#if !defined(unlikely)
#  define unlikely(x) __builtin_expect((x), 0)
#endif
96
/*
 * Cached pointers to the libc implementations of the calls overridden in
 * this file.  They have static storage, so each starts out NULL; SYSCALL()
 * fills one in the first time it is needed.
 */

/* Socket life cycle. */
static int (*real_close)(int fd);
static int (*real_socket)(int domain, int type, int protocol);
static int (*real_bind)(int fd, const struct sockaddr *addr, socklen_t len);
static int (*real_connect)(int fd, const struct sockaddr *addr, socklen_t len);
static int (*real_listen)(int fd, int backlog);
static int (*real_shutdown)(int fd, int how);

/* Socket options. */
static int (*real_getsockopt)(int fd, int level, int optname,
    void *optval, socklen_t *optlen);
static int (*real_setsockopt)(int fd, int level, int optname,
    const void *optval, socklen_t optlen);

/* Accepting connections. */
static int (*real_accept)(int fd, struct sockaddr *addr, socklen_t *len);
static int (*real_accept4)(int fd, struct sockaddr *addr, socklen_t *len,
    int flags);

/* Data transfer. */
static ssize_t (*real_recv)(int fd, void *buf, size_t len, int flags);
static ssize_t (*real_send)(int fd, const void *buf, size_t len, int flags);
static ssize_t (*real_sendto)(int fd, const void *buf, size_t len, int flags,
    const struct sockaddr *dest, socklen_t destlen);
static ssize_t (*real_sendmsg)(int fd, const struct msghdr *msg, int flags);
static ssize_t (*real_recvmsg)(int fd, struct msghdr *msg, int flags);
static ssize_t (*real_writev)(int fd, const struct iovec *iov, int iovcnt);
static ssize_t (*real_readv)(int fd, const struct iovec *iov, int iovcnt);
static ssize_t (*real_read)(int fd, void *buf, size_t count);
static ssize_t (*real_write)(int fd, const void *buf, size_t count);

/* Miscellaneous. */
static int (*real_ioctl)(int fd, int request, void *arg);
static int (*real_gettimeofday)(struct timeval *tv, struct timezone *tz);
static int (*real_getpeername)(int sockfd, struct sockaddr *name,
    socklen_t *namelen);
static int (*real_getsockname)(int s, struct sockaddr *name,
    socklen_t *namelen);

/*
 * Per-thread flag, set to 1 by ff_mod_init().  While it is 0 the wrappers
 * that consult it hand every call to libc.
 */
static __thread int inited;
130
/*
 * SYSCALL(func) evaluates to the cached real_<func> pointer, resolving it
 * with dlsym(RTLD_NEXT, "<func>") the first time it is found to be NULL.
 * The result of dlsym() is not checked here.
 */
#define SYSCALL(func)                                                     \
    (unlikely(!real_##func)                                               \
        ? (real_##func = dlsym(RTLD_NEXT, #func))                         \
        : real_##func)

/*
 * Defined outside this file.  Used below as the offset that separates
 * kernel descriptors from F-Stack descriptors.
 * NOTE(review): presumably the maximum number of kernel sockets nginx may
 * use -- confirm at its definition.
 */
extern intptr_t ngx_max_sockets;
140
141 /*-
142 * Make sockfd assigned by the fstack plus the value of maximum kernel socket.
143 * so we can tell them apart according to different scopes.
144 * Solve the condominium ownership at Application Layer and obtain more freedom.
145 * fstack tried to do this by 'fd_reserve', unfortunately, it doesn't work well.
146 */
convert_fstack_fd(int sockfd)147 static inline int convert_fstack_fd(int sockfd) {
148 return sockfd + ngx_max_sockets;
149 }
150
151 /* Restore socket fd. */
restore_fstack_fd(int sockfd)152 static inline int restore_fstack_fd(int sockfd) {
153 if(sockfd <= ngx_max_sockets) {
154 return sockfd;
155 }
156
157 return sockfd - ngx_max_sockets;
158 }
159
160 /* Tell whether a 'sockfd' belongs to fstack. */
is_fstack_fd(int sockfd)161 int is_fstack_fd(int sockfd) {
162 if (unlikely(inited == 0)) {
163 return 0;
164 }
165
166 return sockfd >= ngx_max_sockets;
167 }
168
169 // proc_type, 1: primary, 0: secondary.
170 int
ff_mod_init(const char * conf,int proc_id,int proc_type)171 ff_mod_init(const char *conf, int proc_id, int proc_type) {
172 int rc, i;
173 int ff_argc = 4;
174
175 char **ff_argv = malloc(sizeof(char *)*ff_argc);
176 for (i = 0; i < ff_argc; i++) {
177 ff_argv[i] = malloc(sizeof(char)*PATH_MAX);
178 }
179
180 sprintf(ff_argv[0], "nginx");
181 sprintf(ff_argv[1], "--conf=%s", conf);
182 sprintf(ff_argv[2], "--proc-id=%d", proc_id);
183 if (proc_type == 1) {
184 sprintf(ff_argv[3], "--proc-type=primary");
185 } else {
186 sprintf(ff_argv[3], "--proc-type=secondary");
187 }
188
189 rc = ff_init(ff_argc, ff_argv);
190 if (rc == 0) {
191 /* Ensure that the socket we converted
192 does not exceed the maximum value of 'int' */
193
194 if(ngx_max_sockets + (unsigned)ff_getmaxfd() > INT_MAX)
195 {
196 rc = -1;
197 }
198
199 inited = 1;
200 }
201
202 for (i = 0; i < ff_argc; i++) {
203 free(ff_argv[i]);
204 }
205
206 free(ff_argv);
207
208 return rc;
209 }
210
211 /*-
212 * Verify whether the socket is supported by fstack or not.
213 */
214 int
fstack_territory(int domain,int type,int protocol)215 fstack_territory(int domain, int type, int protocol)
216 {
217 /* Remove creation flags */
218 type &= ~SOCK_CLOEXEC;
219 type &= ~SOCK_NONBLOCK;
220 type &= ~SOCK_FSTACK;
221
222 if ((AF_INET != domain && AF_INET6 != domain) || (SOCK_STREAM != type && SOCK_DGRAM != type)) {
223 return 0;
224 }
225
226 return 1;
227 }
228
229 int
socket(int domain,int type,int protocol)230 socket(int domain, int type, int protocol)
231 {
232 int sock;
233 if (unlikely(inited == 0)) {
234 return SYSCALL(socket)(domain, type, protocol);
235 }
236
237 if (unlikely(fstack_territory(domain, type, protocol) == 0)) {
238 return SYSCALL(socket)(domain, type, protocol);
239 }
240
241 if (unlikely((type & SOCK_FSTACK) == 0)) {
242 return SYSCALL(socket)(domain, type, protocol);
243 }
244
245 type &= ~SOCK_FSTACK;
246 sock = ff_socket(domain, type, protocol);
247
248 if (sock != -1) {
249 sock = convert_fstack_fd(sock);
250 }
251
252 return sock;
253 }
254
255 int
bind(int sockfd,const struct sockaddr * addr,socklen_t addrlen)256 bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
257 {
258 if(is_fstack_fd(sockfd)){
259 sockfd = restore_fstack_fd(sockfd);
260 return ff_bind(sockfd, (struct linux_sockaddr *)addr, addrlen);
261 }
262
263 return SYSCALL(bind)(sockfd, addr, addrlen);
264 }
265
266 int
connect(int sockfd,const struct sockaddr * addr,socklen_t addrlen)267 connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
268 {
269 if(is_fstack_fd(sockfd)){
270 sockfd = restore_fstack_fd(sockfd);
271 return ff_connect(sockfd, (struct linux_sockaddr *)addr, addrlen);
272 }
273
274 return SYSCALL(connect)(sockfd, addr, addrlen);
275 }
276
277 int
getpeername(int sockfd,struct sockaddr * name,socklen_t * namelen)278 getpeername(int sockfd, struct sockaddr * name,
279 socklen_t *namelen)
280 {
281 if(is_fstack_fd(sockfd)){
282 sockfd = restore_fstack_fd(sockfd);
283 return ff_getpeername(sockfd,
284 (struct linux_sockaddr *)name, namelen);
285 }
286
287 return SYSCALL(getpeername)(sockfd, name, namelen);
288 }
289
290 int
getsockname(int sockfd,struct sockaddr * name,socklen_t * namelen)291 getsockname(int sockfd, struct sockaddr *name,
292 socklen_t *namelen)
293 {
294 if(is_fstack_fd(sockfd)){
295 sockfd = restore_fstack_fd(sockfd);
296 return ff_getsockname(sockfd,
297 (struct linux_sockaddr *)name, namelen);
298 }
299
300 return SYSCALL(getsockname)(sockfd, name, namelen);
301 }
302
303 ssize_t
send(int sockfd,const void * buf,size_t len,int flags)304 send(int sockfd, const void *buf, size_t len, int flags)
305 {
306 if(is_fstack_fd(sockfd)){
307 sockfd = restore_fstack_fd(sockfd);
308 return ff_send(sockfd, buf, len, flags);
309 }
310
311 return SYSCALL(send)(sockfd, buf, len, flags);
312 }
313
314 ssize_t
sendto(int sockfd,const void * buf,size_t len,int flags,const struct sockaddr * dest_addr,socklen_t addrlen)315 sendto(int sockfd, const void *buf, size_t len, int flags,
316 const struct sockaddr *dest_addr, socklen_t addrlen)
317 {
318 if(is_fstack_fd(sockfd)){
319 sockfd = restore_fstack_fd(sockfd);
320 return ff_sendto(sockfd, buf, len, flags,
321 (struct linux_sockaddr *)dest_addr, addrlen);
322 }
323
324 return SYSCALL(sendto)(sockfd, buf, len, flags, dest_addr, addrlen);
325 }
326
327 ssize_t
sendmsg(int sockfd,const struct msghdr * msg,int flags)328 sendmsg(int sockfd, const struct msghdr *msg, int flags)
329 {
330 if(is_fstack_fd(sockfd)){
331 sockfd = restore_fstack_fd(sockfd);
332 return ff_sendmsg(sockfd, msg, flags);
333 }
334
335 return SYSCALL(sendmsg)(sockfd, msg, flags);
336 }
337
recvmsg(int sockfd,struct msghdr * msg,int flags)338 ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
339 {
340 if(is_fstack_fd(sockfd)){
341 sockfd = restore_fstack_fd(sockfd);
342 return ff_recvmsg(sockfd, msg, flags);
343 }
344
345 return SYSCALL(recvmsg)(sockfd, msg, flags);
346 }
347
348 ssize_t
recv(int sockfd,void * buf,size_t len,int flags)349 recv(int sockfd, void *buf, size_t len, int flags)
350 {
351 if(is_fstack_fd(sockfd)){
352 sockfd = restore_fstack_fd(sockfd);
353 return ff_recv(sockfd, buf, len, flags);
354 }
355
356 return SYSCALL(recv)(sockfd, buf, len, flags);
357 }
358
/*
 * Override of __recv_chk() that simply forwards to recv(), so these calls
 * follow the same routing as plain recv() calls.
 *
 * 'buflen' is accepted but not used: the bounds check this entry point
 * would otherwise perform (fail when n > buflen) is deliberately left out
 * here.
 */
ssize_t
__recv_chk(int fd, void *buf, size_t n, size_t buflen, int flags)
{
    ssize_t received;

    (void)buflen;

    received = recv(fd, buf, n, flags);

    return received;
}
368
369 int
listen(int sockfd,int backlog)370 listen(int sockfd, int backlog)
371 {
372 if(is_fstack_fd(sockfd)){
373 sockfd = restore_fstack_fd(sockfd);
374 return ff_listen(sockfd, backlog);
375 }
376
377 return SYSCALL(listen)(sockfd, backlog);
378 }
379
380 int
getsockopt(int sockfd,int level,int optname,void * optval,socklen_t * optlen)381 getsockopt(int sockfd, int level, int optname,
382 void *optval, socklen_t *optlen)
383 {
384 if(is_fstack_fd(sockfd)){
385 sockfd = restore_fstack_fd(sockfd);
386 return ff_getsockopt(sockfd, level, optname, optval, optlen);
387 }
388
389 return SYSCALL(getsockopt)(sockfd, level, optname, optval, optlen);
390 }
391
392 int
setsockopt(int sockfd,int level,int optname,const void * optval,socklen_t optlen)393 setsockopt (int sockfd, int level, int optname,
394 const void *optval, socklen_t optlen)
395 {
396 if(is_fstack_fd(sockfd)){
397 sockfd = restore_fstack_fd(sockfd);
398 return ff_setsockopt(sockfd, level, optname, optval, optlen);
399 }
400
401 return SYSCALL(setsockopt)(sockfd, level, optname, optval, optlen);
402 }
403
404 int
accept(int sockfd,struct sockaddr * addr,socklen_t * addrlen)405 accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
406 {
407 int rc;
408 if(is_fstack_fd(sockfd)){
409 sockfd = restore_fstack_fd(sockfd);
410 rc = ff_accept(sockfd, (struct linux_sockaddr *)addr, addrlen);
411 if (rc != -1) {
412 rc = convert_fstack_fd(rc);
413 }
414
415 return rc;
416 }
417
418 return SYSCALL(accept)(sockfd, addr, addrlen);
419 }
420
421 int
accept4(int sockfd,struct sockaddr * addr,socklen_t * addrlen,int flags)422 accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)
423 {
424 int rc;
425 if(is_fstack_fd(sockfd)){
426 sockfd = restore_fstack_fd(sockfd);
427 rc = ff_accept(sockfd, (struct linux_sockaddr *)addr, addrlen);
428 if (rc != -1) {
429 rc = convert_fstack_fd(rc);
430 }
431
432 return rc;
433 }
434
435 return SYSCALL(accept4)(sockfd, addr, addrlen, flags);
436 }
437
438 int
close(int sockfd)439 close(int sockfd)
440 {
441 if(is_fstack_fd(sockfd)){
442 sockfd = restore_fstack_fd(sockfd);
443 return ff_close(sockfd);
444 }
445
446 return SYSCALL(close)(sockfd);
447 }
448
449 int
shutdown(int sockfd,int how)450 shutdown(int sockfd, int how)
451 {
452 if(is_fstack_fd(sockfd)){
453 sockfd = restore_fstack_fd(sockfd);
454 return ff_shutdown(sockfd, how);
455 }
456
457 return SYSCALL(shutdown)(sockfd, how);
458 }
459
460 ssize_t
writev(int sockfd,const struct iovec * iov,int iovcnt)461 writev(int sockfd, const struct iovec *iov, int iovcnt)
462 {
463 if(is_fstack_fd(sockfd)){
464 sockfd = restore_fstack_fd(sockfd);
465 return ff_writev(sockfd, iov, iovcnt);
466 }
467
468 return SYSCALL(writev)(sockfd, iov, iovcnt);
469 }
470
471 ssize_t
readv(int sockfd,const struct iovec * iov,int iovcnt)472 readv(int sockfd, const struct iovec *iov, int iovcnt)
473 {
474 if(is_fstack_fd(sockfd)){
475 sockfd = restore_fstack_fd(sockfd);
476 return ff_readv(sockfd, iov, iovcnt);
477 }
478
479 return SYSCALL(readv)(sockfd, iov, iovcnt);
480 }
481
482 ssize_t
read(int sockfd,void * buf,size_t count)483 read(int sockfd, void *buf, size_t count)
484 {
485 if(is_fstack_fd(sockfd)){
486 sockfd = restore_fstack_fd(sockfd);
487 return ff_read(sockfd, buf, count);
488 }
489
490 return SYSCALL(read)(sockfd, buf, count);
491 }
492
493 ssize_t
write(int sockfd,const void * buf,size_t count)494 write(int sockfd, const void *buf, size_t count)
495 {
496 if(is_fstack_fd(sockfd)){
497 sockfd = restore_fstack_fd(sockfd);
498 return ff_write(sockfd, buf, count);
499 }
500
501 return SYSCALL(write)(sockfd, buf, count);
502 }
503
504 int
ioctl(int sockfd,int request,void * p)505 ioctl(int sockfd, int request, void *p)
506 {
507 if(is_fstack_fd(sockfd)){
508 sockfd = restore_fstack_fd(sockfd);
509 return ff_ioctl(sockfd, request, p);
510 }
511
512 return SYSCALL(ioctl)(sockfd, request, p);
513 }
514
/*
 * kqueue() is always served by F-Stack: the result of ff_kqueue() is
 * returned as-is, with no descriptor conversion, no libc fallback and no
 * check of the 'inited' flag.
 */
int
kqueue(void)
{
    int kq = ff_kqueue();

    return kq;
}
520
521 int
kevent(int kq,const struct kevent * changelist,int nchanges,struct kevent * eventlist,int nevents,const struct timespec * timeout)522 kevent(int kq, const struct kevent *changelist, int nchanges,
523 struct kevent *eventlist, int nevents, const struct timespec *timeout)
524 {
525 struct kevent *kev;
526 int i = 0;
527 for(i = 0; i < nchanges; i++) {
528 kev = (struct kevent *)&changelist[i];
529 switch (kev->filter) {
530
531 case EVFILT_READ:
532 case EVFILT_WRITE:
533 case EVFILT_VNODE:
534 kev->ident = restore_fstack_fd(kev->ident);
535 break;
536 case EVFILT_AIO:
537 case EVFILT_PROC:
538 case EVFILT_SIGNAL:
539 case EVFILT_TIMER:
540 case EVFILT_USER:
541 default:
542 break;
543 }
544 }
545 return ff_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
546 }
547
548 int
gettimeofday(struct timeval * tv,struct timezone * tz)549 gettimeofday(struct timeval *tv, struct timezone *tz)
550 {
551 if (unlikely(inited == 0)) {
552 return SYSCALL(gettimeofday)(tv, tz);
553 }
554
555 return ff_gettimeofday(tv, tz);
556 }
557