1 /*
2  * Inspired by opendp/dpdk-nginx's ans_module.c.
3  * License of opendp:
4  *
5  BSD LICENSE
6  Copyright(c) 2015-2017 Ansyun [email protected]. All rights reserved.
7  All rights reserved.
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in
16  the documentation and/or other materials provided with the
17  distribution.
18  Neither the name of Ansyun [email protected] nor the names of its
19  contributors may be used to endorse or promote products derived
20  from this software without specific prior written permission.
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  Author: JiaKai ([email protected]) and Bluestar ([email protected])
33  */
34 
35 /*
36  * Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
37  * All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright notice, this
43  *   list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright notice,
45  *   this list of conditions and the following disclaimer in the documentation
46  *   and/or other materials provided with the distribution.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
50  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
51  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
52  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
53  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
54  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
55  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
56  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
57  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58  *
59  */
60 
61 #include <stdio.h>
62 #include <stdint.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <stdarg.h>
66 #include <errno.h>
67 #include <netinet/in.h>
68 #include <assert.h>
69 #include <unistd.h>
70 #include <sys/types.h>
71 #include <sys/socket.h>
72 #include <arpa/inet.h>
73 #include <sys/time.h>
74 
75 #include <ngx_auto_config.h>
76 #include "ff_api.h"
77 
78 #define _GNU_SOURCE
79 #define __USE_GNU
80 
81 #include <unistd.h>
82 #include <sched.h>
83 #include <sys/types.h>
84 #include <fcntl.h>
85 #include <sys/syscall.h>
86 #include <dlfcn.h>
87 #include <limits.h>
88 
89 #ifndef likely
90 #define likely(x)  __builtin_expect((x),1)
91 #endif
92 
93 #ifndef unlikely
94 #define unlikely(x)  __builtin_expect((x),0)
95 #endif
96 
/*
 * Cached pointers to the next definition of every libc entry point that this
 * file overrides.  They start out NULL and are filled in by SYSCALL().
 */

/* Socket creation, naming and teardown. */
static int (*real_socket)(int, int, int);
static int (*real_bind)(int, const struct sockaddr *, socklen_t);
static int (*real_connect)(int, const struct sockaddr *, socklen_t);
static int (*real_listen)(int, int);
static int (*real_accept)(int, struct sockaddr *, socklen_t *);
static int (*real_accept4)(int, struct sockaddr *, socklen_t *, int);
static int (*real_shutdown)(int, int);
static int (*real_close)(int);

/* Socket options and addresses. */
static int (*real_getsockopt)(int, int, int, void *, socklen_t *);
static int (*real_setsockopt)(int, int, int, const void *, socklen_t);
static int (*real_getpeername)(int sockfd, struct sockaddr *name,
    socklen_t *namelen);
static int (*real_getsockname)(int s, struct sockaddr *name,
    socklen_t *namelen);

/* Data transfer. */
static ssize_t (*real_recv)(int, void *, size_t, int);
static ssize_t (*real_send)(int, const void *, size_t, int);
static ssize_t (*real_sendto)(int, const void *, size_t, int,
    const struct sockaddr *, socklen_t);
static ssize_t (*real_sendmsg)(int, const struct msghdr *, int);
static ssize_t (*real_recvmsg)(int, struct msghdr *, int);
static ssize_t (*real_writev)(int, const struct iovec *, int);
static ssize_t (*real_readv)(int, const struct iovec *, int);
static ssize_t (*real_read)(int, void *, size_t);
static ssize_t (*real_write)(int, const void *, size_t);

/* Miscellaneous. */
static int (*real_ioctl)(int, int, void *);
static int (*real_gettimeofday)(struct timeval *tv, struct timezone *tz);

/* Thread-local flag; ff_mod_init() sets it to 1 once ff_init() succeeded. */
static __thread int inited;
130 
/*
 * SYSCALL(func) evaluates to the function pointer cached in real_<func>.
 * While the cache is still NULL, the symbol "<func>" is looked up with
 * dlsym(RTLD_NEXT, ...) and the result is stored in real_<func> first.
 */
#define SYSCALL(func)                                       \
    (unlikely(!real_##func)                                 \
        ? (real_##func = dlsym(RTLD_NEXT, #func))           \
        : real_##func)
138 
139 extern intptr_t    ngx_max_sockets;
140 
141 /*-
142  * Make sockfd assigned by the fstack plus the value of maximum kernel socket.
143  *  so we can tell them apart according to different scopes.
144  * Solve the condominium ownership at Application Layer and obtain more freedom.
145  * fstack tried to do this by 'fd_reserve', unfortunately, it doesn't work well.
146  */
convert_fstack_fd(int sockfd)147 static inline int convert_fstack_fd(int sockfd) {
148     return sockfd + ngx_max_sockets;
149 }
150 
151 /* Restore socket fd. */
restore_fstack_fd(int sockfd)152 static inline int restore_fstack_fd(int sockfd) {
153     if(sockfd <= ngx_max_sockets) {
154         return sockfd;
155     }
156 
157     return sockfd - ngx_max_sockets;
158 }
159 
160 /* Tell whether a 'sockfd' belongs to fstack. */
is_fstack_fd(int sockfd)161 int is_fstack_fd(int sockfd) {
162     if (unlikely(inited == 0)) {
163         return 0;
164     }
165 
166     return sockfd >= ngx_max_sockets;
167 }
168 
169 // proc_type, 1: primary, 0: secondary.
170 int
ff_mod_init(const char * conf,int proc_id,int proc_type)171 ff_mod_init(const char *conf, int proc_id, int proc_type) {
172     int rc, i;
173     int ff_argc = 4;
174 
175     char **ff_argv = malloc(sizeof(char *)*ff_argc);
176     for (i = 0; i < ff_argc; i++) {
177         ff_argv[i] = malloc(sizeof(char)*PATH_MAX);
178     }
179 
180     sprintf(ff_argv[0], "nginx");
181     sprintf(ff_argv[1], "--conf=%s", conf);
182     sprintf(ff_argv[2], "--proc-id=%d", proc_id);
183     if (proc_type == 1) {
184         sprintf(ff_argv[3], "--proc-type=primary");
185     } else {
186         sprintf(ff_argv[3], "--proc-type=secondary");
187     }
188 
189     rc = ff_init(ff_argc, ff_argv);
190     if (rc == 0) {
191         /* Ensure that the socket we converted
192                 does not exceed the maximum value of 'int' */
193 
194         if(ngx_max_sockets + (unsigned)ff_getmaxfd() > INT_MAX)
195         {
196             rc = -1;
197         }
198 
199         inited = 1;
200     }
201 
202     for (i = 0; i < ff_argc; i++) {
203         free(ff_argv[i]);
204     }
205 
206     free(ff_argv);
207 
208     return rc;
209 }
210 
211 /*-
212  * Verify whether the socket is supported by fstack or not.
213  */
214 int
fstack_territory(int domain,int type,int protocol)215 fstack_territory(int domain, int type, int protocol)
216 {
217     /* Remove creation flags */
218     type &= ~SOCK_CLOEXEC;
219     type &= ~SOCK_NONBLOCK;
220     type &= ~SOCK_FSTACK;
221 
222     if ((AF_INET != domain && AF_INET6 != domain) || (SOCK_STREAM != type && SOCK_DGRAM != type)) {
223         return 0;
224     }
225 
226     return 1;
227 }
228 
229 int
socket(int domain,int type,int protocol)230 socket(int domain, int type, int protocol)
231 {
232     int sock;
233     if (unlikely(inited == 0)) {
234         return SYSCALL(socket)(domain, type, protocol);
235     }
236 
237     if (unlikely(fstack_territory(domain, type, protocol) == 0)) {
238         return SYSCALL(socket)(domain, type, protocol);
239     }
240 
241     if (unlikely((type & SOCK_FSTACK) == 0)) {
242         return SYSCALL(socket)(domain, type, protocol);
243     }
244 
245     type &= ~SOCK_FSTACK;
246     sock = ff_socket(domain, type, protocol);
247 
248     if (sock != -1) {
249         sock = convert_fstack_fd(sock);
250     }
251 
252     return sock;
253 }
254 
255 int
bind(int sockfd,const struct sockaddr * addr,socklen_t addrlen)256 bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
257 {
258     if(is_fstack_fd(sockfd)){
259         sockfd = restore_fstack_fd(sockfd);
260         return ff_bind(sockfd, (struct linux_sockaddr *)addr, addrlen);
261     }
262 
263     return SYSCALL(bind)(sockfd, addr, addrlen);
264 }
265 
266 int
connect(int sockfd,const struct sockaddr * addr,socklen_t addrlen)267 connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
268 {
269     if(is_fstack_fd(sockfd)){
270         sockfd = restore_fstack_fd(sockfd);
271         return ff_connect(sockfd, (struct linux_sockaddr *)addr, addrlen);
272     }
273 
274     return SYSCALL(connect)(sockfd, addr, addrlen);
275 }
276 
277 int
getpeername(int sockfd,struct sockaddr * name,socklen_t * namelen)278 getpeername(int sockfd, struct sockaddr * name,
279     socklen_t *namelen)
280 {
281     if(is_fstack_fd(sockfd)){
282         sockfd = restore_fstack_fd(sockfd);
283         return ff_getpeername(sockfd,
284             (struct linux_sockaddr *)name, namelen);
285     }
286 
287     return SYSCALL(getpeername)(sockfd, name, namelen);
288 }
289 
290 int
getsockname(int sockfd,struct sockaddr * name,socklen_t * namelen)291 getsockname(int sockfd, struct sockaddr *name,
292     socklen_t *namelen)
293 {
294     if(is_fstack_fd(sockfd)){
295         sockfd = restore_fstack_fd(sockfd);
296         return ff_getsockname(sockfd,
297             (struct linux_sockaddr *)name, namelen);
298     }
299 
300     return SYSCALL(getsockname)(sockfd, name, namelen);
301 }
302 
303 ssize_t
send(int sockfd,const void * buf,size_t len,int flags)304 send(int sockfd, const void *buf, size_t len, int flags)
305 {
306     if(is_fstack_fd(sockfd)){
307         sockfd = restore_fstack_fd(sockfd);
308         return ff_send(sockfd, buf, len, flags);
309     }
310 
311     return SYSCALL(send)(sockfd, buf, len, flags);
312 }
313 
314 ssize_t
sendto(int sockfd,const void * buf,size_t len,int flags,const struct sockaddr * dest_addr,socklen_t addrlen)315 sendto(int sockfd, const void *buf, size_t len, int flags,
316     const struct sockaddr *dest_addr, socklen_t addrlen)
317 {
318     if(is_fstack_fd(sockfd)){
319         sockfd = restore_fstack_fd(sockfd);
320         return ff_sendto(sockfd, buf, len, flags,
321             (struct linux_sockaddr *)dest_addr, addrlen);
322     }
323 
324     return SYSCALL(sendto)(sockfd, buf, len, flags, dest_addr, addrlen);
325 }
326 
327 ssize_t
sendmsg(int sockfd,const struct msghdr * msg,int flags)328 sendmsg(int sockfd, const struct msghdr *msg, int flags)
329 {
330     if(is_fstack_fd(sockfd)){
331         sockfd = restore_fstack_fd(sockfd);
332         return ff_sendmsg(sockfd, msg, flags);
333     }
334 
335     return SYSCALL(sendmsg)(sockfd, msg, flags);
336 }
337 
recvmsg(int sockfd,struct msghdr * msg,int flags)338 ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
339 {
340     if(is_fstack_fd(sockfd)){
341         sockfd = restore_fstack_fd(sockfd);
342         return ff_recvmsg(sockfd, msg, flags);
343     }
344 
345     return SYSCALL(recvmsg)(sockfd, msg, flags);
346 }
347 
348 ssize_t
recv(int sockfd,void * buf,size_t len,int flags)349 recv(int sockfd, void *buf, size_t len, int flags)
350 {
351     if(is_fstack_fd(sockfd)){
352         sockfd = restore_fstack_fd(sockfd);
353         return ff_recv(sockfd, buf, len, flags);
354     }
355 
356     return SYSCALL(recv)(sockfd, buf, len, flags);
357 }
358 
/*
 * Checked variant of recv(): forwards to recv() with the same descriptor,
 * buffer, length and flags.  'buflen' is accepted but not used; no
 * "n > buflen" check is performed here.
 */
ssize_t
__recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
{
    ssize_t received;

    (void)buflen;
    received = recv(fd, buf, n, flags);

    return received;
}
368 
369 int
listen(int sockfd,int backlog)370 listen(int sockfd, int backlog)
371 {
372     if(is_fstack_fd(sockfd)){
373         sockfd = restore_fstack_fd(sockfd);
374         return ff_listen(sockfd, backlog);
375     }
376 
377     return SYSCALL(listen)(sockfd, backlog);
378 }
379 
380 int
getsockopt(int sockfd,int level,int optname,void * optval,socklen_t * optlen)381 getsockopt(int sockfd, int level, int optname,
382     void *optval, socklen_t *optlen)
383 {
384     if(is_fstack_fd(sockfd)){
385         sockfd = restore_fstack_fd(sockfd);
386         return ff_getsockopt(sockfd, level, optname, optval, optlen);
387     }
388 
389     return SYSCALL(getsockopt)(sockfd, level, optname, optval, optlen);
390 }
391 
392 int
setsockopt(int sockfd,int level,int optname,const void * optval,socklen_t optlen)393 setsockopt (int sockfd, int level, int optname,
394     const void *optval, socklen_t optlen)
395 {
396     if(is_fstack_fd(sockfd)){
397         sockfd = restore_fstack_fd(sockfd);
398         return ff_setsockopt(sockfd, level, optname, optval, optlen);
399     }
400 
401     return SYSCALL(setsockopt)(sockfd, level, optname, optval, optlen);
402 }
403 
404 int
accept(int sockfd,struct sockaddr * addr,socklen_t * addrlen)405 accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
406 {
407     int rc;
408     if(is_fstack_fd(sockfd)){
409         sockfd = restore_fstack_fd(sockfd);
410         rc = ff_accept(sockfd, (struct linux_sockaddr *)addr, addrlen);
411         if (rc != -1) {
412             rc = convert_fstack_fd(rc);
413         }
414 
415         return rc;
416     }
417 
418     return SYSCALL(accept)(sockfd, addr, addrlen);
419 }
420 
421 int
accept4(int sockfd,struct sockaddr * addr,socklen_t * addrlen,int flags)422 accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)
423 {
424     int rc;
425     if(is_fstack_fd(sockfd)){
426         sockfd = restore_fstack_fd(sockfd);
427         rc = ff_accept(sockfd, (struct linux_sockaddr *)addr, addrlen);
428         if (rc != -1) {
429             rc = convert_fstack_fd(rc);
430         }
431 
432         return rc;
433     }
434 
435     return SYSCALL(accept4)(sockfd, addr, addrlen, flags);
436 }
437 
438 int
close(int sockfd)439 close(int sockfd)
440 {
441     if(is_fstack_fd(sockfd)){
442         sockfd = restore_fstack_fd(sockfd);
443         return ff_close(sockfd);
444     }
445 
446     return SYSCALL(close)(sockfd);
447 }
448 
449 int
shutdown(int sockfd,int how)450 shutdown(int sockfd, int how)
451 {
452     if(is_fstack_fd(sockfd)){
453         sockfd = restore_fstack_fd(sockfd);
454         return ff_shutdown(sockfd, how);
455     }
456 
457     return SYSCALL(shutdown)(sockfd, how);
458 }
459 
460 ssize_t
writev(int sockfd,const struct iovec * iov,int iovcnt)461 writev(int sockfd, const struct iovec *iov, int iovcnt)
462 {
463     if(is_fstack_fd(sockfd)){
464         sockfd = restore_fstack_fd(sockfd);
465         return ff_writev(sockfd, iov, iovcnt);
466     }
467 
468     return SYSCALL(writev)(sockfd, iov, iovcnt);
469 }
470 
471 ssize_t
readv(int sockfd,const struct iovec * iov,int iovcnt)472 readv(int sockfd, const struct iovec *iov, int iovcnt)
473 {
474     if(is_fstack_fd(sockfd)){
475         sockfd = restore_fstack_fd(sockfd);
476         return ff_readv(sockfd, iov, iovcnt);
477     }
478 
479     return SYSCALL(readv)(sockfd, iov, iovcnt);
480 }
481 
482 ssize_t
read(int sockfd,void * buf,size_t count)483 read(int sockfd, void *buf, size_t count)
484 {
485     if(is_fstack_fd(sockfd)){
486         sockfd = restore_fstack_fd(sockfd);
487         return ff_read(sockfd, buf, count);
488     }
489 
490     return SYSCALL(read)(sockfd, buf, count);
491 }
492 
493 ssize_t
write(int sockfd,const void * buf,size_t count)494 write(int sockfd, const void *buf, size_t count)
495 {
496     if(is_fstack_fd(sockfd)){
497         sockfd = restore_fstack_fd(sockfd);
498         return ff_write(sockfd, buf, count);
499     }
500 
501     return SYSCALL(write)(sockfd, buf, count);
502 }
503 
504 int
ioctl(int sockfd,int request,void * p)505 ioctl(int sockfd, int request, void *p)
506 {
507     if(is_fstack_fd(sockfd)){
508         sockfd = restore_fstack_fd(sockfd);
509         return ff_ioctl(sockfd, request, p);
510     }
511 
512     return SYSCALL(ioctl)(sockfd, request, p);
513 }
514 
/*
 * kqueue() is always served by fstack: the result of ff_kqueue() is returned
 * unchanged.
 */
int
kqueue(void)
{
    int kq = ff_kqueue();

    return kq;
}
520 
521 int
kevent(int kq,const struct kevent * changelist,int nchanges,struct kevent * eventlist,int nevents,const struct timespec * timeout)522 kevent(int kq, const struct kevent *changelist, int nchanges,
523     struct kevent *eventlist, int nevents, const struct timespec *timeout)
524 {
525     struct kevent     *kev;
526     int                i = 0;
527     for(i = 0; i < nchanges; i++) {
528         kev = (struct kevent *)&changelist[i];
529         switch (kev->filter) {
530 
531         case EVFILT_READ:
532         case EVFILT_WRITE:
533         case EVFILT_VNODE:
534             kev->ident = restore_fstack_fd(kev->ident);
535             break;
536         case EVFILT_AIO:
537         case EVFILT_PROC:
538         case EVFILT_SIGNAL:
539         case EVFILT_TIMER:
540         case EVFILT_USER:
541         default:
542             break;
543         }
544     }
545     return ff_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
546 }
547 
548 int
gettimeofday(struct timeval * tv,struct timezone * tz)549 gettimeofday(struct timeval *tv, struct timezone *tz)
550 {
551     if (unlikely(inited == 0)) {
552         return SYSCALL(gettimeofday)(tv, tz);
553     }
554 
555     return ff_gettimeofday(tv, tz);
556 }
557