1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include "services/listen_dnsport.h"
51 #include "services/outside_network.h"
52 #include "util/netevent.h"
53 #include "util/log.h"
54 #include "util/config_file.h"
55 #include "util/net_help.h"
56 #include "sldns/sbuffer.h"
57 #include "sldns/parseutil.h"
58 #include "services/mesh.h"
59 #include "util/fptr_wlist.h"
60 #include "util/locks.h"
61 
62 #ifdef HAVE_NETDB_H
63 #include <netdb.h>
64 #endif
65 #include <fcntl.h>
66 
67 #ifdef HAVE_SYS_UN_H
68 #include <sys/un.h>
69 #endif
70 
71 #ifdef HAVE_SYSTEMD
72 #include <systemd/sd-daemon.h>
73 #endif
74 
75 #ifdef HAVE_IFADDRS_H
76 #include <ifaddrs.h>
77 #endif
78 #ifdef HAVE_NET_IF_H
79 #include <net/if.h>
80 #endif
81 
82 /** number of queued TCP connections for listen() */
83 #define TCP_BACKLOG 256
84 
85 #ifndef THREADS_DISABLED
86 /** lock on the counter of stream buffer memory */
87 static lock_basic_type stream_wait_count_lock;
88 /** lock on the counter of HTTP2 query buffer memory */
89 static lock_basic_type http2_query_buffer_count_lock;
90 /** lock on the counter of HTTP2 response buffer memory */
91 static lock_basic_type http2_response_buffer_count_lock;
92 #endif
93 /** size (in bytes) of stream wait buffers */
94 static size_t stream_wait_count = 0;
95 /** is the lock initialised for stream wait buffers */
96 static int stream_wait_lock_inited = 0;
97 /** size (in bytes) of HTTP2 query buffers */
98 static size_t http2_query_buffer_count = 0;
99 /** is the lock initialised for HTTP2 query buffers */
100 static int http2_query_buffer_lock_inited = 0;
101 /** size (in bytes) of HTTP2 response buffers */
102 static size_t http2_response_buffer_count = 0;
103 /** is the lock initialised for HTTP2 response buffers */
104 static int http2_response_buffer_lock_inited = 0;
105 
106 /**
107  * Debug print of the getaddrinfo returned address.
108  * @param addr: the address returned.
109  */
110 static void
verbose_print_addr(struct addrinfo * addr)111 verbose_print_addr(struct addrinfo *addr)
112 {
113 	if(verbosity >= VERB_ALGO) {
114 		char buf[100];
115 		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
116 #ifdef INET6
117 		if(addr->ai_family == AF_INET6)
118 			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
119 				sin6_addr;
120 #endif /* INET6 */
121 		if(inet_ntop(addr->ai_family, sinaddr, buf,
122 			(socklen_t)sizeof(buf)) == 0) {
123 			(void)strlcpy(buf, "(null)", sizeof(buf));
124 		}
125 		buf[sizeof(buf)-1] = 0;
126 		verbose(VERB_ALGO, "creating %s%s socket %s %d",
127 			addr->ai_socktype==SOCK_DGRAM?"udp":
128 			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
129 			addr->ai_family==AF_INET?"4":
130 			addr->ai_family==AF_INET6?"6":
131 			"_otherfam", buf,
132 			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
133 	}
134 }
135 
136 void
verbose_print_unbound_socket(struct unbound_socket * ub_sock)137 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
138 {
139 	if(verbosity >= VERB_ALGO) {
140 		log_info("listing of unbound_socket structure:");
141 		verbose_print_addr(ub_sock->addr);
142 		log_info("s is: %d, fam is: %s", ub_sock->s, ub_sock->fam == AF_INET?"AF_INET":"AF_INET6");
143 	}
144 }
145 
#ifdef HAVE_SYSTEMD
/**
 * Search the file descriptors handed over by systemd socket activation
 * for one that matches the wanted socket properties.
 * @param family: address family to match.
 * @param socktype: socket type to match.
 * @param listen: listening state passed on to sd_is_socket(); the callers
 *	in this file pass 1 for stream sockets and -1 for UDP.
 * @param addr: address, only used in the error message; may be NULL.
 * @param addrlen: length of addr.
 * @param path: printed in the error message when addr is NULL.
 * @return the first matching file descriptor, or -1 when systemd is not
 *	running, the activation environment is missing, or no descriptor
 *	matches.
 */
static int
systemd_get_activated(int family, int socktype, int listen,
		      struct sockaddr *addr, socklen_t addrlen,
		      const char *path)
{
	int i = 0;
	int r = 0;
	int s = -1;
	const char* listen_pid, *listen_fds;

	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */

	if((r = sd_booted()) < 1) {
		if(r == 0)
			log_warn("systemd is not running");
		else
			log_err("systemd sd_booted(): %s", strerror(-r));
		return -1;
	}

	/* both variables are only checked for presence here */
	listen_pid = getenv("LISTEN_PID");
	listen_fds = getenv("LISTEN_FDS");

	if (!listen_pid) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
		return -1;
	}

	if (!listen_fds) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
		return -1;
	}

	if((r = sd_listen_fds(0)) < 1) {
		if(r == 0)
			log_warn("systemd: did not return socket, check unit configuration");
		else
			log_err("systemd sd_listen_fds(): %s", strerror(-r));
		return -1;
	}

	for(i = 0; i < r; i++) {
		/* sd_is_socket() reports failure as a negative value, so
		 * only a positive result counts as a match */
		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype,
			listen) > 0) {
			s = SD_LISTEN_FDS_START + i;
			break;
		}
	}
	if (s == -1) {
		if (addr)
			log_err_addr("systemd sd_listen_fds()",
				     "no such socket",
				     (struct sockaddr_storage *)addr, addrlen);
		else
			log_err("systemd sd_listen_fds(): %s", path);
	}
	return s;
}
#endif
205 
206 int
create_udp_sock(int family,int socktype,struct sockaddr * addr,socklen_t addrlen,int v6only,int * inuse,int * noproto,int rcv,int snd,int listen,int * reuseport,int transparent,int freebind,int use_systemd,int dscp)207 create_udp_sock(int family, int socktype, struct sockaddr* addr,
208         socklen_t addrlen, int v6only, int* inuse, int* noproto,
209 	int rcv, int snd, int listen, int* reuseport, int transparent,
210 	int freebind, int use_systemd, int dscp)
211 {
212 	int s;
213 	char* err;
214 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
215 	int on=1;
216 #endif
217 #ifdef IPV6_MTU
218 	int mtu = IPV6_MIN_MTU;
219 #endif
220 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
221 	(void)rcv;
222 #endif
223 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
224 	(void)snd;
225 #endif
226 #ifndef IPV6_V6ONLY
227 	(void)v6only;
228 #endif
229 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
230 	(void)transparent;
231 #endif
232 #if !defined(IP_FREEBIND)
233 	(void)freebind;
234 #endif
235 #ifdef HAVE_SYSTEMD
236 	int got_fd_from_systemd = 0;
237 
238 	if (!use_systemd
239 	    || (use_systemd
240 		&& (s = systemd_get_activated(family, socktype, -1, addr,
241 					      addrlen, NULL)) == -1)) {
242 #else
243 	(void)use_systemd;
244 #endif
245 	if((s = socket(family, socktype, 0)) == -1) {
246 		*inuse = 0;
247 #ifndef USE_WINSOCK
248 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
249 			*noproto = 1;
250 			return -1;
251 		}
252 #else
253 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
254 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
255 			*noproto = 1;
256 			return -1;
257 		}
258 #endif
259 		log_err("can't create socket: %s", sock_strerror(errno));
260 		*noproto = 0;
261 		return -1;
262 	}
263 #ifdef HAVE_SYSTEMD
264 	} else {
265 		got_fd_from_systemd = 1;
266 	}
267 #endif
268 	if(listen) {
269 #ifdef SO_REUSEADDR
270 		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
271 			(socklen_t)sizeof(on)) < 0) {
272 			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
273 				sock_strerror(errno));
274 #ifndef USE_WINSOCK
275 			if(errno != ENOSYS) {
276 				close(s);
277 				*noproto = 0;
278 				*inuse = 0;
279 				return -1;
280 			}
281 #else
282 			closesocket(s);
283 			*noproto = 0;
284 			*inuse = 0;
285 			return -1;
286 #endif
287 		}
288 #endif /* SO_REUSEADDR */
289 #ifdef SO_REUSEPORT
290 #  ifdef SO_REUSEPORT_LB
291 		/* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
292 		 * like SO_REUSEPORT on Linux.  This is what the users want
293 		 * with the config option in unbound.conf; if we actually
294 		 * need local address and port reuse they'll also need to
295 		 * have SO_REUSEPORT set for them, assume it was _LB they want.
296 		 */
297 		if (reuseport && *reuseport &&
298 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
299 			(socklen_t)sizeof(on)) < 0) {
300 #ifdef ENOPROTOOPT
301 			if(errno != ENOPROTOOPT || verbosity >= 3)
302 				log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
303 					strerror(errno));
304 #endif
305 			/* this option is not essential, we can continue */
306 			*reuseport = 0;
307 		}
308 #  else /* no SO_REUSEPORT_LB */
309 
310 		/* try to set SO_REUSEPORT so that incoming
311 		 * queries are distributed evenly among the receiving threads.
312 		 * Each thread must have its own socket bound to the same port,
313 		 * with SO_REUSEPORT set on each socket.
314 		 */
315 		if (reuseport && *reuseport &&
316 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
317 			(socklen_t)sizeof(on)) < 0) {
318 #ifdef ENOPROTOOPT
319 			if(errno != ENOPROTOOPT || verbosity >= 3)
320 				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
321 					strerror(errno));
322 #endif
323 			/* this option is not essential, we can continue */
324 			*reuseport = 0;
325 		}
326 #  endif /* SO_REUSEPORT_LB */
327 #else
328 		(void)reuseport;
329 #endif /* defined(SO_REUSEPORT) */
330 #ifdef IP_TRANSPARENT
331 		if (transparent &&
332 		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
333 		    (socklen_t)sizeof(on)) < 0) {
334 			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
335 			strerror(errno));
336 		}
337 #elif defined(IP_BINDANY)
338 		if (transparent &&
339 		    setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
340 		    (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
341 		    (void*)&on, (socklen_t)sizeof(on)) < 0) {
342 			log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
343 			(family==AF_INET6?"V6":""), strerror(errno));
344 		}
345 #elif defined(SO_BINDANY)
346 		if (transparent &&
347 		    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
348 		    (socklen_t)sizeof(on)) < 0) {
349 			log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
350 			strerror(errno));
351 		}
352 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
353 	}
354 #ifdef IP_FREEBIND
355 	if(freebind &&
356 	    setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
357 	    (socklen_t)sizeof(on)) < 0) {
358 		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
359 		strerror(errno));
360 	}
361 #endif /* IP_FREEBIND */
362 	if(rcv) {
363 #ifdef SO_RCVBUF
364 		int got;
365 		socklen_t slen = (socklen_t)sizeof(got);
366 #  ifdef SO_RCVBUFFORCE
367 		/* Linux specific: try to use root permission to override
368 		 * system limits on rcvbuf. The limit is stored in
369 		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
370 		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
371 			(socklen_t)sizeof(rcv)) < 0) {
372 			if(errno != EPERM) {
373 				log_err("setsockopt(..., SO_RCVBUFFORCE, "
374 					"...) failed: %s", sock_strerror(errno));
375 				sock_close(s);
376 				*noproto = 0;
377 				*inuse = 0;
378 				return -1;
379 			}
380 #  endif /* SO_RCVBUFFORCE */
381 			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
382 				(socklen_t)sizeof(rcv)) < 0) {
383 				log_err("setsockopt(..., SO_RCVBUF, "
384 					"...) failed: %s", sock_strerror(errno));
385 				sock_close(s);
386 				*noproto = 0;
387 				*inuse = 0;
388 				return -1;
389 			}
390 			/* check if we got the right thing or if system
391 			 * reduced to some system max.  Warn if so */
392 			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
393 				&slen) >= 0 && got < rcv/2) {
394 				log_warn("so-rcvbuf %u was not granted. "
395 					"Got %u. To fix: start with "
396 					"root permissions(linux) or sysctl "
397 					"bigger net.core.rmem_max(linux) or "
398 					"kern.ipc.maxsockbuf(bsd) values.",
399 					(unsigned)rcv, (unsigned)got);
400 			}
401 #  ifdef SO_RCVBUFFORCE
402 		}
403 #  endif
404 #endif /* SO_RCVBUF */
405 	}
406 	/* first do RCVBUF as the receive buffer is more important */
407 	if(snd) {
408 #ifdef SO_SNDBUF
409 		int got;
410 		socklen_t slen = (socklen_t)sizeof(got);
411 #  ifdef SO_SNDBUFFORCE
412 		/* Linux specific: try to use root permission to override
413 		 * system limits on sndbuf. The limit is stored in
414 		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
415 		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
416 			(socklen_t)sizeof(snd)) < 0) {
417 			if(errno != EPERM) {
418 				log_err("setsockopt(..., SO_SNDBUFFORCE, "
419 					"...) failed: %s", sock_strerror(errno));
420 				sock_close(s);
421 				*noproto = 0;
422 				*inuse = 0;
423 				return -1;
424 			}
425 #  endif /* SO_SNDBUFFORCE */
426 			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
427 				(socklen_t)sizeof(snd)) < 0) {
428 				log_err("setsockopt(..., SO_SNDBUF, "
429 					"...) failed: %s", sock_strerror(errno));
430 				sock_close(s);
431 				*noproto = 0;
432 				*inuse = 0;
433 				return -1;
434 			}
435 			/* check if we got the right thing or if system
436 			 * reduced to some system max.  Warn if so */
437 			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
438 				&slen) >= 0 && got < snd/2) {
439 				log_warn("so-sndbuf %u was not granted. "
440 					"Got %u. To fix: start with "
441 					"root permissions(linux) or sysctl "
442 					"bigger net.core.wmem_max(linux) or "
443 					"kern.ipc.maxsockbuf(bsd) values.",
444 					(unsigned)snd, (unsigned)got);
445 			}
446 #  ifdef SO_SNDBUFFORCE
447 		}
448 #  endif
449 #endif /* SO_SNDBUF */
450 	}
451 	err = set_ip_dscp(s, family, dscp);
452 	if(err != NULL)
453 		log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
454 	if(family == AF_INET6) {
455 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
456 		int omit6_set = 0;
457 		int action;
458 # endif
459 # if defined(IPV6_V6ONLY)
460 		if(v6only) {
461 			int val=(v6only==2)?0:1;
462 			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
463 				(void*)&val, (socklen_t)sizeof(val)) < 0) {
464 				log_err("setsockopt(..., IPV6_V6ONLY"
465 					", ...) failed: %s", sock_strerror(errno));
466 				sock_close(s);
467 				*noproto = 0;
468 				*inuse = 0;
469 				return -1;
470 			}
471 		}
472 # endif
473 # if defined(IPV6_USE_MIN_MTU)
474 		/*
475 		 * There is no fragmentation of IPv6 datagrams
476 		 * during forwarding in the network. Therefore
477 		 * we do not send UDP datagrams larger than
478 		 * the minimum IPv6 MTU of 1280 octets. The
479 		 * EDNS0 message length can be larger if the
480 		 * network stack supports IPV6_USE_MIN_MTU.
481 		 */
482 		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
483 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
484 			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
485 				"...) failed: %s", sock_strerror(errno));
486 			sock_close(s);
487 			*noproto = 0;
488 			*inuse = 0;
489 			return -1;
490 		}
491 # elif defined(IPV6_MTU)
492 		/*
493 		 * On Linux, to send no larger than 1280, the PMTUD is
494 		 * disabled by default for datagrams anyway, so we set
495 		 * the MTU to use.
496 		 */
497 		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
498 			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
499 			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
500 				sock_strerror(errno));
501 			sock_close(s);
502 			*noproto = 0;
503 			*inuse = 0;
504 			return -1;
505 		}
506 # endif /* IPv6 MTU */
507 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
508 #  if defined(IP_PMTUDISC_OMIT)
509 		action = IP_PMTUDISC_OMIT;
510 		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
511 			&action, (socklen_t)sizeof(action)) < 0) {
512 
513 			if (errno != EINVAL) {
514 				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
515 					strerror(errno));
516 				sock_close(s);
517 				*noproto = 0;
518 				*inuse = 0;
519 				return -1;
520 			}
521 		}
522 		else
523 		{
524 		    omit6_set = 1;
525 		}
526 #  endif
527 		if (omit6_set == 0) {
528 			action = IP_PMTUDISC_DONT;
529 			if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
530 				&action, (socklen_t)sizeof(action)) < 0) {
531 				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
532 					strerror(errno));
533 				sock_close(s);
534 				*noproto = 0;
535 				*inuse = 0;
536 				return -1;
537 			}
538 		}
539 # endif /* IPV6_MTU_DISCOVER */
540 	} else if(family == AF_INET) {
541 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
542 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
543  * PMTU information is not accepted, but fragmentation is allowed
544  * if and only if the packet size exceeds the outgoing interface MTU
545  * (and also uses the interface mtu to determine the size of the packets).
546  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
547  * FreeBSD already has same semantics without setting the option. */
548 		int omit_set = 0;
549 		int action;
550 #   if defined(IP_PMTUDISC_OMIT)
551 		action = IP_PMTUDISC_OMIT;
552 		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
553 			&action, (socklen_t)sizeof(action)) < 0) {
554 
555 			if (errno != EINVAL) {
556 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
557 					strerror(errno));
558 				sock_close(s);
559 				*noproto = 0;
560 				*inuse = 0;
561 				return -1;
562 			}
563 		}
564 		else
565 		{
566 		    omit_set = 1;
567 		}
568 #   endif
569 		if (omit_set == 0) {
570    			action = IP_PMTUDISC_DONT;
571 			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
572 				&action, (socklen_t)sizeof(action)) < 0) {
573 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
574 					strerror(errno));
575 				sock_close(s);
576 				*noproto = 0;
577 				*inuse = 0;
578 				return -1;
579 			}
580 		}
581 #  elif defined(IP_DONTFRAG) && !defined(__APPLE__)
582 		/* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
583 		 * but does not work on that version, so we exclude it */
584 		int off = 0;
585 		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
586 			&off, (socklen_t)sizeof(off)) < 0) {
587 			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
588 				strerror(errno));
589 			sock_close(s);
590 			*noproto = 0;
591 			*inuse = 0;
592 			return -1;
593 		}
594 #  endif /* IPv4 MTU */
595 	}
596 	if(
597 #ifdef HAVE_SYSTEMD
598 		!got_fd_from_systemd &&
599 #endif
600 		bind(s, (struct sockaddr*)addr, addrlen) != 0) {
601 		*noproto = 0;
602 		*inuse = 0;
603 #ifndef USE_WINSOCK
604 #ifdef EADDRINUSE
605 		*inuse = (errno == EADDRINUSE);
606 		/* detect freebsd jail with no ipv6 permission */
607 		if(family==AF_INET6 && errno==EINVAL)
608 			*noproto = 1;
609 		else if(errno != EADDRINUSE &&
610 			!(errno == EACCES && verbosity < 4 && !listen)
611 #ifdef EADDRNOTAVAIL
612 			&& !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
613 #endif
614 			) {
615 			log_err_addr("can't bind socket", strerror(errno),
616 				(struct sockaddr_storage*)addr, addrlen);
617 		}
618 #endif /* EADDRINUSE */
619 #else /* USE_WINSOCK */
620 		if(WSAGetLastError() != WSAEADDRINUSE &&
621 			WSAGetLastError() != WSAEADDRNOTAVAIL &&
622 			!(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
623 			log_err_addr("can't bind socket",
624 				wsa_strerror(WSAGetLastError()),
625 				(struct sockaddr_storage*)addr, addrlen);
626 		}
627 #endif /* USE_WINSOCK */
628 		sock_close(s);
629 		return -1;
630 	}
631 	if(!fd_set_nonblock(s)) {
632 		*noproto = 0;
633 		*inuse = 0;
634 		sock_close(s);
635 		return -1;
636 	}
637 	return s;
638 }
639 
640 int
create_tcp_accept_sock(struct addrinfo * addr,int v6only,int * noproto,int * reuseport,int transparent,int mss,int nodelay,int freebind,int use_systemd,int dscp)641 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
642 	int* reuseport, int transparent, int mss, int nodelay, int freebind,
643 	int use_systemd, int dscp)
644 {
645 	int s;
646 	char* err;
647 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
648 	int on = 1;
649 #endif
650 #ifdef HAVE_SYSTEMD
651 	int got_fd_from_systemd = 0;
652 #endif
653 #ifdef USE_TCP_FASTOPEN
654 	int qlen;
655 #endif
656 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
657 	(void)transparent;
658 #endif
659 #if !defined(IP_FREEBIND)
660 	(void)freebind;
661 #endif
662 	verbose_print_addr(addr);
663 	*noproto = 0;
664 #ifdef HAVE_SYSTEMD
665 	if (!use_systemd ||
666 	    (use_systemd
667 	     && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
668 					   addr->ai_addr, addr->ai_addrlen,
669 					   NULL)) == -1)) {
670 #else
671 	(void)use_systemd;
672 #endif
673 	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
674 #ifndef USE_WINSOCK
675 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
676 			*noproto = 1;
677 			return -1;
678 		}
679 #else
680 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
681 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
682 			*noproto = 1;
683 			return -1;
684 		}
685 #endif
686 		log_err("can't create socket: %s", sock_strerror(errno));
687 		return -1;
688 	}
689 	if(nodelay) {
690 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
691 		if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
692 			(socklen_t)sizeof(on)) < 0) {
693 			#ifndef USE_WINSOCK
694 			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
695 				strerror(errno));
696 			#else
697 			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
698 				wsa_strerror(WSAGetLastError()));
699 			#endif
700 		}
701 #else
702 		log_warn(" setsockopt(TCP_NODELAY) unsupported");
703 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
704 	}
705 	if (mss > 0) {
706 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
707 		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
708 			(socklen_t)sizeof(mss)) < 0) {
709 			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
710 				sock_strerror(errno));
711 		} else {
712 			verbose(VERB_ALGO,
713 				" tcp socket mss set to %d", mss);
714 		}
715 #else
716 		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
717 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
718 	}
719 #ifdef HAVE_SYSTEMD
720 	} else {
721 		got_fd_from_systemd = 1;
722     }
723 #endif
724 #ifdef SO_REUSEADDR
725 	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
726 		(socklen_t)sizeof(on)) < 0) {
727 		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
728 			sock_strerror(errno));
729 		sock_close(s);
730 		return -1;
731 	}
732 #endif /* SO_REUSEADDR */
733 #ifdef IP_FREEBIND
734 	if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
735 	    (socklen_t)sizeof(on)) < 0) {
736 		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
737 		strerror(errno));
738 	}
739 #endif /* IP_FREEBIND */
740 #ifdef SO_REUSEPORT
741 	/* try to set SO_REUSEPORT so that incoming
742 	 * connections are distributed evenly among the receiving threads.
743 	 * Each thread must have its own socket bound to the same port,
744 	 * with SO_REUSEPORT set on each socket.
745 	 */
746 	if (reuseport && *reuseport &&
747 		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
748 		(socklen_t)sizeof(on)) < 0) {
749 #ifdef ENOPROTOOPT
750 		if(errno != ENOPROTOOPT || verbosity >= 3)
751 			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
752 				strerror(errno));
753 #endif
754 		/* this option is not essential, we can continue */
755 		*reuseport = 0;
756 	}
757 #else
758 	(void)reuseport;
759 #endif /* defined(SO_REUSEPORT) */
760 #if defined(IPV6_V6ONLY)
761 	if(addr->ai_family == AF_INET6 && v6only) {
762 		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
763 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
764 			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
765 				sock_strerror(errno));
766 			sock_close(s);
767 			return -1;
768 		}
769 	}
770 #else
771 	(void)v6only;
772 #endif /* IPV6_V6ONLY */
773 #ifdef IP_TRANSPARENT
774 	if (transparent &&
775 	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
776 	    (socklen_t)sizeof(on)) < 0) {
777 		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
778 			strerror(errno));
779 	}
780 #elif defined(IP_BINDANY)
781 	if (transparent &&
782 	    setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
783 	    (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
784 	    (void*)&on, (socklen_t)sizeof(on)) < 0) {
785 		log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
786 		(addr->ai_family==AF_INET6?"V6":""), strerror(errno));
787 	}
788 #elif defined(SO_BINDANY)
789 	if (transparent &&
790 	    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
791 	    sizeof(on)) < 0) {
792 		log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
793 		strerror(errno));
794 	}
795 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
796 	err = set_ip_dscp(s, addr->ai_family, dscp);
797 	if(err != NULL)
798 		log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
799 	if(
800 #ifdef HAVE_SYSTEMD
801 		!got_fd_from_systemd &&
802 #endif
803         bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
804 #ifndef USE_WINSOCK
805 		/* detect freebsd jail with no ipv6 permission */
806 		if(addr->ai_family==AF_INET6 && errno==EINVAL)
807 			*noproto = 1;
808 		else {
809 			log_err_addr("can't bind socket", strerror(errno),
810 				(struct sockaddr_storage*)addr->ai_addr,
811 				addr->ai_addrlen);
812 		}
813 #else
814 		log_err_addr("can't bind socket",
815 			wsa_strerror(WSAGetLastError()),
816 			(struct sockaddr_storage*)addr->ai_addr,
817 			addr->ai_addrlen);
818 #endif
819 		sock_close(s);
820 		return -1;
821 	}
822 	if(!fd_set_nonblock(s)) {
823 		sock_close(s);
824 		return -1;
825 	}
826 	if(listen(s, TCP_BACKLOG) == -1) {
827 		log_err("can't listen: %s", sock_strerror(errno));
828 		sock_close(s);
829 		return -1;
830 	}
831 #ifdef USE_TCP_FASTOPEN
832 	/* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
833 	   against IP spoofing attacks as suggested in RFC7413 */
834 #ifdef __APPLE__
835 	/* OS X implementation only supports qlen of 1 via this call. Actual
836 	   value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
837 	qlen = 1;
838 #else
839 	/* 5 is recommended on linux */
840 	qlen = 5;
841 #endif
842 	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
843 		  sizeof(qlen))) == -1 ) {
844 #ifdef ENOPROTOOPT
845 		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
846 		   disabled, except when verbosity enabled for debugging */
847 		if(errno != ENOPROTOOPT || verbosity >= 3) {
848 #endif
849 		  if(errno == EPERM) {
850 		  	log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
851 		  } else {
852 		  	log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
853 		  }
854 #ifdef ENOPROTOOPT
855 		}
856 #endif
857 	}
858 #endif
859 	return s;
860 }
861 
862 char*
set_ip_dscp(int socket,int addrfamily,int dscp)863 set_ip_dscp(int socket, int addrfamily, int dscp)
864 {
865 	int ds;
866 
867 	if(dscp == 0)
868 		return NULL;
869 	ds = dscp << 2;
870 	switch(addrfamily) {
871 	case AF_INET6:
872 	#ifdef IPV6_TCLASS
873 		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds,
874 			sizeof(ds)) < 0)
875 			return sock_strerror(errno);
876 		break;
877 	#else
878 		return "IPV6_TCLASS not defined on this system";
879 	#endif
880 	default:
881 		if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
882 			return sock_strerror(errno);
883 		break;
884 	}
885 	return NULL;
886 }
887 
/**
 * Create a listening local (unix domain) stream socket.
 * An existing file at the path is removed before binding.
 * @param path: filesystem path of the socket.  A path that does not fit
 *	in sockaddr_un.sun_path is rejected.
 * @param noproto: set to 1 only when local sockets are not supported by
 *	this build; otherwise it is not written.
 * @param use_systemd: if nonzero (and built with systemd), first try to
 *	obtain the socket from systemd.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif
#ifdef HAVE_SYSTEMD
	int ret;

	/* prefer a socket handed over by systemd, else make one below */
	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
		return ret;
#else
	(void)use_systemd;
#endif
#ifdef HAVE_SYS_UN_H
	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD.  strlcpy returns the length of
	 * the source, so a result that does not fit means truncation; binding
	 * to a truncated name would create the socket at another path than
	 * the one that is unlinked below. */
	if (strlcpy(usock.sun_path, path, sizeof(usock.sun_path))
		>= sizeof(usock.sun_path)) {
		log_err("Cannot create local socket %s (path too long)",
			path);
		return -1;
	}

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
962 
963 
/**
 * Resolve an interface name and port with getaddrinfo and create a socket
 * of the wanted type for the first result.
 * The getaddrinfo result is stored in ub_sock->addr whether or not the
 * socket could be created; it is not freed here.
 * @param stype: SOCK_DGRAM for a UDP socket, anything else gives a TCP
 *	accept socket.
 * @param ifname: interface to resolve; NULL is logged as "default".
 * @param port: port to resolve.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten with stype.
 * @param noip6: set to 1 when the socket failed because the protocol is
 *	not available and the hints ask for AF_INET6, else 0.
 * @param ub_sock: receives the address, the socket and the hinted family.
 *	It is not written when getaddrinfo fails.
 * The remaining parameters are passed on to create_udp_sock() or
 * create_tcp_accept_sock().
 * @return the socket descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	struct addrinfo *ailist = NULL;
	int gai, fd, inuse, noproto;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &ailist);
	if(gai != 0 || !ailist) {
		const char* sysmsg = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* for EAI_SYSTEM the details are in errno */
		if(gai == EAI_SYSTEM)
			sysmsg = (char*)strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			sysmsg);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ailist, v6only, &noproto,
			reuseport, transparent, tcp_mss, nodelay, freebind,
			use_systemd, dscp);
	} else {
		verbose_print_addr(ailist);
		fd = create_udp_sock(ailist->ai_family, ailist->ai_socktype,
			(struct sockaddr*)ailist->ai_addr, ailist->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
	}

	if(fd == -1) {
		/* inuse is only filled in by the UDP path */
		if(stype == SOCK_DGRAM && inuse)
			log_err("bind: address already in use");
		else if(noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	}

	ub_sock->addr = ailist;
	ub_sock->s = fd;
	ub_sock->fam = hints->ai_family;

	return fd;
}
1020 
/**
 * Make socket. If ifname has the form address@port, the part after the
 * '@' replaces the port argument and the part before it is used as the
 * interface address.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	char portbuf[16];
	char addrbuf[128];
	size_t addrlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* no port override in the interface name */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock);
	}

	addrlen = (size_t)(at - ifname);
	if(addrlen >= sizeof(addrbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* split ifspec@port into its two parts */
	(void)strlcpy(addrbuf, ifname, sizeof(addrbuf));
	addrbuf[addrlen] = 0;
	(void)strlcpy(portbuf, at+1, sizeof(portbuf));
	return make_sock(stype, addrbuf, portbuf, hints, v6only, noip6, rcv,
		snd, reuseport, transparent, tcp_mss, nodelay, freebind,
		use_systemd, dscp, ub_sock);
}
1055 
1056 /**
1057  * Add port to open ports list.
1058  * @param list: list head. changed.
1059  * @param s: fd.
1060  * @param ftype: if fd is UDP.
1061  * @param ub_sock: socket with address.
1062  * @return false on failure. list in unchanged then.
1063  */
1064 static int
port_insert(struct listen_port ** list,int s,enum listen_type ftype,struct unbound_socket * ub_sock)1065 port_insert(struct listen_port** list, int s, enum listen_type ftype, struct unbound_socket* ub_sock)
1066 {
1067 	struct listen_port* item = (struct listen_port*)malloc(
1068 		sizeof(struct listen_port));
1069 	if(!item)
1070 		return 0;
1071 	item->next = *list;
1072 	item->fd = s;
1073 	item->ftype = ftype;
1074 	item->socket = ub_sock;
1075 	*list = item;
1076 	return 1;
1077 }
1078 
/**
 * Set fd to receive source address packet info.
 * Which socket option is used is chosen at compile time from the
 * options that the platform headers define.
 * @param s: the socket fd.
 * @param family: AF_INET6 or AF_INET; for any other value nothing is set.
 * @return false if setsockopt fails, or if no suitable option was
 *	available at compile time for the family. True otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	/* no option available at all, the fd is not used below */
	(void)s;
#endif
	if(family == AF_INET6) {
		/* prefer IPV6_RECVPKTINFO, fall back to IPV6_PKTINFO */
#           ifdef IPV6_RECVPKTINFO
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#           endif /* defined IPV6_RECVPKTINFO */

	} else if(family == AF_INET) {
		/* prefer IP_PKTINFO, fall back to IP_RECVDSTADDR, which is
		 * only used when IP_SENDSRCADDR is defined as well */
#           ifdef IP_PKTINFO
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#           endif /* IP_PKTINFO */

	}
	return 1;
}
1133 
1134 /** see if interface is ssl, its port number == the ssl port number */
1135 static int
if_is_ssl(const char * ifname,const char * port,int ssl_port,struct config_strlist * tls_additional_port)1136 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1137 	struct config_strlist* tls_additional_port)
1138 {
1139 	struct config_strlist* s;
1140 	char* p = strchr(ifname, '@');
1141 	if(!p && atoi(port) == ssl_port)
1142 		return 1;
1143 	if(p && atoi(p+1) == ssl_port)
1144 		return 1;
1145 	for(s = tls_additional_port; s; s = s->next) {
1146 		if(p && atoi(p+1) == atoi(s->str))
1147 			return 1;
1148 		if(!p && atoi(port) == atoi(s->str))
1149 			return 1;
1150 	}
1151 	return 0;
1152 }
1153 
1154 /**
1155  * Helper for ports_open. Creates one interface (or NULL for default).
1156  * @param ifname: The interface ip address.
1157  * @param do_auto: use automatic interface detection.
1158  * 	If enabled, then ifname must be the wildcard name.
1159  * @param do_udp: if udp should be used.
1160  * @param do_tcp: if udp should be used.
1161  * @param hints: for getaddrinfo. family and flags have to be set by caller.
1162  * @param port: Port number to use (as string).
1163  * @param list: list of open ports, appended to, changed to point to list head.
1164  * @param rcv: receive buffer size for UDP
1165  * @param snd: send buffer size for UDP
1166  * @param ssl_port: ssl service port number
1167  * @param tls_additional_port: list of additional ssl service port numbers.
1168  * @param https_port: DoH service port number
1169  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1170  * 	set to false on exit if reuseport failed due to no kernel support.
1171  * @param transparent: set IP_TRANSPARENT socket option.
1172  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1173  * @param freebind: set IP_FREEBIND socket option.
1174  * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1175  * @param use_systemd: if true, fetch sockets from systemd.
1176  * @param dnscrypt_port: dnscrypt service port number
1177  * @param dscp: DSCP to use.
1178  * @return: returns false on error.
1179  */
1180 static int
ports_create_if(const char * ifname,int do_auto,int do_udp,int do_tcp,struct addrinfo * hints,const char * port,struct listen_port ** list,size_t rcv,size_t snd,int ssl_port,struct config_strlist * tls_additional_port,int https_port,int * reuseport,int transparent,int tcp_mss,int freebind,int http2_nodelay,int use_systemd,int dnscrypt_port,int dscp)1181 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1182 	struct addrinfo *hints, const char* port, struct listen_port** list,
1183 	size_t rcv, size_t snd, int ssl_port,
1184 	struct config_strlist* tls_additional_port, int https_port,
1185 	int* reuseport, int transparent, int tcp_mss, int freebind,
1186 	int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp)
1187 {
1188 	int s, noip6=0;
1189 	int is_https = if_is_https(ifname, port, https_port);
1190 	int nodelay = is_https && http2_nodelay;
1191 	struct unbound_socket* ub_sock;
1192 #ifdef USE_DNSCRYPT
1193 	int is_dnscrypt = ((strchr(ifname, '@') &&
1194 			atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1195 			(!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1196 #else
1197 	int is_dnscrypt = 0;
1198 	(void)dnscrypt_port;
1199 #endif
1200 
1201 	if(!do_udp && !do_tcp)
1202 		return 0;
1203 
1204 	if(do_auto) {
1205 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1206 		if(!ub_sock)
1207 			return 0;
1208 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1209 			&noip6, rcv, snd, reuseport, transparent,
1210 			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1211 			freeaddrinfo(ub_sock->addr);
1212 			free(ub_sock);
1213 			if(noip6) {
1214 				log_warn("IPv6 protocol not available");
1215 				return 1;
1216 			}
1217 			return 0;
1218 		}
1219 		/* getting source addr packet info is highly non-portable */
1220 		if(!set_recvpktinfo(s, hints->ai_family)) {
1221 			sock_close(s);
1222 			freeaddrinfo(ub_sock->addr);
1223 			free(ub_sock);
1224 			return 0;
1225 		}
1226 		if(!port_insert(list, s,
1227 		   is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil, ub_sock)) {
1228 			sock_close(s);
1229 			freeaddrinfo(ub_sock->addr);
1230 			free(ub_sock);
1231 			return 0;
1232 		}
1233 	} else if(do_udp) {
1234 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1235 		if(!ub_sock)
1236 			return 0;
1237 		/* regular udp socket */
1238 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1239 			&noip6, rcv, snd, reuseport, transparent,
1240 			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1241 			freeaddrinfo(ub_sock->addr);
1242 			free(ub_sock);
1243 			if(noip6) {
1244 				log_warn("IPv6 protocol not available");
1245 				return 1;
1246 			}
1247 			return 0;
1248 		}
1249 		if(!port_insert(list, s,
1250 		   is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp, ub_sock)) {
1251 			sock_close(s);
1252 			freeaddrinfo(ub_sock->addr);
1253 			free(ub_sock);
1254 			return 0;
1255 		}
1256 	}
1257 	if(do_tcp) {
1258 		int is_ssl = if_is_ssl(ifname, port, ssl_port,
1259 			tls_additional_port);
1260 		enum listen_type port_type;
1261 		ub_sock = calloc(1, sizeof(struct unbound_socket));
1262 		if(!ub_sock)
1263 			return 0;
1264 		if(is_ssl)
1265 			port_type = listen_type_ssl;
1266 		else if(is_https)
1267 			port_type = listen_type_http;
1268 		else if(is_dnscrypt)
1269 			port_type = listen_type_tcp_dnscrypt;
1270 		else
1271 			port_type = listen_type_tcp;
1272 		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1273 			&noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1274 			freebind, use_systemd, dscp, ub_sock)) == -1) {
1275 			freeaddrinfo(ub_sock->addr);
1276 			free(ub_sock);
1277 			if(noip6) {
1278 				/*log_warn("IPv6 protocol not available");*/
1279 				return 1;
1280 			}
1281 			return 0;
1282 		}
1283 		if(is_ssl)
1284 			verbose(VERB_ALGO, "setup TCP for SSL service");
1285 		if(!port_insert(list, s, port_type, ub_sock)) {
1286 			sock_close(s);
1287 			freeaddrinfo(ub_sock->addr);
1288 			free(ub_sock);
1289 			return 0;
1290 		}
1291 	}
1292 	return 1;
1293 }
1294 
1295 /**
1296  * Add items to commpoint list in front.
1297  * @param c: commpoint to add.
1298  * @param front: listen struct.
1299  * @return: false on failure.
1300  */
1301 static int
listen_cp_insert(struct comm_point * c,struct listen_dnsport * front)1302 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1303 {
1304 	struct listen_list* item = (struct listen_list*)malloc(
1305 		sizeof(struct listen_list));
1306 	if(!item)
1307 		return 0;
1308 	item->com = c;
1309 	item->next = front->cps;
1310 	front->cps = item;
1311 	return 1;
1312 }
1313 
/**
 * Initialise the locks for the stream wait count and the http2 query and
 * response buffer counts. Every lock has its own inited flag, so a lock
 * that is already marked as inited is left alone.
 */
void listen_setup_locks(void)
{
	if(!stream_wait_lock_inited) {
		lock_basic_init(&stream_wait_count_lock);
		stream_wait_lock_inited = 1;
	}
	if(!http2_query_buffer_lock_inited) {
		lock_basic_init(&http2_query_buffer_count_lock);
		http2_query_buffer_lock_inited = 1;
	}
	if(!http2_response_buffer_lock_inited) {
		lock_basic_init(&http2_response_buffer_count_lock);
		http2_response_buffer_lock_inited = 1;
	}
}
1329 
/**
 * Destroy the locks that listen_setup_locks initialised. Only locks that
 * are marked as inited are destroyed; the flag is cleared before the
 * destroy call.
 */
void listen_desetup_locks(void)
{
	if(stream_wait_lock_inited) {
		stream_wait_lock_inited = 0;
		lock_basic_destroy(&stream_wait_count_lock);
	}
	if(http2_query_buffer_lock_inited) {
		http2_query_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_query_buffer_count_lock);
	}
	if(http2_response_buffer_lock_inited) {
		http2_response_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_response_buffer_count_lock);
	}
}
1345 
/**
 * Create the listen structure: allocate the shared udp buffer and make
 * one commpoint for every entry in the ports list.
 * The kind of commpoint is picked from the listen type of the port. The
 * other arguments are passed on to the commpoint create routines or
 * stored in the commpoints (sslctx, dtenv).
 * @return the new structure, or NULL on failure (out of memory, a
 *	commpoint could not be created, or the ports list gave no
 *	commpoints at all). On failure the partial result is deleted with
 *	listen_delete.
 */
struct listen_dnsport*
listen_create(struct comm_base* base, struct listen_port* ports,
	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
	int harden_large_queries, uint32_t http_max_streams,
	char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
	void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
	void *cb_arg)
{
	struct listen_dnsport* front = (struct listen_dnsport*)
		malloc(sizeof(struct listen_dnsport));
	if(!front)
		return NULL;
	front->cps = NULL;
	front->udp_buff = sldns_buffer_new(bufsize);
#ifdef USE_DNSCRYPT
	/* stays NULL unless a dnscrypt port is found below */
	front->dnscrypt_udp_buff = NULL;
#endif
	if(!front->udp_buff) {
		free(front);
		return NULL;
	}

	/* create comm points as needed */
	while(ports) {
		struct comm_point* cp = NULL;
		if(ports->ftype == listen_type_udp ||
		   ports->ftype == listen_type_udp_dnscrypt)
			cp = comm_point_create_udp(base, ports->fd,
				front->udp_buff, cb, cb_arg, ports->socket);
		else if(ports->ftype == listen_type_tcp ||
				ports->ftype == listen_type_tcp_dnscrypt)
			/* plain tcp: no http stream limit and no endpoint */
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries, 0, NULL,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, cb, cb_arg, ports->socket);
		else if(ports->ftype == listen_type_ssl ||
			ports->ftype == listen_type_http) {
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries,
				http_max_streams, http_endpoint,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, cb, cb_arg, ports->socket);
			/* the warnings below do not stop the setup */
			if(ports->ftype == listen_type_http) {
				if(!sslctx && !http_notls) {
					log_warn("HTTPS port configured, but "
						"no TLS tls-service-key or "
						"tls-service-pem set");
				}
#ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
				if(!http_notls) {
					log_warn("Unbound is not compiled "
						"with an OpenSSL version "
						"supporting ALPN "
						"(OpenSSL >= 1.0.2). This "
						"is required to use "
						"DNS-over-HTTPS");
				}
#endif
#ifndef HAVE_NGHTTP2_NGHTTP2_H
				log_warn("Unbound is not compiled with "
					"nghttp2. This is required to use "
					"DNS-over-HTTPS.");
#endif
			}
		} else if(ports->ftype == listen_type_udpancil ||
				  ports->ftype == listen_type_udpancil_dnscrypt)
			cp = comm_point_create_udp_ancil(base, ports->fd,
				front->udp_buff, cb, cb_arg, ports->socket);
		/* cp is also NULL here for a listen type that is not
		 * handled above */
		if(!cp) {
			log_err("can't create commpoint");
			listen_delete(front);
			return NULL;
		}
		/* an http port with http_notls gets no ssl context */
		if(http_notls && ports->ftype == listen_type_http)
			cp->ssl = NULL;
		else
			cp->ssl = sslctx;
		cp->dtenv = dtenv;
		/* NOTE(review): presumably the fd stays owned by the ports
		 * list, which closes it in listening_ports_free - confirm */
		cp->do_not_close = 1;
#ifdef USE_DNSCRYPT
		if (ports->ftype == listen_type_udp_dnscrypt ||
			ports->ftype == listen_type_tcp_dnscrypt ||
			ports->ftype == listen_type_udpancil_dnscrypt) {
			cp->dnscrypt = 1;
			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
			if(!cp->dnscrypt_buffer) {
				log_err("can't alloc dnscrypt_buffer");
				comm_point_delete(cp);
				listen_delete(front);
				return NULL;
			}
			/* the buffer of the last dnscrypt port is the one
			 * that front refers to */
			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
		}
#endif
		if(!listen_cp_insert(cp, front)) {
			log_err("malloc failed");
			/* cp is not in the list yet, delete it separately */
			comm_point_delete(cp);
			listen_delete(front);
			return NULL;
		}
		ports = ports->next;
	}
	if(!front->cps) {
		log_err("Could not open sockets to accept queries.");
		listen_delete(front);
		return NULL;
	}

	return front;
}
1458 
1459 void
listen_list_delete(struct listen_list * list)1460 listen_list_delete(struct listen_list* list)
1461 {
1462 	struct listen_list *p = list, *pn;
1463 	while(p) {
1464 		pn = p->next;
1465 		comm_point_delete(p->com);
1466 		free(p);
1467 		p = pn;
1468 	}
1469 }
1470 
1471 void
listen_delete(struct listen_dnsport * front)1472 listen_delete(struct listen_dnsport* front)
1473 {
1474 	if(!front)
1475 		return;
1476 	listen_list_delete(front->cps);
1477 #ifdef USE_DNSCRYPT
1478 	if(front->dnscrypt_udp_buff &&
1479 		front->udp_buff != front->dnscrypt_udp_buff) {
1480 		sldns_buffer_free(front->dnscrypt_udp_buff);
1481 	}
1482 #endif
1483 	sldns_buffer_free(front->udp_buff);
1484 	free(front);
1485 }
1486 
#ifdef HAVE_GETIFADDRS
/**
 * Append a copy of str to the string array, growing the array by one.
 * @return false on allocation failure (an error is logged). The size is
 *	only incremented when the copy succeeded.
 */
static int
resolve_ifa_append(char ***ip_addresses, int *ip_addresses_size,
	const char *str)
{
	void *grown = realloc(*ip_addresses,
		sizeof(char *) * (*ip_addresses_size + 1));
	if(!grown) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = grown;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Look up search_ifa, which may carry an "@port" suffix, among the
 * interface names in ifas. Every IPv4 (and with INET6, IPv6) address of
 * a matching interface is appended to ip_addresses as a string, with the
 * "@port" suffix added back. If nothing was appended, search_ifa itself
 * is appended unchanged.
 * @return false on failure (inet_ntop or out of memory).
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	struct ifaddrs *cur;
	int size_at_start = *ip_addresses_size;
	/* the name part ends at the last '@', the rest is the suffix */
	const char* at = strrchr(search_ifa, '@');
	const char* suffix = at ? at : "";
	size_t namelen = at ? (size_t)(at - search_ifa) : 0;
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
	char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
	char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

	for(cur = ifas; cur != NULL; cur = cur->ifa_next) {
		sa_family_t family;

		/* the interface name has to match exactly */
		if(at) {
			if(strlen(cur->ifa_name) != namelen ||
				strncmp(cur->ifa_name, search_ifa,
				namelen) != 0)
				continue;
		} else if(strcmp(cur->ifa_name, search_ifa) != 0) {
			continue;
		}

		if(cur->ifa_addr == NULL)
			continue;

		family = cur->ifa_addr->sa_family;
		if(family == AF_INET) {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				cur->ifa_addr;
			if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, suffix);
		}
#ifdef INET6
		else if(family == AF_INET6) {
			char a6[INET6_ADDRSTRLEN + 1];
			char if_index_name[IF_NAMESIZE + 1];
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				cur->ifa_addr;
			if_index_name[0] = 0;
			if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)if_index_name);
			/* add the %scope part only if a name was found */
			if(if_index_name[0] != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, if_index_name, suffix);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, suffix);
			}
		}
#endif
		else {
			/* other address families are skipped */
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
			addr_buf))
			return 0;
	}

	/* nothing found for this name, pass the string on as it is */
	if(*ip_addresses_size == size_at_start) {
		if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
			search_ifa))
			return 0;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */
1590 
resolve_interface_names(char ** ifs,int num_ifs,struct config_strlist * list,char *** resif,int * num_resif)1591 int resolve_interface_names(char** ifs, int num_ifs,
1592 	struct config_strlist* list, char*** resif, int* num_resif)
1593 {
1594 #ifdef HAVE_GETIFADDRS
1595 	struct ifaddrs *addrs = NULL;
1596 	if(num_ifs == 0 && list == NULL) {
1597 		*resif = NULL;
1598 		*num_resif = 0;
1599 		return 1;
1600 	}
1601 	if(getifaddrs(&addrs) == -1) {
1602 		log_err("failed to list interfaces: getifaddrs: %s",
1603 			strerror(errno));
1604 		freeifaddrs(addrs);
1605 		return 0;
1606 	}
1607 	if(ifs) {
1608 		int i;
1609 		for(i=0; i<num_ifs; i++) {
1610 			if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1611 				freeifaddrs(addrs);
1612 				config_del_strarray(*resif, *num_resif);
1613 				*resif = NULL;
1614 				*num_resif = 0;
1615 				return 0;
1616 			}
1617 		}
1618 	}
1619 	if(list) {
1620 		struct config_strlist* p;
1621 		for(p = list; p; p = p->next) {
1622 			if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1623 				freeifaddrs(addrs);
1624 				config_del_strarray(*resif, *num_resif);
1625 				*resif = NULL;
1626 				*num_resif = 0;
1627 				return 0;
1628 			}
1629 }
1630 	}
1631 	freeifaddrs(addrs);
1632 	return 1;
1633 #else
1634 	struct config_strlist* p;
1635 	if(num_ifs == 0 && list == NULL) {
1636 		*resif = NULL;
1637 		*num_resif = 0;
1638 		return 1;
1639 	}
1640 	*num_resif = num_ifs;
1641 	for(p = list; p; p = p->next) {
1642 		(*num_resif)++;
1643 	}
1644 	*resif = calloc(*num_resif, sizeof(**resif));
1645 	if(!*resif) {
1646 		log_err("out of memory");
1647 		return 0;
1648 	}
1649 	if(ifs) {
1650 		int i;
1651 		for(i=0; i<num_ifs; i++) {
1652 			(*resif)[i] = strdup(ifs[i]);
1653 			if(!((*resif)[i])) {
1654 				log_err("out of memory");
1655 				config_del_strarray(*resif, *num_resif);
1656 				*resif = NULL;
1657 				*num_resif = 0;
1658 				return 0;
1659 			}
1660 		}
1661 	}
1662 	if(list) {
1663 		int idx = num_ifs;
1664 		for(p = list; p; p = p->next) {
1665 			(*resif)[idx] = strdup(p->str);
1666 			if(!((*resif)[idx])) {
1667 				log_err("out of memory");
1668 				config_del_strarray(*resif, *num_resif);
1669 				*resif = NULL;
1670 				*num_resif = 0;
1671 				return 0;
1672 			}
1673 			idx++;
1674 		}
1675 	}
1676 	return 1;
1677 #endif /* HAVE_GETIFADDRS */
1678 }
1679 
1680 struct listen_port*
listening_ports_open(struct config_file * cfg,char ** ifs,int num_ifs,int * reuseport)1681 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1682 	int* reuseport)
1683 {
1684 	struct listen_port* list = NULL;
1685 	struct addrinfo hints;
1686 	int i, do_ip4, do_ip6;
1687 	int do_tcp, do_auto;
1688 	char portbuf[32];
1689 	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1690 	do_ip4 = cfg->do_ip4;
1691 	do_ip6 = cfg->do_ip6;
1692 	do_tcp = cfg->do_tcp;
1693 	do_auto = cfg->if_automatic && cfg->do_udp;
1694 	if(cfg->incoming_num_tcp == 0)
1695 		do_tcp = 0;
1696 
1697 	/* getaddrinfo */
1698 	memset(&hints, 0, sizeof(hints));
1699 	hints.ai_flags = AI_PASSIVE;
1700 	/* no name lookups on our listening ports */
1701 	if(num_ifs > 0)
1702 		hints.ai_flags |= AI_NUMERICHOST;
1703 	hints.ai_family = AF_UNSPEC;
1704 #ifndef INET6
1705 	do_ip6 = 0;
1706 #endif
1707 	if(!do_ip4 && !do_ip6) {
1708 		return NULL;
1709 	}
1710 	/* create ip4 and ip6 ports so that return addresses are nice. */
1711 	if(do_auto || num_ifs == 0) {
1712 		if(do_ip6) {
1713 			hints.ai_family = AF_INET6;
1714 			if(!ports_create_if(do_auto?"::0":"::1",
1715 				do_auto, cfg->do_udp, do_tcp,
1716 				&hints, portbuf, &list,
1717 				cfg->so_rcvbuf, cfg->so_sndbuf,
1718 				cfg->ssl_port, cfg->tls_additional_port,
1719 				cfg->https_port, reuseport, cfg->ip_transparent,
1720 				cfg->tcp_mss, cfg->ip_freebind,
1721 				cfg->http_nodelay, cfg->use_systemd,
1722 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1723 				listening_ports_free(list);
1724 				return NULL;
1725 			}
1726 		}
1727 		if(do_ip4) {
1728 			hints.ai_family = AF_INET;
1729 			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1730 				do_auto, cfg->do_udp, do_tcp,
1731 				&hints, portbuf, &list,
1732 				cfg->so_rcvbuf, cfg->so_sndbuf,
1733 				cfg->ssl_port, cfg->tls_additional_port,
1734 				cfg->https_port, reuseport, cfg->ip_transparent,
1735 				cfg->tcp_mss, cfg->ip_freebind,
1736 				cfg->http_nodelay, cfg->use_systemd,
1737 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1738 				listening_ports_free(list);
1739 				return NULL;
1740 			}
1741 		}
1742 	} else for(i = 0; i<num_ifs; i++) {
1743 		if(str_is_ip6(ifs[i])) {
1744 			if(!do_ip6)
1745 				continue;
1746 			hints.ai_family = AF_INET6;
1747 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1748 				do_tcp, &hints, portbuf, &list,
1749 				cfg->so_rcvbuf, cfg->so_sndbuf,
1750 				cfg->ssl_port, cfg->tls_additional_port,
1751 				cfg->https_port, reuseport, cfg->ip_transparent,
1752 				cfg->tcp_mss, cfg->ip_freebind,
1753 				cfg->http_nodelay, cfg->use_systemd,
1754 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1755 				listening_ports_free(list);
1756 				return NULL;
1757 			}
1758 		} else {
1759 			if(!do_ip4)
1760 				continue;
1761 			hints.ai_family = AF_INET;
1762 			if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1763 				do_tcp, &hints, portbuf, &list,
1764 				cfg->so_rcvbuf, cfg->so_sndbuf,
1765 				cfg->ssl_port, cfg->tls_additional_port,
1766 				cfg->https_port, reuseport, cfg->ip_transparent,
1767 				cfg->tcp_mss, cfg->ip_freebind,
1768 				cfg->http_nodelay, cfg->use_systemd,
1769 				cfg->dnscrypt_port, cfg->ip_dscp)) {
1770 				listening_ports_free(list);
1771 				return NULL;
1772 			}
1773 		}
1774 	}
1775 
1776 	return list;
1777 }
1778 
listening_ports_free(struct listen_port * list)1779 void listening_ports_free(struct listen_port* list)
1780 {
1781 	struct listen_port* nx;
1782 	while(list) {
1783 		nx = list->next;
1784 		if(list->fd != -1) {
1785 			sock_close(list->fd);
1786 		}
1787 		/* rc_ports don't have ub_socket */
1788 		if(list->socket) {
1789 			freeaddrinfo(list->socket->addr);
1790 			free(list->socket);
1791 		}
1792 		free(list);
1793 		list = nx;
1794 	}
1795 }
1796 
listen_get_mem(struct listen_dnsport * listen)1797 size_t listen_get_mem(struct listen_dnsport* listen)
1798 {
1799 	struct listen_list* p;
1800 	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1801 		sizeof(*listen->udp_buff) +
1802 		sldns_buffer_capacity(listen->udp_buff);
1803 #ifdef USE_DNSCRYPT
1804 	s += sizeof(*listen->dnscrypt_udp_buff);
1805 	if(listen->udp_buff != listen->dnscrypt_udp_buff){
1806 		s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1807 	}
1808 #endif
1809 	for(p = listen->cps; p; p = p->next) {
1810 		s += sizeof(*p);
1811 		s += comm_point_get_mem(p->com);
1812 	}
1813 	return s;
1814 }
1815 
listen_stop_accept(struct listen_dnsport * listen)1816 void listen_stop_accept(struct listen_dnsport* listen)
1817 {
1818 	/* do not stop the ones that have no tcp_free list
1819 	 * (they have already stopped listening) */
1820 	struct listen_list* p;
1821 	for(p=listen->cps; p; p=p->next) {
1822 		if(p->com->type == comm_tcp_accept &&
1823 			p->com->tcp_free != NULL) {
1824 			comm_point_stop_listening(p->com);
1825 		}
1826 	}
1827 }
1828 
listen_start_accept(struct listen_dnsport * listen)1829 void listen_start_accept(struct listen_dnsport* listen)
1830 {
1831 	/* do not start the ones that have no tcp_free list, it is no
1832 	 * use to listen to them because they have no free tcp handlers */
1833 	struct listen_list* p;
1834 	for(p=listen->cps; p; p=p->next) {
1835 		if(p->com->type == comm_tcp_accept &&
1836 			p->com->tcp_free != NULL) {
1837 			comm_point_start_listening(p->com, -1, -1);
1838 		}
1839 	}
1840 }
1841 
1842 struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer * spoolbuf)1843 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1844 {
1845 	struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1846 	if(!req) {
1847 		log_err("malloc failure for new stream outoforder processing structure");
1848 		return NULL;
1849 	}
1850 	memset(req, 0, sizeof(*req));
1851 	req->spool_buffer = spoolbuf;
1852 	return req;
1853 }
1854 
/**
 * Clear the request lists and free the structure. NULL is ignored.
 * The cp member points back to the commpoint that owns this struct and
 * called delete on us. The spool_buffer is the shared udp buffer. Neither
 * is deleted here.
 */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req) {
		tcp_req_info_clear(req);
		free(req);
	}
}
1865 
tcp_req_info_clear(struct tcp_req_info * req)1866 void tcp_req_info_clear(struct tcp_req_info* req)
1867 {
1868 	struct tcp_req_open_item* open, *nopen;
1869 	struct tcp_req_done_item* item, *nitem;
1870 	if(!req) return;
1871 
1872 	/* free outstanding request mesh reply entries */
1873 	open = req->open_req_list;
1874 	while(open) {
1875 		nopen = open->next;
1876 		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1877 		free(open);
1878 		open = nopen;
1879 	}
1880 	req->open_req_list = NULL;
1881 	req->num_open_req = 0;
1882 
1883 	/* free pending writable result packets */
1884 	item = req->done_req_list;
1885 	while(item) {
1886 		nitem = item->next;
1887 		lock_basic_lock(&stream_wait_count_lock);
1888 		stream_wait_count -= (sizeof(struct tcp_req_done_item)
1889 			+item->len);
1890 		lock_basic_unlock(&stream_wait_count_lock);
1891 		free(item->buf);
1892 		free(item);
1893 		item = nitem;
1894 	}
1895 	req->done_req_list = NULL;
1896 	req->num_done_req = 0;
1897 	req->read_is_closed = 0;
1898 }
1899 
1900 void
tcp_req_info_remove_mesh_state(struct tcp_req_info * req,struct mesh_state * m)1901 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1902 {
1903 	struct tcp_req_open_item* open, *prev = NULL;
1904 	if(!req || !m) return;
1905 	open = req->open_req_list;
1906 	while(open) {
1907 		if(open->mesh_state == m) {
1908 			struct tcp_req_open_item* next;
1909 			if(prev) prev->next = open->next;
1910 			else req->open_req_list = open->next;
1911 			/* caller has to manage the mesh state reply entry */
1912 			next = open->next;
1913 			free(open);
1914 			req->num_open_req --;
1915 
1916 			/* prev = prev; */
1917 			open = next;
1918 			continue;
1919 		}
1920 		prev = open;
1921 		open = open->next;
1922 	}
1923 }
1924 
1925 /** setup listening for read or write */
1926 static void
tcp_req_info_setup_listen(struct tcp_req_info * req)1927 tcp_req_info_setup_listen(struct tcp_req_info* req)
1928 {
1929 	int wr = 0;
1930 	int rd = 0;
1931 
1932 	if(req->cp->tcp_byte_count != 0) {
1933 		/* cannot change, halfway through */
1934 		return;
1935 	}
1936 
1937 	if(!req->cp->tcp_is_reading)
1938 		wr = 1;
1939 	if(!req->read_is_closed)
1940 		rd = 1;
1941 
1942 	if(wr) {
1943 		req->cp->tcp_is_reading = 0;
1944 		comm_point_stop_listening(req->cp);
1945 		comm_point_start_listening(req->cp, -1,
1946 			adjusted_tcp_timeout(req->cp));
1947 	} else if(rd) {
1948 		req->cp->tcp_is_reading = 1;
1949 		comm_point_stop_listening(req->cp);
1950 		comm_point_start_listening(req->cp, -1,
1951 			adjusted_tcp_timeout(req->cp));
1952 		/* and also read it (from SSL stack buffers), so
1953 		 * no event read event is expected since the remainder of
1954 		 * the TLS frame is sitting in the buffers. */
1955 		req->read_again = 1;
1956 	} else {
1957 		comm_point_stop_listening(req->cp);
1958 		comm_point_start_listening(req->cp, -1,
1959 			adjusted_tcp_timeout(req->cp));
1960 		comm_point_listen_for_rw(req->cp, 0, 0);
1961 	}
1962 }
1963 
1964 /** remove first item from list of pending results */
1965 static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info * req)1966 tcp_req_info_pop_done(struct tcp_req_info* req)
1967 {
1968 	struct tcp_req_done_item* item;
1969 	log_assert(req->num_done_req > 0 && req->done_req_list);
1970 	item = req->done_req_list;
1971 	lock_basic_lock(&stream_wait_count_lock);
1972 	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1973 	lock_basic_unlock(&stream_wait_count_lock);
1974 	req->done_req_list = req->done_req_list->next;
1975 	req->num_done_req --;
1976 	return item;
1977 }
1978 
1979 /** Send given buffer and setup to write */
1980 static void
tcp_req_info_start_write_buf(struct tcp_req_info * req,uint8_t * buf,size_t len)1981 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1982 	size_t len)
1983 {
1984 	sldns_buffer_clear(req->cp->buffer);
1985 	sldns_buffer_write(req->cp->buffer, buf, len);
1986 	sldns_buffer_flip(req->cp->buffer);
1987 
1988 	req->cp->tcp_is_reading = 0; /* we are now writing */
1989 }
1990 
1991 /** pick up the next result and start writing it to the channel */
1992 static void
tcp_req_pickup_next_result(struct tcp_req_info * req)1993 tcp_req_pickup_next_result(struct tcp_req_info* req)
1994 {
1995 	if(req->num_done_req > 0) {
1996 		/* unlist the done item from the list of pending results */
1997 		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1998 		tcp_req_info_start_write_buf(req, item->buf, item->len);
1999 		free(item->buf);
2000 		free(item);
2001 	}
2002 }
2003 
2004 /** the read channel has closed */
2005 int
tcp_req_info_handle_read_close(struct tcp_req_info * req)2006 tcp_req_info_handle_read_close(struct tcp_req_info* req)
2007 {
2008 	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2009 	/* reset byte count for (potential) partial read */
2010 	req->cp->tcp_byte_count = 0;
2011 	/* if we still have results to write, pick up next and write it */
2012 	if(req->num_done_req != 0) {
2013 		tcp_req_pickup_next_result(req);
2014 		tcp_req_info_setup_listen(req);
2015 		return 1;
2016 	}
2017 	/* if nothing to do, this closes the connection */
2018 	if(req->num_open_req == 0 && req->num_done_req == 0)
2019 		return 0;
2020 	/* otherwise, we must be waiting for dns resolve, wait with timeout */
2021 	req->read_is_closed = 1;
2022 	tcp_req_info_setup_listen(req);
2023 	return 1;
2024 }
2025 
2026 void
tcp_req_info_handle_writedone(struct tcp_req_info * req)2027 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2028 {
2029 	/* back to reading state, we finished this write event */
2030 	sldns_buffer_clear(req->cp->buffer);
2031 	if(req->num_done_req == 0 && req->read_is_closed) {
2032 		/* no more to write and nothing to read, close it */
2033 		comm_point_drop_reply(&req->cp->repinfo);
2034 		return;
2035 	}
2036 	req->cp->tcp_is_reading = 1;
2037 	/* see if another result needs writing */
2038 	tcp_req_pickup_next_result(req);
2039 
2040 	/* see if there is more to write, if not stop_listening for writing */
2041 	/* see if new requests are allowed, if so, start_listening
2042 	 * for reading */
2043 	tcp_req_info_setup_listen(req);
2044 }
2045 
2046 void
tcp_req_info_handle_readdone(struct tcp_req_info * req)2047 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2048 {
2049 	struct comm_point* c = req->cp;
2050 
2051 	/* we want to read up several requests, unless there are
2052 	 * pending answers */
2053 
2054 	req->is_drop = 0;
2055 	req->is_reply = 0;
2056 	req->in_worker_handle = 1;
2057 	sldns_buffer_set_limit(req->spool_buffer, 0);
2058 	/* handle the current request */
2059 	/* this calls the worker handle request routine that could give
2060 	 * a cache response, or localdata response, or drop the reply,
2061 	 * or schedule a mesh entry for later */
2062 	fptr_ok(fptr_whitelist_comm_point(c->callback));
2063 	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2064 		req->in_worker_handle = 0;
2065 		/* there is an answer, put it up.  It is already in the
2066 		 * c->buffer, just send it. */
2067 		/* since we were just reading a query, the channel is
2068 		 * clear to write to */
2069 	send_it:
2070 		c->tcp_is_reading = 0;
2071 		comm_point_stop_listening(c);
2072 		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2073 		return;
2074 	}
2075 	req->in_worker_handle = 0;
2076 	/* it should be waiting in the mesh for recursion.
2077 	 * If mesh failed to add a new entry and called commpoint_drop_reply.
2078 	 * Then the mesh state has been cleared. */
2079 	if(req->is_drop) {
2080 		/* the reply has been dropped, stream has been closed. */
2081 		return;
2082 	}
2083 	/* If mesh failed(mallocfail) and called commpoint_send_reply with
2084 	 * something like servfail then we pick up that reply below. */
2085 	if(req->is_reply) {
2086 		goto send_it;
2087 	}
2088 
2089 	sldns_buffer_clear(c->buffer);
2090 	/* if pending answers, pick up an answer and start sending it */
2091 	tcp_req_pickup_next_result(req);
2092 
2093 	/* if answers pending, start sending answers */
2094 	/* read more requests if we can have more requests */
2095 	tcp_req_info_setup_listen(req);
2096 }
2097 
2098 int
tcp_req_info_add_meshstate(struct tcp_req_info * req,struct mesh_area * mesh,struct mesh_state * m)2099 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2100 	struct mesh_area* mesh, struct mesh_state* m)
2101 {
2102 	struct tcp_req_open_item* item;
2103 	log_assert(req && mesh && m);
2104 	item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2105 	if(!item) return 0;
2106 	item->next = req->open_req_list;
2107 	item->mesh = mesh;
2108 	item->mesh_state = m;
2109 	req->open_req_list = item;
2110 	req->num_open_req++;
2111 	return 1;
2112 }
2113 
2114 /** Add a result to the result list.  At the end. */
2115 static int
tcp_req_info_add_result(struct tcp_req_info * req,uint8_t * buf,size_t len)2116 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2117 {
2118 	struct tcp_req_done_item* last = NULL;
2119 	struct tcp_req_done_item* item;
2120 	size_t space;
2121 
2122 	/* see if we have space */
2123 	space = sizeof(struct tcp_req_done_item) + len;
2124 	lock_basic_lock(&stream_wait_count_lock);
2125 	if(stream_wait_count + space > stream_wait_max) {
2126 		lock_basic_unlock(&stream_wait_count_lock);
2127 		verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2128 		return 0;
2129 	}
2130 	stream_wait_count += space;
2131 	lock_basic_unlock(&stream_wait_count_lock);
2132 
2133 	/* find last element */
2134 	last = req->done_req_list;
2135 	while(last && last->next)
2136 		last = last->next;
2137 
2138 	/* create new element */
2139 	item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2140 	if(!item) {
2141 		log_err("malloc failure, for stream result list");
2142 		return 0;
2143 	}
2144 	item->next = NULL;
2145 	item->len = len;
2146 	item->buf = memdup(buf, len);
2147 	if(!item->buf) {
2148 		free(item);
2149 		log_err("malloc failure, adding reply to stream result list");
2150 		return 0;
2151 	}
2152 
2153 	/* link in */
2154 	if(last) last->next = item;
2155 	else req->done_req_list = item;
2156 	req->num_done_req++;
2157 	return 1;
2158 }
2159 
2160 void
tcp_req_info_send_reply(struct tcp_req_info * req)2161 tcp_req_info_send_reply(struct tcp_req_info* req)
2162 {
2163 	if(req->in_worker_handle) {
2164 		/* reply from mesh is in the spool_buffer */
2165 		/* copy now, so that the spool buffer is free for other tasks
2166 		 * before the callback is done */
2167 		sldns_buffer_clear(req->cp->buffer);
2168 		sldns_buffer_write(req->cp->buffer,
2169 			sldns_buffer_begin(req->spool_buffer),
2170 			sldns_buffer_limit(req->spool_buffer));
2171 		sldns_buffer_flip(req->cp->buffer);
2172 		req->is_reply = 1;
2173 		return;
2174 	}
2175 	/* now that the query has been handled, that mesh_reply entry
2176 	 * should be removed, from the tcp_req_info list,
2177 	 * the mesh state cleanup removes then with region_cleanup and
2178 	 * replies_sent true. */
2179 	/* see if we can send it straight away (we are not doing
2180 	 * anything else).  If so, copy to buffer and start */
2181 	if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2182 		/* buffer is free, and was ready to read new query into,
2183 		 * but we are now going to use it to send this answer */
2184 		tcp_req_info_start_write_buf(req,
2185 			sldns_buffer_begin(req->spool_buffer),
2186 			sldns_buffer_limit(req->spool_buffer));
2187 		/* switch to listen to write events */
2188 		comm_point_stop_listening(req->cp);
2189 		comm_point_start_listening(req->cp, -1,
2190 			adjusted_tcp_timeout(req->cp));
2191 		return;
2192 	}
2193 	/* queue up the answer behind the others already pending */
2194 	if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2195 		sldns_buffer_limit(req->spool_buffer))) {
2196 		/* drop the connection, we are out of resources */
2197 		comm_point_drop_reply(&req->cp->repinfo);
2198 	}
2199 }
2200 
tcp_req_info_get_stream_buffer_size(void)2201 size_t tcp_req_info_get_stream_buffer_size(void)
2202 {
2203 	size_t s;
2204 	if(!stream_wait_lock_inited)
2205 		return stream_wait_count;
2206 	lock_basic_lock(&stream_wait_count_lock);
2207 	s = stream_wait_count;
2208 	lock_basic_unlock(&stream_wait_count_lock);
2209 	return s;
2210 }
2211 
http2_get_query_buffer_size(void)2212 size_t http2_get_query_buffer_size(void)
2213 {
2214 	size_t s;
2215 	if(!http2_query_buffer_lock_inited)
2216 		return http2_query_buffer_count;
2217 	lock_basic_lock(&http2_query_buffer_count_lock);
2218 	s = http2_query_buffer_count;
2219 	lock_basic_unlock(&http2_query_buffer_count_lock);
2220 	return s;
2221 }
2222 
http2_get_response_buffer_size(void)2223 size_t http2_get_response_buffer_size(void)
2224 {
2225 	size_t s;
2226 	if(!http2_response_buffer_lock_inited)
2227 		return http2_response_buffer_count;
2228 	lock_basic_lock(&http2_response_buffer_count_lock);
2229 	s = http2_response_buffer_count;
2230 	lock_basic_unlock(&http2_response_buffer_count_lock);
2231 	return s;
2232 }
2233 
2234 #ifdef HAVE_NGHTTP2
2235 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
http2_submit_response_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2236 static ssize_t http2_submit_response_read_callback(
2237 	nghttp2_session* ATTR_UNUSED(session),
2238 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2239 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2240 {
2241 	struct http2_stream* h2_stream;
2242 	struct http2_session* h2_session = source->ptr;
2243 	size_t copylen = length;
2244 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2245 		h2_session->session, stream_id))) {
2246 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2247 			"stream");
2248 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2249 	}
2250 	if(!h2_stream->rbuffer ||
2251 		sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2252 		verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2253 			"available in rbuffer");
2254 		/* rbuffer will be free'd in frame close cb */
2255 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2256 	}
2257 
2258 	if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2259 		copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2260 	if(copylen > SSIZE_MAX)
2261 		copylen = SSIZE_MAX; /* will probably never happen */
2262 
2263 	memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2264 	sldns_buffer_skip(h2_stream->rbuffer, copylen);
2265 
2266 	if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2267 		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2268 		lock_basic_lock(&http2_response_buffer_count_lock);
2269 		http2_response_buffer_count -=
2270 			sldns_buffer_capacity(h2_stream->rbuffer);
2271 		lock_basic_unlock(&http2_response_buffer_count_lock);
2272 		sldns_buffer_free(h2_stream->rbuffer);
2273 		h2_stream->rbuffer = NULL;
2274 	}
2275 
2276 	return copylen;
2277 }
2278 
2279 /**
2280  * Send RST_STREAM frame for stream.
2281  * @param h2_session: http2 session to submit frame to
2282  * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2283  * @return 0 on error, 1 otherwise
2284  */
http2_submit_rst_stream(struct http2_session * h2_session,struct http2_stream * h2_stream)2285 static int http2_submit_rst_stream(struct http2_session* h2_session,
2286 		struct http2_stream* h2_stream)
2287 {
2288 	int ret = nghttp2_submit_rst_stream(h2_session->session,
2289 		NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2290 		NGHTTP2_INTERNAL_ERROR);
2291 	if(ret) {
2292 		verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2293 			"error: %s", nghttp2_strerror(ret));
2294 		return 0;
2295 	}
2296 	return 1;
2297 }
2298 
2299 /**
2300  * DNS response ready to be submitted to nghttp2, to be prepared for sending
2301  * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2302  * might be used before this will be sent out.
2303  * @param h2_session: http2 session, containing c->buffer which contains answer
2304  * @return 0 on error, 1 otherwise
2305  */
http2_submit_dns_response(struct http2_session * h2_session)2306 int http2_submit_dns_response(struct http2_session* h2_session)
2307 {
2308 	int ret;
2309 	nghttp2_data_provider data_prd;
2310 	char status[4];
2311 	nghttp2_nv headers[3];
2312 	struct http2_stream* h2_stream = h2_session->c->h2_stream;
2313 	size_t rlen;
2314 	char rlen_str[32];
2315 
2316 	if(h2_stream->rbuffer) {
2317 		log_err("http2 submit response error: rbuffer already "
2318 			"exists");
2319 		return 0;
2320 	}
2321 	if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2322 		log_err("http2 submit response error: c->buffer not complete");
2323 		return 0;
2324 	}
2325 
2326 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2327 		verbose(VERB_QUERY, "http2: submit response error: "
2328 			"invalid status");
2329 		return 0;
2330 	}
2331 
2332 	rlen = sldns_buffer_remaining(h2_session->c->buffer);
2333 	snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2334 
2335 	lock_basic_lock(&http2_response_buffer_count_lock);
2336 	if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2337 		lock_basic_unlock(&http2_response_buffer_count_lock);
2338 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2339 			"in https-response-buffer-size");
2340 		return http2_submit_rst_stream(h2_session, h2_stream);
2341 	}
2342 	http2_response_buffer_count += rlen;
2343 	lock_basic_unlock(&http2_response_buffer_count_lock);
2344 
2345 	if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2346 		lock_basic_lock(&http2_response_buffer_count_lock);
2347 		http2_response_buffer_count -= rlen;
2348 		lock_basic_unlock(&http2_response_buffer_count_lock);
2349 		log_err("http2 submit response error: malloc failure");
2350 		return 0;
2351 	}
2352 
2353 	headers[0].name = (uint8_t*)":status";
2354 	headers[0].namelen = 7;
2355 	headers[0].value = (uint8_t*)status;
2356 	headers[0].valuelen = 3;
2357 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2358 
2359 	headers[1].name = (uint8_t*)"content-type";
2360 	headers[1].namelen = 12;
2361 	headers[1].value = (uint8_t*)"application/dns-message";
2362 	headers[1].valuelen = 23;
2363 	headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2364 
2365 	headers[2].name = (uint8_t*)"content-length";
2366 	headers[2].namelen = 14;
2367 	headers[2].value = (uint8_t*)rlen_str;
2368 	headers[2].valuelen = strlen(rlen_str);
2369 	headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2370 
2371 	sldns_buffer_write(h2_stream->rbuffer,
2372 		sldns_buffer_current(h2_session->c->buffer),
2373 		sldns_buffer_remaining(h2_session->c->buffer));
2374 	sldns_buffer_flip(h2_stream->rbuffer);
2375 
2376 	data_prd.source.ptr = h2_session;
2377 	data_prd.read_callback = http2_submit_response_read_callback;
2378 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2379 		headers, 3, &data_prd);
2380 	if(ret) {
2381 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2382 			"error: %s", nghttp2_strerror(ret));
2383 		return 0;
2384 	}
2385 	return 1;
2386 }
2387 #else
/** Variant used when HAVE_NGHTTP2 is not defined: always returns 0. */
int http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	const int unsupported = 0;
	return unsupported;
}
2392 #endif
2393 
2394 #ifdef HAVE_NGHTTP2
2395 /** HTTP status to descriptive string */
http_status_to_str(enum http_status s)2396 static char* http_status_to_str(enum http_status s)
2397 {
2398 	switch(s) {
2399 		case HTTP_STATUS_OK:
2400 			return "OK";
2401 		case HTTP_STATUS_BAD_REQUEST:
2402 			return "Bad Request";
2403 		case HTTP_STATUS_NOT_FOUND:
2404 			return "Not Found";
2405 		case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2406 			return "Payload Too Large";
2407 		case HTTP_STATUS_URI_TOO_LONG:
2408 			return "URI Too Long";
2409 		case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2410 			return "Unsupported Media Type";
2411 		case HTTP_STATUS_NOT_IMPLEMENTED:
2412 			return "Not Implemented";
2413 	}
2414 	return "Status Unknown";
2415 }
2416 
2417 /** nghttp2 callback. Used to copy error message to nghttp2 session */
http2_submit_error_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2418 static ssize_t http2_submit_error_read_callback(
2419 	nghttp2_session* ATTR_UNUSED(session),
2420 	int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2421 	nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2422 {
2423 	struct http2_stream* h2_stream;
2424 	struct http2_session* h2_session = source->ptr;
2425 	char* msg;
2426 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2427 		h2_session->session, stream_id))) {
2428 		verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2429 			"stream");
2430 		return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2431 	}
2432 	*data_flags |= NGHTTP2_DATA_FLAG_EOF;
2433 	msg = http_status_to_str(h2_stream->status);
2434 	if(length < strlen(msg))
2435 		return 0; /* not worth trying over multiple frames */
2436 	memcpy(buf, msg, strlen(msg));
2437 	return strlen(msg);
2438 
2439 }
2440 
2441 /**
2442  * HTTP error response ready to be submitted to nghttp2, to be prepared for
2443  * sending out. Message body will contain descriptive string for HTTP status.
2444  * @param h2_session: http2 session to submit to
2445  * @param h2_stream: http2 stream containing HTTP status to use for error
2446  * @return 0 on error, 1 otherwise
2447  */
http2_submit_error(struct http2_session * h2_session,struct http2_stream * h2_stream)2448 static int http2_submit_error(struct http2_session* h2_session,
2449 	struct http2_stream* h2_stream)
2450 {
2451 	int ret;
2452 	char status[4];
2453 	nghttp2_data_provider data_prd;
2454 	nghttp2_nv headers[1]; /* will be copied by nghttp */
2455 	if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2456 		verbose(VERB_QUERY, "http2: submit error failed, "
2457 			"invalid status");
2458 		return 0;
2459 	}
2460 	headers[0].name = (uint8_t*)":status";
2461 	headers[0].namelen = 7;
2462 	headers[0].value = (uint8_t*)status;
2463 	headers[0].valuelen = 3;
2464 	headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2465 
2466 	data_prd.source.ptr = h2_session;
2467 	data_prd.read_callback = http2_submit_error_read_callback;
2468 
2469 	ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2470 		headers, 1, &data_prd);
2471 	if(ret) {
2472 		verbose(VERB_QUERY, "http2: submit error failed, "
2473 			"error: %s", nghttp2_strerror(ret));
2474 		return 0;
2475 	}
2476 	return 1;
2477 }
2478 
2479 /**
2480  * Start query handling. Query is stored in the stream, and will be free'd here.
2481  * @param h2_session: http2 session, containing comm point
2482  * @param h2_stream: stream containing buffered query
2483  * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2484  * reply available (yet).
2485  */
http2_query_read_done(struct http2_session * h2_session,struct http2_stream * h2_stream)2486 static int http2_query_read_done(struct http2_session* h2_session,
2487 	struct http2_stream* h2_stream)
2488 {
2489 	log_assert(h2_stream->qbuffer);
2490 
2491 	if(h2_session->c->h2_stream) {
2492 		verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2493 			"buffer already assigned to stream");
2494 		return -1;
2495 	}
2496 
2497     /* the c->buffer might be used by mesh_send_reply and no be cleard
2498 	 * need to be cleared before use */
2499 	sldns_buffer_clear(h2_session->c->buffer);
2500 	if(sldns_buffer_remaining(h2_session->c->buffer) <
2501 		sldns_buffer_remaining(h2_stream->qbuffer)) {
2502 		/* qbuffer will be free'd in frame close cb */
2503 		sldns_buffer_clear(h2_session->c->buffer);
2504 		verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2505 			"qbuffer in c->buffer");
2506 		return -1;
2507 	}
2508 
2509 	sldns_buffer_write(h2_session->c->buffer,
2510 		sldns_buffer_current(h2_stream->qbuffer),
2511 		sldns_buffer_remaining(h2_stream->qbuffer));
2512 
2513 	lock_basic_lock(&http2_query_buffer_count_lock);
2514 	http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2515 	lock_basic_unlock(&http2_query_buffer_count_lock);
2516 	sldns_buffer_free(h2_stream->qbuffer);
2517 	h2_stream->qbuffer = NULL;
2518 
2519 	sldns_buffer_flip(h2_session->c->buffer);
2520 	h2_session->c->h2_stream = h2_stream;
2521 	fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2522 	if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2523 		NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2524 		return 1; /* answer in c->buffer */
2525 	}
2526 	sldns_buffer_clear(h2_session->c->buffer);
2527 	h2_session->c->h2_stream = NULL;
2528 	return 0; /* mesh state added, or dropped */
2529 }
2530 
2531 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2532  * stream. Gather collected request data and start query handling. */
http2_req_frame_recv_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2533 static int http2_req_frame_recv_cb(nghttp2_session* session,
2534 	const nghttp2_frame* frame, void* cb_arg)
2535 {
2536 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2537 	struct http2_stream* h2_stream;
2538 	int query_read_done;
2539 
2540 	if((frame->hd.type != NGHTTP2_DATA &&
2541 		frame->hd.type != NGHTTP2_HEADERS) ||
2542 		!(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2543 			return 0;
2544 	}
2545 
2546 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2547 		session, frame->hd.stream_id)))
2548 		return 0;
2549 
2550 	if(h2_stream->invalid_endpoint) {
2551 		h2_stream->status = HTTP_STATUS_NOT_FOUND;
2552 		goto submit_http_error;
2553 	}
2554 
2555 	if(h2_stream->invalid_content_type) {
2556 		h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2557 		goto submit_http_error;
2558 	}
2559 
2560 	if(h2_stream->http_method != HTTP_METHOD_GET &&
2561 		h2_stream->http_method != HTTP_METHOD_POST) {
2562 		h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2563 		goto submit_http_error;
2564 	}
2565 
2566 	if(h2_stream->query_too_large) {
2567 		if(h2_stream->http_method == HTTP_METHOD_POST)
2568 			h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2569 		else
2570 			h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2571 		goto submit_http_error;
2572 	}
2573 
2574 	if(!h2_stream->qbuffer) {
2575 		h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2576 		goto submit_http_error;
2577 	}
2578 
2579 	if(h2_stream->status) {
2580 submit_http_error:
2581 		verbose(VERB_QUERY, "http2 request invalid, returning :status="
2582 			"%d", h2_stream->status);
2583 		if(!http2_submit_error(h2_session, h2_stream)) {
2584 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2585 		}
2586 		return 0;
2587 	}
2588 	h2_stream->status = HTTP_STATUS_OK;
2589 
2590 	sldns_buffer_flip(h2_stream->qbuffer);
2591 	h2_session->postpone_drop = 1;
2592 	query_read_done = http2_query_read_done(h2_session, h2_stream);
2593 	if(query_read_done < 0)
2594 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2595 	else if(!query_read_done) {
2596 		if(h2_session->is_drop) {
2597 			/* connection needs to be closed. Return failure to make
2598 			 * sure no other action are taken anymore on comm point.
2599 			 * failure will result in reclaiming (and closing)
2600 			 * of comm point. */
2601 			verbose(VERB_QUERY, "http2 query dropped in worker cb");
2602 			h2_session->postpone_drop = 0;
2603 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2604 		}
2605 		/* nothing to submit right now, query added to mesh. */
2606 		h2_session->postpone_drop = 0;
2607 		return 0;
2608 	}
2609 	if(!http2_submit_dns_response(h2_session)) {
2610 		sldns_buffer_clear(h2_session->c->buffer);
2611 		h2_session->c->h2_stream = NULL;
2612 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2613 	}
2614 	verbose(VERB_QUERY, "http2 query submitted to session");
2615 	sldns_buffer_clear(h2_session->c->buffer);
2616 	h2_session->c->h2_stream = NULL;
2617 	return 0;
2618 }
2619 
2620 /** nghttp2 callback. Used to detect start of new streams. */
http2_req_begin_headers_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2621 static int http2_req_begin_headers_cb(nghttp2_session* session,
2622 	const nghttp2_frame* frame, void* cb_arg)
2623 {
2624 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2625 	struct http2_stream* h2_stream;
2626 	int ret;
2627 	if(frame->hd.type != NGHTTP2_HEADERS ||
2628 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2629 		/* only interested in request headers */
2630 		return 0;
2631 	}
2632 	if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2633 		log_err("malloc failure while creating http2 stream");
2634 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2635 	}
2636 	http2_session_add_stream(h2_session, h2_stream);
2637 	ret = nghttp2_session_set_stream_user_data(session,
2638 		frame->hd.stream_id, h2_stream);
2639 	if(ret) {
2640 		/* stream does not exist */
2641 		verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2642 			"error: %s", nghttp2_strerror(ret));
2643 		return NGHTTP2_ERR_CALLBACK_FAILURE;
2644 	}
2645 
2646 	return 0;
2647 }
2648 
2649 /**
2650  * base64url decode, store in qbuffer
2651  * @param h2_session: http2 session
2652  * @param h2_stream: http2 stream
2653  * @param start: start of the base64 string
2654  * @param length: length of the base64 string
2655  * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2656  * buffer will be NULL is unparseble.
2657  */
http2_buffer_uri_query(struct http2_session * h2_session,struct http2_stream * h2_stream,const uint8_t * start,size_t length)2658 static int http2_buffer_uri_query(struct http2_session* h2_session,
2659 	struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2660 {
2661 	size_t expectb64len;
2662 	int b64len;
2663 	if(h2_stream->http_method == HTTP_METHOD_POST)
2664 		return 1;
2665 	if(length == 0)
2666 		return 1;
2667 	if(h2_stream->qbuffer) {
2668 		verbose(VERB_ALGO, "http2_req_header fail, "
2669 			"qbuffer already set");
2670 		return 0;
2671 	}
2672 
2673 	/* calculate size, might be a bit bigger than the real
2674 	 * decoded buffer size */
2675 	expectb64len = sldns_b64_pton_calculate_size(length);
2676 	log_assert(expectb64len > 0);
2677 	if(expectb64len >
2678 		h2_session->c->http2_stream_max_qbuffer_size) {
2679 		h2_stream->query_too_large = 1;
2680 		return 1;
2681 	}
2682 
2683 	lock_basic_lock(&http2_query_buffer_count_lock);
2684 	if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2685 		lock_basic_unlock(&http2_query_buffer_count_lock);
2686 		verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2687 			"in http2-query-buffer-size");
2688 		return http2_submit_rst_stream(h2_session, h2_stream);
2689 	}
2690 	http2_query_buffer_count += expectb64len;
2691 	lock_basic_unlock(&http2_query_buffer_count_lock);
2692 	if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2693 		lock_basic_lock(&http2_query_buffer_count_lock);
2694 		http2_query_buffer_count -= expectb64len;
2695 		lock_basic_unlock(&http2_query_buffer_count_lock);
2696 		log_err("http2_req_header fail, qbuffer "
2697 			"malloc failure");
2698 		return 0;
2699 	}
2700 
2701 	if(sldns_b64_contains_nonurl((char const*)start, length)) {
2702 		char buf[65536+4];
2703 		verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2704 		/* copy to the scratch buffer temporarily to terminate the
2705 		 * string with a zero */
2706 		if(length+1 > sizeof(buf)) {
2707 			/* too long */
2708 			lock_basic_lock(&http2_query_buffer_count_lock);
2709 			http2_query_buffer_count -= expectb64len;
2710 			lock_basic_unlock(&http2_query_buffer_count_lock);
2711 			sldns_buffer_free(h2_stream->qbuffer);
2712 			h2_stream->qbuffer = NULL;
2713 			return 1;
2714 		}
2715 		memmove(buf, start, length);
2716 		buf[length] = 0;
2717 		if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2718 			h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2719 			lock_basic_lock(&http2_query_buffer_count_lock);
2720 			http2_query_buffer_count -= expectb64len;
2721 			lock_basic_unlock(&http2_query_buffer_count_lock);
2722 			sldns_buffer_free(h2_stream->qbuffer);
2723 			h2_stream->qbuffer = NULL;
2724 			return 1;
2725 		}
2726 	} else {
2727 		if(!(b64len = sldns_b64url_pton(
2728 			(char const *)start, length,
2729 			sldns_buffer_current(h2_stream->qbuffer),
2730 			expectb64len)) || b64len < 0) {
2731 			lock_basic_lock(&http2_query_buffer_count_lock);
2732 			http2_query_buffer_count -= expectb64len;
2733 			lock_basic_unlock(&http2_query_buffer_count_lock);
2734 			sldns_buffer_free(h2_stream->qbuffer);
2735 			h2_stream->qbuffer = NULL;
2736 			/* return without error, method can be an
2737 			 * unknown POST */
2738 			return 1;
2739 		}
2740 	}
2741 	sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
2742 	return 1;
2743 }
2744 
2745 /** nghttp2 callback. Used to parse headers from HEADER frames. */
http2_req_header_cb(nghttp2_session * session,const nghttp2_frame * frame,const uint8_t * name,size_t namelen,const uint8_t * value,size_t valuelen,uint8_t ATTR_UNUSED (flags),void * cb_arg)2746 static int http2_req_header_cb(nghttp2_session* session,
2747 	const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
2748 	const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
2749 	void* cb_arg)
2750 {
2751 	struct http2_stream* h2_stream = NULL;
2752 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2753 	/* nghttp2 deals with CONTINUATION frames and provides them as part of
2754 	 * the HEADER */
2755 	if(frame->hd.type != NGHTTP2_HEADERS ||
2756 		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2757 		/* only interested in request headers */
2758 		return 0;
2759 	}
2760 	if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
2761 		frame->hd.stream_id)))
2762 		return 0;
2763 
2764 	/* earlier checks already indicate we can stop handling this query */
2765 	if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
2766 		h2_stream->invalid_content_type ||
2767 		h2_stream->invalid_endpoint)
2768 		return 0;
2769 
2770 
2771 	/* nghttp2 performs some sanity checks in the headers, including:
2772 	 * name and value are guaranteed to be null terminated
2773 	 * name is guaranteed to be lowercase
2774 	 * content-length value is guaranteed to contain digits
2775 	 */
2776 
2777 	if(!h2_stream->http_method && namelen == 7 &&
2778 		memcmp(":method", name, namelen) == 0) {
2779 		/* Case insensitive check on :method value to be on the safe
2780 		 * side. I failed to find text about case sensitivity in specs.
2781 		 */
2782 		if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
2783 			h2_stream->http_method = HTTP_METHOD_GET;
2784 		else if(valuelen == 4 &&
2785 			strcasecmp("POST", (const char*)value) == 0) {
2786 			h2_stream->http_method = HTTP_METHOD_POST;
2787 			if(h2_stream->qbuffer) {
2788 				/* POST method uses query from DATA frames */
2789 				lock_basic_lock(&http2_query_buffer_count_lock);
2790 				http2_query_buffer_count -=
2791 					sldns_buffer_capacity(h2_stream->qbuffer);
2792 				lock_basic_unlock(&http2_query_buffer_count_lock);
2793 				sldns_buffer_free(h2_stream->qbuffer);
2794 				h2_stream->qbuffer = NULL;
2795 			}
2796 		} else
2797 			h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
2798 		return 0;
2799 	}
2800 	if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
2801 		/* :path may contain DNS query, depending on method. Method might
2802 		 * not be known yet here, so check after finishing receiving
2803 		 * stream. */
2804 #define	HTTP_QUERY_PARAM "?dns="
2805 		size_t el = strlen(h2_session->c->http_endpoint);
2806 		size_t qpl = strlen(HTTP_QUERY_PARAM);
2807 
2808 		if(valuelen < el || memcmp(h2_session->c->http_endpoint,
2809 			value, el) != 0) {
2810 			h2_stream->invalid_endpoint = 1;
2811 			return 0;
2812 		}
2813 		/* larger than endpoint only allowed if it is for the query
2814 		 * parameter */
2815 		if(valuelen <= el+qpl ||
2816 			memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
2817 			if(valuelen != el)
2818 				h2_stream->invalid_endpoint = 1;
2819 			return 0;
2820 		}
2821 
2822 		if(!http2_buffer_uri_query(h2_session, h2_stream,
2823 			value+(el+qpl), valuelen-(el+qpl))) {
2824 			return NGHTTP2_ERR_CALLBACK_FAILURE;
2825 		}
2826 		return 0;
2827 	}
2828 	/* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
2829 	 * and not needed when using GET. Don't enfore.
2830 	 * If set only allow lowercase "application/dns-message".
2831 	 *
2832 	 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
2833 	 * be able to handle "application/dns-message". Since that is the only
2834 	 * content-type supported we can ignore the accept header.
2835 	 */
2836 	if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
2837 		if(valuelen != 23 || memcmp("application/dns-message", value,
2838 			valuelen) != 0) {
2839 			h2_stream->invalid_content_type = 1;
2840 		}
2841 	}
2842 
2843 	/* Only interested in content-lentg for POST (on not yet known) method.
2844 	 */
2845 	if((!h2_stream->http_method ||
2846 		h2_stream->http_method == HTTP_METHOD_POST) &&
2847 		!h2_stream->content_length && namelen  == 14 &&
2848 		memcmp("content-length", name, namelen) == 0) {
2849 		if(valuelen > 5) {
2850 			h2_stream->query_too_large = 1;
2851 			return 0;
2852 		}
2853 		/* guaranteed to only contain digits and be null terminated */
2854 		h2_stream->content_length = atoi((const char*)value);
2855 		if(h2_stream->content_length >
2856 			h2_session->c->http2_stream_max_qbuffer_size) {
2857 			h2_stream->query_too_large = 1;
2858 			return 0;
2859 		}
2860 	}
2861 	return 0;
2862 }
2863 
2864 /** nghttp2 callback. Used to get data from DATA frames, which can contain
2865  * queries in POST requests. */
http2_req_data_chunk_recv_cb(nghttp2_session * ATTR_UNUSED (session),uint8_t ATTR_UNUSED (flags),int32_t stream_id,const uint8_t * data,size_t len,void * cb_arg)2866 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
2867 	uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
2868 	size_t len, void* cb_arg)
2869 {
2870 	struct http2_session* h2_session = (struct http2_session*)cb_arg;
2871 	struct http2_stream* h2_stream;
2872 	size_t qlen = 0;
2873 
2874 	if(!(h2_stream = nghttp2_session_get_stream_user_data(
2875 		h2_session->session, stream_id))) {
2876 		return 0;
2877 	}
2878 
2879 	if(h2_stream->query_too_large)
2880 		return 0;
2881 
2882 	if(!h2_stream->qbuffer) {
2883 		if(h2_stream->content_length) {
2884 			if(h2_stream->content_length < len)
2885 				/* getting more data in DATA frame than
2886 				 * advertised in content-length header. */
2887 				return NGHTTP2_ERR_CALLBACK_FAILURE;
2888 			qlen = h2_stream->content_length;
2889 		} else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
2890 			/* setting this to msg-buffer-size can result in a lot
2891 			 * of memory consuption. Most queries should fit in a
2892 			 * single DATA frame, and most POST queries will
2893 			 * contain content-length which does not impose this
2894 			 * limit. */
2895 			qlen = len;
2896 		}
2897 	}
2898 	if(!h2_stream->qbuffer && qlen) {
2899 		lock_basic_lock(&http2_query_buffer_count_lock);
2900 		if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
2901 			lock_basic_unlock(&http2_query_buffer_count_lock);
2902 			verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2903 				"in http2-query-buffer-size");
2904 			return http2_submit_rst_stream(h2_session, h2_stream);
2905 		}
2906 		http2_query_buffer_count += qlen;
2907 		lock_basic_unlock(&http2_query_buffer_count_lock);
2908 		if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
2909 			lock_basic_lock(&http2_query_buffer_count_lock);
2910 			http2_query_buffer_count -= qlen;
2911 			lock_basic_unlock(&http2_query_buffer_count_lock);
2912 		}
2913 	}
2914 
2915 	if(!h2_stream->qbuffer ||
2916 		sldns_buffer_remaining(h2_stream->qbuffer) < len) {
2917 		verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
2918 			"buffer space for POST query. Can happen on multi "
2919 			"frame requests without content-length header");
2920 		h2_stream->query_too_large = 1;
2921 		return 0;
2922 	}
2923 
2924 	sldns_buffer_write(h2_stream->qbuffer, data, len);
2925 
2926 	return 0;
2927 }
2928 
http2_req_stream_clear(struct http2_stream * h2_stream)2929 void http2_req_stream_clear(struct http2_stream* h2_stream)
2930 {
2931 	if(h2_stream->qbuffer) {
2932 		lock_basic_lock(&http2_query_buffer_count_lock);
2933 		http2_query_buffer_count -=
2934 			sldns_buffer_capacity(h2_stream->qbuffer);
2935 		lock_basic_unlock(&http2_query_buffer_count_lock);
2936 		sldns_buffer_free(h2_stream->qbuffer);
2937 		h2_stream->qbuffer = NULL;
2938 	}
2939 	if(h2_stream->rbuffer) {
2940 		lock_basic_lock(&http2_response_buffer_count_lock);
2941 		http2_response_buffer_count -=
2942 			sldns_buffer_capacity(h2_stream->rbuffer);
2943 		lock_basic_unlock(&http2_response_buffer_count_lock);
2944 		sldns_buffer_free(h2_stream->rbuffer);
2945 		h2_stream->rbuffer = NULL;
2946 	}
2947 }
2948 
http2_req_callbacks_create(void)2949 nghttp2_session_callbacks* http2_req_callbacks_create(void)
2950 {
2951 	nghttp2_session_callbacks *callbacks;
2952 	if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
2953 		log_err("failed to initialize nghttp2 callback");
2954 		return NULL;
2955 	}
2956 	/* reception of header block started, used to create h2_stream */
2957 	nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
2958 		http2_req_begin_headers_cb);
2959 	/* complete frame received, used to get data from stream if frame
2960 	 * has end stream flag, and start processing query */
2961 	nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
2962 		http2_req_frame_recv_cb);
2963 	/* get request info from headers */
2964 	nghttp2_session_callbacks_set_on_header_callback(callbacks,
2965 		http2_req_header_cb);
2966 	/* get data from DATA frames, containing POST query */
2967 	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
2968 		http2_req_data_chunk_recv_cb);
2969 
2970 	/* generic HTTP2 callbacks */
2971 	nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
2972 	nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
2973 	nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
2974 		http2_stream_close_cb);
2975 
2976 	return callbacks;
2977 }
2978 #endif /* HAVE_NGHTTP2 */
2979