1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2007-2008
5 * Swinburne University of Technology, Melbourne, Australia.
6 * Copyright (c) 2009-2010 Lawrence Stewart <[email protected]>
7 * Copyright (c) 2010 The FreeBSD Foundation
8 * All rights reserved.
9 *
10 * This software was developed at the Centre for Advanced Internet
11 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
12 * James Healy, made possible in part by a grant from the Cisco University
13 * Research Program Fund at Community Foundation Silicon Valley.
14 *
15 * Portions of this software were developed at the Centre for Advanced
16 * Internet Architectures, Swinburne University of Technology, Melbourne,
17 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 * notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 * notice, this list of conditions and the following disclaimer in the
26 * documentation and/or other materials provided with the distribution.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 */
40
41 /*
42 * This software was first released in 2007 by James Healy and Lawrence Stewart
43 * whilst working on the NewTCP research project at Swinburne University of
44 * Technology's Centre for Advanced Internet Architectures, Melbourne,
45 * Australia, which was made possible in part by a grant from the Cisco
46 * University Research Program Fund at Community Foundation Silicon Valley.
47 * More details are available at:
48 * http://caia.swin.edu.au/urp/newtcp/
49 */
50
51 #include <sys/cdefs.h>
52 __FBSDID("$FreeBSD$");
53
54 #include <sys/param.h>
55 #include <sys/kernel.h>
56 #include <sys/libkern.h>
57 #include <sys/lock.h>
58 #include <sys/malloc.h>
59 #include <sys/module.h>
60 #include <sys/mutex.h>
61 #include <sys/queue.h>
62 #include <sys/rwlock.h>
63 #include <sys/sbuf.h>
64 #include <sys/socket.h>
65 #include <sys/socketvar.h>
66 #include <sys/sysctl.h>
67
68 #include <net/vnet.h>
69
70 #include <netinet/in.h>
71 #include <netinet/in_pcb.h>
72 #include <netinet/tcp.h>
73 #include <netinet/tcp_var.h>
74 #include <netinet/cc/cc.h>
75
76 #include <netinet/cc/cc_module.h>
77
78 /*
79 * List of available cc algorithms on the current system. First element
80 * is used as the system default CC algorithm.
81 */
82 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
83
84 /* Protects the cc_list TAILQ. */
85 struct rwlock cc_list_lock;
86
87 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
88
89 /*
90 * Sysctl handler to show and change the default CC algorithm.
91 */
92 static int
cc_default_algo(SYSCTL_HANDLER_ARGS)93 cc_default_algo(SYSCTL_HANDLER_ARGS)
94 {
95 char default_cc[TCP_CA_NAME_MAX];
96 struct cc_algo *funcs;
97 int error;
98
99 /* Get the current default: */
100 CC_LIST_RLOCK();
101 strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc));
102 CC_LIST_RUNLOCK();
103
104 error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
105
106 /* Check for error or no change */
107 if (error != 0 || req->newptr == NULL)
108 goto done;
109
110 error = ESRCH;
111
112 /* Find algo with specified name and set it to default. */
113 CC_LIST_RLOCK();
114 STAILQ_FOREACH(funcs, &cc_list, entries) {
115 if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
116 continue;
117 V_default_cc_ptr = funcs;
118 error = 0;
119 break;
120 }
121 CC_LIST_RUNLOCK();
122 done:
123 return (error);
124 }
125
126 /*
127 * Sysctl handler to display the list of available CC algorithms.
128 */
129 static int
cc_list_available(SYSCTL_HANDLER_ARGS)130 cc_list_available(SYSCTL_HANDLER_ARGS)
131 {
132 struct cc_algo *algo;
133 struct sbuf *s;
134 int err, first, nalgos;
135
136 err = nalgos = 0;
137 first = 1;
138
139 CC_LIST_RLOCK();
140 STAILQ_FOREACH(algo, &cc_list, entries) {
141 nalgos++;
142 }
143 CC_LIST_RUNLOCK();
144
145 s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
146
147 if (s == NULL)
148 return (ENOMEM);
149
150 /*
151 * It is theoretically possible for the CC list to have grown in size
152 * since the call to sbuf_new() and therefore for the sbuf to be too
153 * small. If this were to happen (incredibly unlikely), the sbuf will
154 * reach an overflow condition, sbuf_printf() will return an error and
155 * the sysctl will fail gracefully.
156 */
157 CC_LIST_RLOCK();
158 STAILQ_FOREACH(algo, &cc_list, entries) {
159 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
160 if (err) {
161 /* Sbuf overflow condition. */
162 err = EOVERFLOW;
163 break;
164 }
165 first = 0;
166 }
167 CC_LIST_RUNLOCK();
168
169 if (!err) {
170 sbuf_finish(s);
171 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
172 }
173
174 sbuf_delete(s);
175 return (err);
176 }
177
178 /*
179 * Reset the default CC algo to NewReno for any netstack which is using the algo
180 * that is about to go away as its default.
181 */
182 static void
cc_checkreset_default(struct cc_algo * remove_cc)183 cc_checkreset_default(struct cc_algo *remove_cc)
184 {
185 VNET_ITERATOR_DECL(vnet_iter);
186
187 CC_LIST_LOCK_ASSERT();
188
189 VNET_LIST_RLOCK_NOSLEEP();
190 VNET_FOREACH(vnet_iter) {
191 CURVNET_SET(vnet_iter);
192 if (strncmp(CC_DEFAULT()->name, remove_cc->name,
193 TCP_CA_NAME_MAX) == 0)
194 V_default_cc_ptr = &newreno_cc_algo;
195 CURVNET_RESTORE();
196 }
197 VNET_LIST_RUNLOCK_NOSLEEP();
198 }
199
200 /*
201 * Initialise CC subsystem on system boot.
202 */
203 static void
cc_init(void)204 cc_init(void)
205 {
206 CC_LIST_LOCK_INIT();
207 STAILQ_INIT(&cc_list);
208 }
209
210 /*
211 * Returns non-zero on success, 0 on failure.
212 */
213 int
cc_deregister_algo(struct cc_algo * remove_cc)214 cc_deregister_algo(struct cc_algo *remove_cc)
215 {
216 struct cc_algo *funcs, *tmpfuncs;
217 int err;
218
219 err = ENOENT;
220
221 /* Never allow newreno to be deregistered. */
222 if (&newreno_cc_algo == remove_cc)
223 return (EPERM);
224
225 /* Remove algo from cc_list so that new connections can't use it. */
226 CC_LIST_WLOCK();
227 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
228 if (funcs == remove_cc) {
229 cc_checkreset_default(remove_cc);
230 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
231 err = 0;
232 break;
233 }
234 }
235 CC_LIST_WUNLOCK();
236
237 if (!err)
238 /*
239 * XXXLAS:
240 * - We may need to handle non-zero return values in future.
241 * - If we add CC framework support for protocols other than
242 * TCP, we may want a more generic way to handle this step.
243 */
244 tcp_ccalgounload(remove_cc);
245
246 return (err);
247 }
248
249 /*
250 * Returns 0 on success, non-zero on failure.
251 */
252 int
cc_register_algo(struct cc_algo * add_cc)253 cc_register_algo(struct cc_algo *add_cc)
254 {
255 struct cc_algo *funcs;
256 int err;
257
258 err = 0;
259
260 /*
261 * Iterate over list of registered CC algorithms and make sure
262 * we're not trying to add a duplicate.
263 */
264 CC_LIST_WLOCK();
265 STAILQ_FOREACH(funcs, &cc_list, entries) {
266 if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
267 TCP_CA_NAME_MAX) == 0)
268 err = EEXIST;
269 }
270
271 if (!err)
272 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
273
274 CC_LIST_WUNLOCK();
275
276 return (err);
277 }
278
279 /*
280 * Handles kld related events. Returns 0 on success, non-zero on failure.
281 */
282 int
cc_modevent(module_t mod,int event_type,void * data)283 cc_modevent(module_t mod, int event_type, void *data)
284 {
285 struct cc_algo *algo;
286 int err;
287
288 err = 0;
289 algo = (struct cc_algo *)data;
290
291 switch(event_type) {
292 case MOD_LOAD:
293 if (algo->mod_init != NULL)
294 err = algo->mod_init();
295 if (!err)
296 err = cc_register_algo(algo);
297 break;
298
299 case MOD_QUIESCE:
300 case MOD_SHUTDOWN:
301 case MOD_UNLOAD:
302 err = cc_deregister_algo(algo);
303 if (!err && algo->mod_destroy != NULL)
304 algo->mod_destroy();
305 if (err == ENOENT)
306 err = 0;
307 break;
308
309 default:
310 err = EINVAL;
311 break;
312 }
313
314 return (err);
315 }
316
317 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
318
319 /* Declare sysctl tree and populate it. */
320 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
321 "Congestion control related settings");
322
323 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
324 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
325 NULL, 0, cc_default_algo, "A",
326 "Default congestion control algorithm");
327
328 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available,
329 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
330 NULL, 0, cc_list_available, "A",
331 "List available congestion control algorithms");
332
333 VNET_DEFINE(int, cc_do_abe) = 0;
334 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW,
335 &VNET_NAME(cc_do_abe), 0,
336 "Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN)");
337
338 VNET_DEFINE(int, cc_abe_frlossreduce) = 0;
339 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe_frlossreduce, CTLFLAG_VNET | CTLFLAG_RW,
340 &VNET_NAME(cc_abe_frlossreduce), 0,
341 "Apply standard beta instead of ABE-beta during ECN-signalled congestion "
342 "recovery episodes if loss also needs to be repaired");
343