1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2010 The FreeBSD Foundation
5 *
6 * This software was developed by Edward Tomasz Napierala under sponsorship
7 * from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * $FreeBSD$
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/devctl.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/refcount.h>
41 #include <sys/jail.h>
42 #include <sys/kernel.h>
43 #include <sys/limits.h>
44 #include <sys/loginclass.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/racct.h>
48 #include <sys/rctl.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sx.h>
51 #include <sys/sysent.h>
52 #include <sys/sysproto.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55 #include <sys/eventhandler.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/rwlock.h>
59 #include <sys/sbuf.h>
60 #include <sys/taskqueue.h>
61 #include <sys/tree.h>
62 #include <vm/uma.h>
63
64 #ifdef RCTL
65 #ifndef RACCT
66 #error "The RCTL option requires the RACCT option"
67 #endif
68
69 FEATURE(rctl, "Resource Limits");
70
/*
 * HRF_* flag values.
 * NOTE(review): no user of these is visible in this part of the file;
 * confirm what consumes them before changing the values.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits.  The replacement lists are parenthesized so that each
 * macro stays a single operand wherever it is expanded (for example as the
 * right-hand side of a division).
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/* Margin that rctl_pcpu_available() subtracts from the available %cpu. */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
80
81 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
82 static int rctl_log_rate_limit = 10;
83 static int rctl_devctl_rate_limit = 10;
84
85 /*
86 * Values below are initialized in rctl_init().
87 */
88 static int rctl_throttle_min = -1;
89 static int rctl_throttle_max = -1;
90 static int rctl_throttle_pct = -1;
91 static int rctl_throttle_pct2 = -1;
92
93 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
94 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
95 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
96 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
97
98 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
99 "Resource Limits");
100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
101 &rctl_maxbufsize, 0, "Maximum output buffer size");
102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
103 &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
104 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
105 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
107 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
108 &rctl_throttle_min_sysctl, "IU",
109 "Shortest throttling duration, in hz");
110 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
111 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
112 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
113 &rctl_throttle_max_sysctl, "IU",
114 "Longest throttling duration, in hz");
115 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
117 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
118 &rctl_throttle_pct_sysctl, "IU",
119 "Throttling penalty for process consumption, in percent");
120 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
121 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
122 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
123 &rctl_throttle_pct2_sysctl, "IU",
124 "Throttling penalty for container consumption, in percent");
125 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
126
127 /*
128 * 'rctl_rule_link' connects a rule with every racct it's related to.
129 * For example, rule 'user:X:openfiles:deny=N/process' is linked
130 * with uidinfo for user X, and to each process of that user.
131 */
132 struct rctl_rule_link {
133 LIST_ENTRY(rctl_rule_link) rrl_next;
134 struct rctl_rule *rrl_rule;
135 int rrl_exceeded;
136 };
137
/*
 * One entry of a name <-> value lookup table.  Tables are terminated by
 * an entry whose d_name is NULL.
 */
struct dict {
	const char *d_name;	/* textual name */
	int d_value;		/* numeric value the name stands for */
};
142
143 static struct dict subjectnames[] = {
144 { "process", RCTL_SUBJECT_TYPE_PROCESS },
145 { "user", RCTL_SUBJECT_TYPE_USER },
146 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
147 { "jail", RCTL_SUBJECT_TYPE_JAIL },
148 { NULL, -1 }};
149
150 static struct dict resourcenames[] = {
151 { "cputime", RACCT_CPU },
152 { "datasize", RACCT_DATA },
153 { "stacksize", RACCT_STACK },
154 { "coredumpsize", RACCT_CORE },
155 { "memoryuse", RACCT_RSS },
156 { "memorylocked", RACCT_MEMLOCK },
157 { "maxproc", RACCT_NPROC },
158 { "openfiles", RACCT_NOFILE },
159 { "vmemoryuse", RACCT_VMEM },
160 { "pseudoterminals", RACCT_NPTS },
161 { "swapuse", RACCT_SWAP },
162 { "nthr", RACCT_NTHR },
163 { "msgqqueued", RACCT_MSGQQUEUED },
164 { "msgqsize", RACCT_MSGQSIZE },
165 { "nmsgq", RACCT_NMSGQ },
166 { "nsem", RACCT_NSEM },
167 { "nsemop", RACCT_NSEMOP },
168 { "nshm", RACCT_NSHM },
169 { "shmsize", RACCT_SHMSIZE },
170 { "wallclock", RACCT_WALLCLOCK },
171 { "pcpu", RACCT_PCTCPU },
172 { "readbps", RACCT_READBPS },
173 { "writebps", RACCT_WRITEBPS },
174 { "readiops", RACCT_READIOPS },
175 { "writeiops", RACCT_WRITEIOPS },
176 { NULL, -1 }};
177
178 static struct dict actionnames[] = {
179 { "sighup", RCTL_ACTION_SIGHUP },
180 { "sigint", RCTL_ACTION_SIGINT },
181 { "sigquit", RCTL_ACTION_SIGQUIT },
182 { "sigill", RCTL_ACTION_SIGILL },
183 { "sigtrap", RCTL_ACTION_SIGTRAP },
184 { "sigabrt", RCTL_ACTION_SIGABRT },
185 { "sigemt", RCTL_ACTION_SIGEMT },
186 { "sigfpe", RCTL_ACTION_SIGFPE },
187 { "sigkill", RCTL_ACTION_SIGKILL },
188 { "sigbus", RCTL_ACTION_SIGBUS },
189 { "sigsegv", RCTL_ACTION_SIGSEGV },
190 { "sigsys", RCTL_ACTION_SIGSYS },
191 { "sigpipe", RCTL_ACTION_SIGPIPE },
192 { "sigalrm", RCTL_ACTION_SIGALRM },
193 { "sigterm", RCTL_ACTION_SIGTERM },
194 { "sigurg", RCTL_ACTION_SIGURG },
195 { "sigstop", RCTL_ACTION_SIGSTOP },
196 { "sigtstp", RCTL_ACTION_SIGTSTP },
197 { "sigchld", RCTL_ACTION_SIGCHLD },
198 { "sigttin", RCTL_ACTION_SIGTTIN },
199 { "sigttou", RCTL_ACTION_SIGTTOU },
200 { "sigio", RCTL_ACTION_SIGIO },
201 { "sigxcpu", RCTL_ACTION_SIGXCPU },
202 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
203 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
204 { "sigprof", RCTL_ACTION_SIGPROF },
205 { "sigwinch", RCTL_ACTION_SIGWINCH },
206 { "siginfo", RCTL_ACTION_SIGINFO },
207 { "sigusr1", RCTL_ACTION_SIGUSR1 },
208 { "sigusr2", RCTL_ACTION_SIGUSR2 },
209 { "sigthr", RCTL_ACTION_SIGTHR },
210 { "deny", RCTL_ACTION_DENY },
211 { "log", RCTL_ACTION_LOG },
212 { "devctl", RCTL_ACTION_DEVCTL },
213 { "throttle", RCTL_ACTION_THROTTLE },
214 { NULL, -1 }};
215
216 static void rctl_init(void);
217 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
218
219 static uma_zone_t rctl_rule_zone;
220 static uma_zone_t rctl_rule_link_zone;
221
222 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
223 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
224
225 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
226
rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)227 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
228 {
229 int error, val = rctl_throttle_min;
230
231 error = sysctl_handle_int(oidp, &val, 0, req);
232 if (error || !req->newptr)
233 return (error);
234 if (val < 1 || val > rctl_throttle_max)
235 return (EINVAL);
236
237 RACCT_LOCK();
238 rctl_throttle_min = val;
239 RACCT_UNLOCK();
240
241 return (0);
242 }
243
rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)244 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
245 {
246 int error, val = rctl_throttle_max;
247
248 error = sysctl_handle_int(oidp, &val, 0, req);
249 if (error || !req->newptr)
250 return (error);
251 if (val < rctl_throttle_min)
252 return (EINVAL);
253
254 RACCT_LOCK();
255 rctl_throttle_max = val;
256 RACCT_UNLOCK();
257
258 return (0);
259 }
260
rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)261 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
262 {
263 int error, val = rctl_throttle_pct;
264
265 error = sysctl_handle_int(oidp, &val, 0, req);
266 if (error || !req->newptr)
267 return (error);
268 if (val < 0)
269 return (EINVAL);
270
271 RACCT_LOCK();
272 rctl_throttle_pct = val;
273 RACCT_UNLOCK();
274
275 return (0);
276 }
277
rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)278 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
279 {
280 int error, val = rctl_throttle_pct2;
281
282 error = sysctl_handle_int(oidp, &val, 0, req);
283 if (error || !req->newptr)
284 return (error);
285 if (val < 0)
286 return (EINVAL);
287
288 RACCT_LOCK();
289 rctl_throttle_pct2 = val;
290 RACCT_UNLOCK();
291
292 return (0);
293 }
294
295 static const char *
rctl_subject_type_name(int subject)296 rctl_subject_type_name(int subject)
297 {
298 int i;
299
300 for (i = 0; subjectnames[i].d_name != NULL; i++) {
301 if (subjectnames[i].d_value == subject)
302 return (subjectnames[i].d_name);
303 }
304
305 panic("rctl_subject_type_name: unknown subject type %d", subject);
306 }
307
308 static const char *
rctl_action_name(int action)309 rctl_action_name(int action)
310 {
311 int i;
312
313 for (i = 0; actionnames[i].d_name != NULL; i++) {
314 if (actionnames[i].d_value == action)
315 return (actionnames[i].d_name);
316 }
317
318 panic("rctl_action_name: unknown action %d", action);
319 }
320
321 const char *
rctl_resource_name(int resource)322 rctl_resource_name(int resource)
323 {
324 int i;
325
326 for (i = 0; resourcenames[i].d_name != NULL; i++) {
327 if (resourcenames[i].d_value == resource)
328 return (resourcenames[i].d_name);
329 }
330
331 panic("rctl_resource_name: unknown resource %d", resource);
332 }
333
334 static struct racct *
rctl_proc_rule_to_racct(const struct proc * p,const struct rctl_rule * rule)335 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
336 {
337 struct ucred *cred = p->p_ucred;
338
339 ASSERT_RACCT_ENABLED();
340 RACCT_LOCK_ASSERT();
341
342 switch (rule->rr_per) {
343 case RCTL_SUBJECT_TYPE_PROCESS:
344 return (p->p_racct);
345 case RCTL_SUBJECT_TYPE_USER:
346 return (cred->cr_ruidinfo->ui_racct);
347 case RCTL_SUBJECT_TYPE_LOGINCLASS:
348 return (cred->cr_loginclass->lc_racct);
349 case RCTL_SUBJECT_TYPE_JAIL:
350 return (cred->cr_prison->pr_prison_racct->prr_racct);
351 default:
352 panic("%s: unknown per %d", __func__, rule->rr_per);
353 }
354 }
355
356 /*
357 * Return the amount of resource that can be allocated by 'p' before
358 * hitting 'rule'.
359 */
360 static int64_t
rctl_available_resource(const struct proc * p,const struct rctl_rule * rule)361 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
362 {
363 const struct racct *racct;
364 int64_t available;
365
366 ASSERT_RACCT_ENABLED();
367 RACCT_LOCK_ASSERT();
368
369 racct = rctl_proc_rule_to_racct(p, rule);
370 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
371
372 return (available);
373 }
374
375 /*
376 * Called every second for proc, uidinfo, loginclass, and jail containers.
377 * If the limit isn't exceeded, it decreases the usage amount to zero.
378 * Otherwise, it decreases it by the value of the limit. This way
379 * resource consumption exceeding the limit "carries over" to the next
380 * period.
381 */
382 void
rctl_throttle_decay(struct racct * racct,int resource)383 rctl_throttle_decay(struct racct *racct, int resource)
384 {
385 struct rctl_rule *rule;
386 struct rctl_rule_link *link;
387 int64_t minavailable;
388
389 ASSERT_RACCT_ENABLED();
390 RACCT_LOCK_ASSERT();
391
392 minavailable = INT64_MAX;
393
394 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
395 rule = link->rrl_rule;
396
397 if (rule->rr_resource != resource)
398 continue;
399 if (rule->rr_action != RCTL_ACTION_THROTTLE)
400 continue;
401
402 if (rule->rr_amount < minavailable)
403 minavailable = rule->rr_amount;
404 }
405
406 if (racct->r_resources[resource] < minavailable) {
407 racct->r_resources[resource] = 0;
408 } else {
409 /*
410 * Cap utilization counter at ten times the limit. Otherwise,
411 * if we changed the rule lowering the allowed amount, it could
412 * take unreasonably long time for the accumulated resource
413 * usage to drop.
414 */
415 if (racct->r_resources[resource] > minavailable * 10)
416 racct->r_resources[resource] = minavailable * 10;
417
418 racct->r_resources[resource] -= minavailable;
419 }
420 }
421
422 /*
423 * Special version of rctl_get_available() for the %CPU resource.
424 * We slightly cheat here and return less than we normally would.
425 */
426 int64_t
rctl_pcpu_available(const struct proc * p)427 rctl_pcpu_available(const struct proc *p) {
428 struct rctl_rule *rule;
429 struct rctl_rule_link *link;
430 int64_t available, minavailable, limit;
431
432 ASSERT_RACCT_ENABLED();
433 RACCT_LOCK_ASSERT();
434
435 minavailable = INT64_MAX;
436 limit = 0;
437
438 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
439 rule = link->rrl_rule;
440 if (rule->rr_resource != RACCT_PCTCPU)
441 continue;
442 if (rule->rr_action != RCTL_ACTION_DENY)
443 continue;
444 available = rctl_available_resource(p, rule);
445 if (available < minavailable) {
446 minavailable = available;
447 limit = rule->rr_amount;
448 }
449 }
450
451 /*
452 * Return slightly less than actual value of the available
453 * %cpu resource. This makes %cpu throttling more aggressive
454 * and lets us act sooner than the limits are already exceeded.
455 */
456 if (limit != 0) {
457 if (limit > 2 * RCTL_PCPU_SHIFT)
458 minavailable -= RCTL_PCPU_SHIFT;
459 else
460 minavailable -= (limit / 2);
461 }
462
463 return (minavailable);
464 }
465
/*
 * Saturating unsigned addition: returns a + b, or UINT64_MAX if the sum
 * does not fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{

	/* The sum wraps exactly when b exceeds the room left above a. */
	if (b > UINT64_MAX - a)
		return (UINT64_MAX);

	return (a + b);
}
481
/*
 * Saturating unsigned multiplication: returns a * b, or UINT64_MAX if the
 * product does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

	/* A zero factor can never overflow and must not be divided by. */
	if (b == 0)
		return (0);
	if (a > UINT64_MAX / b)
		return (UINT64_MAX);

	return (a * b);
}
491
492 /*
493 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
494 * to what it keeps allocated now. Returns non-zero if the allocation should
495 * be denied, 0 otherwise.
496 */
497 int
rctl_enforce(struct proc * p,int resource,uint64_t amount)498 rctl_enforce(struct proc *p, int resource, uint64_t amount)
499 {
500 static struct timeval log_lasttime, devctl_lasttime;
501 static int log_curtime = 0, devctl_curtime = 0;
502 struct rctl_rule *rule;
503 struct rctl_rule_link *link;
504 struct sbuf sb;
505 char *buf;
506 int64_t available;
507 uint64_t sleep_ms, sleep_ratio;
508 int should_deny = 0;
509
510 ASSERT_RACCT_ENABLED();
511 RACCT_LOCK_ASSERT();
512
513 /*
514 * There may be more than one matching rule; go through all of them.
515 * Denial should be done last, after logging and sending signals.
516 */
517 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
518 rule = link->rrl_rule;
519 if (rule->rr_resource != resource)
520 continue;
521
522 available = rctl_available_resource(p, rule);
523 if (available >= (int64_t)amount) {
524 link->rrl_exceeded = 0;
525 continue;
526 }
527
528 switch (rule->rr_action) {
529 case RCTL_ACTION_DENY:
530 should_deny = 1;
531 continue;
532 case RCTL_ACTION_LOG:
533 /*
534 * If rrl_exceeded != 0, it means we've already
535 * logged a warning for this process.
536 */
537 if (link->rrl_exceeded != 0)
538 continue;
539
540 /*
541 * If the process state is not fully initialized yet,
542 * we can't access most of the required fields, e.g.
543 * p->p_comm. This happens when called from fork1().
544 * Ignore this rule for now; it will be processed just
545 * after fork, when called from racct_proc_fork_done().
546 */
547 if (p->p_state != PRS_NORMAL)
548 continue;
549
550 if (!ppsratecheck(&log_lasttime, &log_curtime,
551 rctl_log_rate_limit))
552 continue;
553
554 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
555 if (buf == NULL) {
556 printf("rctl_enforce: out of memory\n");
557 continue;
558 }
559 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
560 rctl_rule_to_sbuf(&sb, rule);
561 sbuf_finish(&sb);
562 printf("rctl: rule \"%s\" matched by pid %d "
563 "(%s), uid %d, jail %s\n", sbuf_data(&sb),
564 p->p_pid, p->p_comm, p->p_ucred->cr_uid,
565 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
566 sbuf_delete(&sb);
567 free(buf, M_RCTL);
568 link->rrl_exceeded = 1;
569 continue;
570 case RCTL_ACTION_DEVCTL:
571 if (link->rrl_exceeded != 0)
572 continue;
573
574 if (p->p_state != PRS_NORMAL)
575 continue;
576
577 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
578 rctl_devctl_rate_limit))
579 continue;
580
581 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
582 if (buf == NULL) {
583 printf("rctl_enforce: out of memory\n");
584 continue;
585 }
586 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
587 sbuf_printf(&sb, "rule=");
588 rctl_rule_to_sbuf(&sb, rule);
589 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
590 p->p_pid, p->p_ucred->cr_ruid,
591 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
592 sbuf_finish(&sb);
593 devctl_notify("RCTL", "rule", "matched",
594 sbuf_data(&sb));
595 sbuf_delete(&sb);
596 free(buf, M_RCTL);
597 link->rrl_exceeded = 1;
598 continue;
599 case RCTL_ACTION_THROTTLE:
600 if (p->p_state != PRS_NORMAL)
601 continue;
602
603 if (rule->rr_amount == 0) {
604 racct_proc_throttle(p, rctl_throttle_max);
605 continue;
606 }
607
608 /*
609 * Make the process sleep for a fraction of second
610 * proportional to the ratio of process' resource
611 * utilization compared to the limit. The point is
612 * to penalize resource hogs: processes that consume
613 * more of the available resources sleep for longer.
614 *
615 * We're trying to defer division until the very end,
616 * to minimize the rounding effects. The following
617 * calculation could have been written in a clearer
618 * way like this:
619 *
620 * sleep_ms = hz * p->p_racct->r_resources[resource] /
621 * rule->rr_amount;
622 * sleep_ms *= rctl_throttle_pct / 100;
623 * if (sleep_ms < rctl_throttle_min)
624 * sleep_ms = rctl_throttle_min;
625 *
626 */
627 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
628 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
629 if (sleep_ms < rctl_throttle_min * rule->rr_amount)
630 sleep_ms = rctl_throttle_min * rule->rr_amount;
631
632 /*
633 * Multiply that by the ratio of the resource
634 * consumption for the container compared to the limit,
635 * squared. In other words, a process in a container
636 * that is two times over the limit will be throttled
637 * four times as much for hitting the same rule. The
638 * point is to penalize processes more if the container
639 * itself (eg certain UID or jail) is above the limit.
640 */
641 if (available < 0)
642 sleep_ratio = -available / rule->rr_amount;
643 else
644 sleep_ratio = 0;
645 sleep_ratio = xmul(sleep_ratio, sleep_ratio);
646 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
647 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
648
649 /*
650 * Finally the division.
651 */
652 sleep_ms /= rule->rr_amount;
653
654 if (sleep_ms > rctl_throttle_max)
655 sleep_ms = rctl_throttle_max;
656 #if 0
657 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
658 __func__, p->p_pid, p->p_comm,
659 p->p_racct->r_resources[resource],
660 rule->rr_amount, (uintmax_t)sleep_ms,
661 (uintmax_t)sleep_ratio, (intmax_t)available);
662 #endif
663
664 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
665 __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
666 racct_proc_throttle(p, sleep_ms);
667 continue;
668 default:
669 if (link->rrl_exceeded != 0)
670 continue;
671
672 if (p->p_state != PRS_NORMAL)
673 continue;
674
675 KASSERT(rule->rr_action > 0 &&
676 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
677 ("rctl_enforce: unknown action %d",
678 rule->rr_action));
679
680 /*
681 * We're using the fact that RCTL_ACTION_SIG* values
682 * are equal to their counterparts from sys/signal.h.
683 */
684 kern_psignal(p, rule->rr_action);
685 link->rrl_exceeded = 1;
686 continue;
687 }
688 }
689
690 if (should_deny) {
691 /*
692 * Return fake error code; the caller should change it
693 * into one proper for the situation - EFSIZ, ENOMEM etc.
694 */
695 return (EDOOFUS);
696 }
697
698 return (0);
699 }
700
701 uint64_t
rctl_get_limit(struct proc * p,int resource)702 rctl_get_limit(struct proc *p, int resource)
703 {
704 struct rctl_rule *rule;
705 struct rctl_rule_link *link;
706 uint64_t amount = UINT64_MAX;
707
708 ASSERT_RACCT_ENABLED();
709 RACCT_LOCK_ASSERT();
710
711 /*
712 * There may be more than one matching rule; go through all of them.
713 * Denial should be done last, after logging and sending signals.
714 */
715 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
716 rule = link->rrl_rule;
717 if (rule->rr_resource != resource)
718 continue;
719 if (rule->rr_action != RCTL_ACTION_DENY)
720 continue;
721 if (rule->rr_amount < amount)
722 amount = rule->rr_amount;
723 }
724
725 return (amount);
726 }
727
728 uint64_t
rctl_get_available(struct proc * p,int resource)729 rctl_get_available(struct proc *p, int resource)
730 {
731 struct rctl_rule *rule;
732 struct rctl_rule_link *link;
733 int64_t available, minavailable, allocated;
734
735 minavailable = INT64_MAX;
736
737 ASSERT_RACCT_ENABLED();
738 RACCT_LOCK_ASSERT();
739
740 /*
741 * There may be more than one matching rule; go through all of them.
742 * Denial should be done last, after logging and sending signals.
743 */
744 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
745 rule = link->rrl_rule;
746 if (rule->rr_resource != resource)
747 continue;
748 if (rule->rr_action != RCTL_ACTION_DENY)
749 continue;
750 available = rctl_available_resource(p, rule);
751 if (available < minavailable)
752 minavailable = available;
753 }
754
755 /*
756 * XXX: Think about this _hard_.
757 */
758 allocated = p->p_racct->r_resources[resource];
759 if (minavailable < INT64_MAX - allocated)
760 minavailable += allocated;
761 if (minavailable < 0)
762 minavailable = 0;
763
764 return (minavailable);
765 }
766
767 static int
rctl_rule_matches(const struct rctl_rule * rule,const struct rctl_rule * filter)768 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
769 {
770
771 ASSERT_RACCT_ENABLED();
772
773 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
774 if (rule->rr_subject_type != filter->rr_subject_type)
775 return (0);
776
777 switch (filter->rr_subject_type) {
778 case RCTL_SUBJECT_TYPE_PROCESS:
779 if (filter->rr_subject.rs_proc != NULL &&
780 rule->rr_subject.rs_proc !=
781 filter->rr_subject.rs_proc)
782 return (0);
783 break;
784 case RCTL_SUBJECT_TYPE_USER:
785 if (filter->rr_subject.rs_uip != NULL &&
786 rule->rr_subject.rs_uip !=
787 filter->rr_subject.rs_uip)
788 return (0);
789 break;
790 case RCTL_SUBJECT_TYPE_LOGINCLASS:
791 if (filter->rr_subject.rs_loginclass != NULL &&
792 rule->rr_subject.rs_loginclass !=
793 filter->rr_subject.rs_loginclass)
794 return (0);
795 break;
796 case RCTL_SUBJECT_TYPE_JAIL:
797 if (filter->rr_subject.rs_prison_racct != NULL &&
798 rule->rr_subject.rs_prison_racct !=
799 filter->rr_subject.rs_prison_racct)
800 return (0);
801 break;
802 default:
803 panic("rctl_rule_matches: unknown subject type %d",
804 filter->rr_subject_type);
805 }
806 }
807
808 if (filter->rr_resource != RACCT_UNDEFINED) {
809 if (rule->rr_resource != filter->rr_resource)
810 return (0);
811 }
812
813 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
814 if (rule->rr_action != filter->rr_action)
815 return (0);
816 }
817
818 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
819 if (rule->rr_amount != filter->rr_amount)
820 return (0);
821 }
822
823 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
824 if (rule->rr_per != filter->rr_per)
825 return (0);
826 }
827
828 return (1);
829 }
830
831 static int
str2value(const char * str,int * value,struct dict * table)832 str2value(const char *str, int *value, struct dict *table)
833 {
834 int i;
835
836 if (value == NULL)
837 return (EINVAL);
838
839 for (i = 0; table[i].d_name != NULL; i++) {
840 if (strcasecmp(table[i].d_name, str) == 0) {
841 *value = table[i].d_value;
842 return (0);
843 }
844 }
845
846 return (EINVAL);
847 }
848
/*
 * Parse 'str' as a decimal id and store the result in '*value'.
 *
 * Returns 0 on success.  Returns EINVAL if 'str' is NULL, is empty, or is
 * not consumed in full by strtoul().  The empty string is rejected
 * explicitly: strtoul() consumes nothing from it, which would otherwise
 * pass the "whole string consumed" test and yield id 0.
 */
static int
str2id(const char *str, id_t *value)
{
	char *end;

	if (str == NULL || *str == '\0')
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	/* Anything left over after the number makes the input invalid. */
	if (*end != '\0')
		return (EINVAL);

	return (0);
}
863
/*
 * Parse 'str' as a decimal amount and store the result in '*value'.
 *
 * Returns 0 on success.  Returns EINVAL if 'str' is NULL, is empty, or is
 * not consumed in full by strtoul(); returns ERANGE if the stored value
 * is negative.  The empty string is rejected explicitly: strtoul()
 * consumes nothing from it, which would otherwise pass the "whole string
 * consumed" test and yield amount 0.
 *
 * NOTE(review): strtoul() returns unsigned long; where that type is
 * narrower than 64 bits, amounts beyond its range cannot be represented.
 * Consider a 64-bit conversion routine.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL || *str == '\0')
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	/* Anything left over after the number makes the input invalid. */
	if (*end != '\0')
		return (EINVAL);

	if (*value < 0)
		return (ERANGE);

	return (0);
}
881
882 /*
883 * Connect the rule to the racct, increasing refcount for the rule.
884 */
885 static void
rctl_racct_add_rule(struct racct * racct,struct rctl_rule * rule)886 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
887 {
888 struct rctl_rule_link *link;
889
890 ASSERT_RACCT_ENABLED();
891 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
892
893 rctl_rule_acquire(rule);
894 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
895 link->rrl_rule = rule;
896 link->rrl_exceeded = 0;
897
898 RACCT_LOCK();
899 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
900 RACCT_UNLOCK();
901 }
902
903 static int
rctl_racct_add_rule_locked(struct racct * racct,struct rctl_rule * rule)904 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
905 {
906 struct rctl_rule_link *link;
907
908 ASSERT_RACCT_ENABLED();
909 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
910 RACCT_LOCK_ASSERT();
911
912 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
913 if (link == NULL)
914 return (ENOMEM);
915 rctl_rule_acquire(rule);
916 link->rrl_rule = rule;
917 link->rrl_exceeded = 0;
918
919 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
920
921 return (0);
922 }
923
924 /*
925 * Remove limits for a rules matching the filter and release
926 * the refcounts for the rules, possibly freeing them. Returns
927 * the number of limit structures removed.
928 */
929 static int
rctl_racct_remove_rules(struct racct * racct,const struct rctl_rule * filter)930 rctl_racct_remove_rules(struct racct *racct,
931 const struct rctl_rule *filter)
932 {
933 struct rctl_rule_link *link, *linktmp;
934 int removed = 0;
935
936 ASSERT_RACCT_ENABLED();
937 RACCT_LOCK_ASSERT();
938
939 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
940 if (!rctl_rule_matches(link->rrl_rule, filter))
941 continue;
942
943 LIST_REMOVE(link, rrl_next);
944 rctl_rule_release(link->rrl_rule);
945 uma_zfree(rctl_rule_link_zone, link);
946 removed++;
947 }
948 return (removed);
949 }
950
951 static void
rctl_rule_acquire_subject(struct rctl_rule * rule)952 rctl_rule_acquire_subject(struct rctl_rule *rule)
953 {
954
955 ASSERT_RACCT_ENABLED();
956
957 switch (rule->rr_subject_type) {
958 case RCTL_SUBJECT_TYPE_UNDEFINED:
959 case RCTL_SUBJECT_TYPE_PROCESS:
960 break;
961 case RCTL_SUBJECT_TYPE_JAIL:
962 if (rule->rr_subject.rs_prison_racct != NULL)
963 prison_racct_hold(rule->rr_subject.rs_prison_racct);
964 break;
965 case RCTL_SUBJECT_TYPE_USER:
966 if (rule->rr_subject.rs_uip != NULL)
967 uihold(rule->rr_subject.rs_uip);
968 break;
969 case RCTL_SUBJECT_TYPE_LOGINCLASS:
970 if (rule->rr_subject.rs_loginclass != NULL)
971 loginclass_hold(rule->rr_subject.rs_loginclass);
972 break;
973 default:
974 panic("rctl_rule_acquire_subject: unknown subject type %d",
975 rule->rr_subject_type);
976 }
977 }
978
979 static void
rctl_rule_release_subject(struct rctl_rule * rule)980 rctl_rule_release_subject(struct rctl_rule *rule)
981 {
982
983 ASSERT_RACCT_ENABLED();
984
985 switch (rule->rr_subject_type) {
986 case RCTL_SUBJECT_TYPE_UNDEFINED:
987 case RCTL_SUBJECT_TYPE_PROCESS:
988 break;
989 case RCTL_SUBJECT_TYPE_JAIL:
990 if (rule->rr_subject.rs_prison_racct != NULL)
991 prison_racct_free(rule->rr_subject.rs_prison_racct);
992 break;
993 case RCTL_SUBJECT_TYPE_USER:
994 if (rule->rr_subject.rs_uip != NULL)
995 uifree(rule->rr_subject.rs_uip);
996 break;
997 case RCTL_SUBJECT_TYPE_LOGINCLASS:
998 if (rule->rr_subject.rs_loginclass != NULL)
999 loginclass_free(rule->rr_subject.rs_loginclass);
1000 break;
1001 default:
1002 panic("rctl_rule_release_subject: unknown subject type %d",
1003 rule->rr_subject_type);
1004 }
1005 }
1006
1007 struct rctl_rule *
rctl_rule_alloc(int flags)1008 rctl_rule_alloc(int flags)
1009 {
1010 struct rctl_rule *rule;
1011
1012 ASSERT_RACCT_ENABLED();
1013
1014 rule = uma_zalloc(rctl_rule_zone, flags);
1015 if (rule == NULL)
1016 return (NULL);
1017 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1018 rule->rr_subject.rs_proc = NULL;
1019 rule->rr_subject.rs_uip = NULL;
1020 rule->rr_subject.rs_loginclass = NULL;
1021 rule->rr_subject.rs_prison_racct = NULL;
1022 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1023 rule->rr_resource = RACCT_UNDEFINED;
1024 rule->rr_action = RCTL_ACTION_UNDEFINED;
1025 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1026 refcount_init(&rule->rr_refcount, 1);
1027
1028 return (rule);
1029 }
1030
1031 struct rctl_rule *
rctl_rule_duplicate(const struct rctl_rule * rule,int flags)1032 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1033 {
1034 struct rctl_rule *copy;
1035
1036 ASSERT_RACCT_ENABLED();
1037
1038 copy = uma_zalloc(rctl_rule_zone, flags);
1039 if (copy == NULL)
1040 return (NULL);
1041 copy->rr_subject_type = rule->rr_subject_type;
1042 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1043 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1044 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1045 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1046 copy->rr_per = rule->rr_per;
1047 copy->rr_resource = rule->rr_resource;
1048 copy->rr_action = rule->rr_action;
1049 copy->rr_amount = rule->rr_amount;
1050 refcount_init(©->rr_refcount, 1);
1051 rctl_rule_acquire_subject(copy);
1052
1053 return (copy);
1054 }
1055
1056 void
rctl_rule_acquire(struct rctl_rule * rule)1057 rctl_rule_acquire(struct rctl_rule *rule)
1058 {
1059
1060 ASSERT_RACCT_ENABLED();
1061 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1062
1063 refcount_acquire(&rule->rr_refcount);
1064 }
1065
1066 static void
rctl_rule_free(void * context,int pending)1067 rctl_rule_free(void *context, int pending)
1068 {
1069 struct rctl_rule *rule;
1070
1071 rule = (struct rctl_rule *)context;
1072
1073 ASSERT_RACCT_ENABLED();
1074 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1075
1076 /*
1077 * We don't need locking here; rule is guaranteed to be inaccessible.
1078 */
1079
1080 rctl_rule_release_subject(rule);
1081 uma_zfree(rctl_rule_zone, rule);
1082 }
1083
1084 void
rctl_rule_release(struct rctl_rule * rule)1085 rctl_rule_release(struct rctl_rule *rule)
1086 {
1087
1088 ASSERT_RACCT_ENABLED();
1089 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1090
1091 if (refcount_release(&rule->rr_refcount)) {
1092 /*
1093 * rctl_rule_release() is often called when iterating
1094 * over all the uidinfo structures in the system,
1095 * holding uihashtbl_lock. Since rctl_rule_free()
1096 * might end up calling uifree(), this would lead
1097 * to lock recursion. Use taskqueue to avoid this.
1098 */
1099 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1100 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
1101 }
1102 }
1103
1104 static int
rctl_rule_fully_specified(const struct rctl_rule * rule)1105 rctl_rule_fully_specified(const struct rctl_rule *rule)
1106 {
1107
1108 ASSERT_RACCT_ENABLED();
1109
1110 switch (rule->rr_subject_type) {
1111 case RCTL_SUBJECT_TYPE_UNDEFINED:
1112 return (0);
1113 case RCTL_SUBJECT_TYPE_PROCESS:
1114 if (rule->rr_subject.rs_proc == NULL)
1115 return (0);
1116 break;
1117 case RCTL_SUBJECT_TYPE_USER:
1118 if (rule->rr_subject.rs_uip == NULL)
1119 return (0);
1120 break;
1121 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1122 if (rule->rr_subject.rs_loginclass == NULL)
1123 return (0);
1124 break;
1125 case RCTL_SUBJECT_TYPE_JAIL:
1126 if (rule->rr_subject.rs_prison_racct == NULL)
1127 return (0);
1128 break;
1129 default:
1130 panic("rctl_rule_fully_specified: unknown subject type %d",
1131 rule->rr_subject_type);
1132 }
1133 if (rule->rr_resource == RACCT_UNDEFINED)
1134 return (0);
1135 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1136 return (0);
1137 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1138 return (0);
1139 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
1140 return (0);
1141
1142 return (1);
1143 }
1144
1145 static int
rctl_string_to_rule(char * rulestr,struct rctl_rule ** rulep)1146 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1147 {
1148 struct rctl_rule *rule;
1149 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1150 *amountstr, *perstr;
1151 id_t id;
1152 int error = 0;
1153
1154 ASSERT_RACCT_ENABLED();
1155
1156 rule = rctl_rule_alloc(M_WAITOK);
1157
1158 subjectstr = strsep(&rulestr, ":");
1159 subject_idstr = strsep(&rulestr, ":");
1160 resourcestr = strsep(&rulestr, ":");
1161 actionstr = strsep(&rulestr, "=/");
1162 amountstr = strsep(&rulestr, "/");
1163 perstr = rulestr;
1164
1165 if (subjectstr == NULL || subjectstr[0] == '\0')
1166 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1167 else {
1168 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1169 if (error != 0)
1170 goto out;
1171 }
1172
1173 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1174 rule->rr_subject.rs_proc = NULL;
1175 rule->rr_subject.rs_uip = NULL;
1176 rule->rr_subject.rs_loginclass = NULL;
1177 rule->rr_subject.rs_prison_racct = NULL;
1178 } else {
1179 switch (rule->rr_subject_type) {
1180 case RCTL_SUBJECT_TYPE_UNDEFINED:
1181 error = EINVAL;
1182 goto out;
1183 case RCTL_SUBJECT_TYPE_PROCESS:
1184 error = str2id(subject_idstr, &id);
1185 if (error != 0)
1186 goto out;
1187 sx_assert(&allproc_lock, SA_LOCKED);
1188 rule->rr_subject.rs_proc = pfind(id);
1189 if (rule->rr_subject.rs_proc == NULL) {
1190 error = ESRCH;
1191 goto out;
1192 }
1193 PROC_UNLOCK(rule->rr_subject.rs_proc);
1194 break;
1195 case RCTL_SUBJECT_TYPE_USER:
1196 error = str2id(subject_idstr, &id);
1197 if (error != 0)
1198 goto out;
1199 rule->rr_subject.rs_uip = uifind(id);
1200 break;
1201 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1202 rule->rr_subject.rs_loginclass =
1203 loginclass_find(subject_idstr);
1204 if (rule->rr_subject.rs_loginclass == NULL) {
1205 error = ENAMETOOLONG;
1206 goto out;
1207 }
1208 break;
1209 case RCTL_SUBJECT_TYPE_JAIL:
1210 rule->rr_subject.rs_prison_racct =
1211 prison_racct_find(subject_idstr);
1212 if (rule->rr_subject.rs_prison_racct == NULL) {
1213 error = ENAMETOOLONG;
1214 goto out;
1215 }
1216 break;
1217 default:
1218 panic("rctl_string_to_rule: unknown subject type %d",
1219 rule->rr_subject_type);
1220 }
1221 }
1222
1223 if (resourcestr == NULL || resourcestr[0] == '\0')
1224 rule->rr_resource = RACCT_UNDEFINED;
1225 else {
1226 error = str2value(resourcestr, &rule->rr_resource,
1227 resourcenames);
1228 if (error != 0)
1229 goto out;
1230 }
1231
1232 if (actionstr == NULL || actionstr[0] == '\0')
1233 rule->rr_action = RCTL_ACTION_UNDEFINED;
1234 else {
1235 error = str2value(actionstr, &rule->rr_action, actionnames);
1236 if (error != 0)
1237 goto out;
1238 }
1239
1240 if (amountstr == NULL || amountstr[0] == '\0')
1241 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1242 else {
1243 error = str2int64(amountstr, &rule->rr_amount);
1244 if (error != 0)
1245 goto out;
1246 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1247 if (rule->rr_amount > INT64_MAX / 1000000) {
1248 error = ERANGE;
1249 goto out;
1250 }
1251 rule->rr_amount *= 1000000;
1252 }
1253 }
1254
1255 if (perstr == NULL || perstr[0] == '\0')
1256 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1257 else {
1258 error = str2value(perstr, &rule->rr_per, subjectnames);
1259 if (error != 0)
1260 goto out;
1261 }
1262
1263 out:
1264 if (error == 0)
1265 *rulep = rule;
1266 else
1267 rctl_rule_release(rule);
1268
1269 return (error);
1270 }
1271
1272 /*
1273 * Link a rule with all the subjects it applies to.
1274 */
1275 int
rctl_rule_add(struct rctl_rule * rule)1276 rctl_rule_add(struct rctl_rule *rule)
1277 {
1278 struct proc *p;
1279 struct ucred *cred;
1280 struct uidinfo *uip;
1281 struct prison *pr;
1282 struct prison_racct *prr;
1283 struct loginclass *lc;
1284 struct rctl_rule *rule2;
1285 int match;
1286
1287 ASSERT_RACCT_ENABLED();
1288 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1289
1290 /*
1291 * Some rules just don't make sense, like "deny" rule for an undeniable
1292 * resource. The exception are the RSS and %CPU resources - they are
1293 * not deniable in the racct sense, but the limit is enforced in
1294 * a different way.
1295 */
1296 if (rule->rr_action == RCTL_ACTION_DENY &&
1297 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1298 rule->rr_resource != RACCT_RSS &&
1299 rule->rr_resource != RACCT_PCTCPU) {
1300 return (EOPNOTSUPP);
1301 }
1302
1303 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1304 !RACCT_IS_DECAYING(rule->rr_resource)) {
1305 return (EOPNOTSUPP);
1306 }
1307
1308 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1309 rule->rr_resource == RACCT_PCTCPU) {
1310 return (EOPNOTSUPP);
1311 }
1312
1313 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1314 RACCT_IS_SLOPPY(rule->rr_resource)) {
1315 return (EOPNOTSUPP);
1316 }
1317
1318 /*
1319 * Make sure there are no duplicated rules. Also, for the "deny"
1320 * rules, remove ones differing only by "amount".
1321 */
1322 if (rule->rr_action == RCTL_ACTION_DENY) {
1323 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1324 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1325 rctl_rule_remove(rule2);
1326 rctl_rule_release(rule2);
1327 } else
1328 rctl_rule_remove(rule);
1329
1330 switch (rule->rr_subject_type) {
1331 case RCTL_SUBJECT_TYPE_PROCESS:
1332 p = rule->rr_subject.rs_proc;
1333 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1334
1335 rctl_racct_add_rule(p->p_racct, rule);
1336 /*
1337 * In case of per-process rule, we don't have anything more
1338 * to do.
1339 */
1340 return (0);
1341
1342 case RCTL_SUBJECT_TYPE_USER:
1343 uip = rule->rr_subject.rs_uip;
1344 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1345 rctl_racct_add_rule(uip->ui_racct, rule);
1346 break;
1347
1348 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1349 lc = rule->rr_subject.rs_loginclass;
1350 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1351 rctl_racct_add_rule(lc->lc_racct, rule);
1352 break;
1353
1354 case RCTL_SUBJECT_TYPE_JAIL:
1355 prr = rule->rr_subject.rs_prison_racct;
1356 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1357 rctl_racct_add_rule(prr->prr_racct, rule);
1358 break;
1359
1360 default:
1361 panic("rctl_rule_add: unknown subject type %d",
1362 rule->rr_subject_type);
1363 }
1364
1365 /*
1366 * Now go through all the processes and add the new rule to the ones
1367 * it applies to.
1368 */
1369 sx_assert(&allproc_lock, SA_LOCKED);
1370 FOREACH_PROC_IN_SYSTEM(p) {
1371 cred = p->p_ucred;
1372 switch (rule->rr_subject_type) {
1373 case RCTL_SUBJECT_TYPE_USER:
1374 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1375 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1376 break;
1377 continue;
1378 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1379 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1380 break;
1381 continue;
1382 case RCTL_SUBJECT_TYPE_JAIL:
1383 match = 0;
1384 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1385 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1386 match = 1;
1387 break;
1388 }
1389 }
1390 if (match)
1391 break;
1392 continue;
1393 default:
1394 panic("rctl_rule_add: unknown subject type %d",
1395 rule->rr_subject_type);
1396 }
1397
1398 rctl_racct_add_rule(p->p_racct, rule);
1399 }
1400
1401 return (0);
1402 }
1403
/*
 * "Pre" hook handed to the *_racct_foreach() iterators in this file:
 * takes the racct lock.
 */
static void
rctl_rule_pre_callback(void)
{

	RACCT_LOCK();
}
1410
/*
 * "Post" hook handed to the *_racct_foreach() iterators in this file:
 * drops the racct lock.
 */
static void
rctl_rule_post_callback(void)
{

	RACCT_UNLOCK();
}
1417
/*
 * Per-racct iterator callback used by rctl_rule_remove().  'arg2' is
 * the filter rule and 'arg3' points to an int; the value returned by
 * rctl_racct_remove_rules() for this racct is added to that int.
 * The racct lock must be held.
 */
static void
rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
{
	struct rctl_rule *filter = (struct rctl_rule *)arg2;
	int *total = (int *)arg3;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	*total += rctl_racct_remove_rules(racct, filter);
}
1431
1432 /*
1433 * Remove all rules that match the filter.
1434 */
1435 int
rctl_rule_remove(struct rctl_rule * filter)1436 rctl_rule_remove(struct rctl_rule *filter)
1437 {
1438 struct proc *p;
1439 int found = 0;
1440
1441 ASSERT_RACCT_ENABLED();
1442
1443 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1444 filter->rr_subject.rs_proc != NULL) {
1445 p = filter->rr_subject.rs_proc;
1446 RACCT_LOCK();
1447 found = rctl_racct_remove_rules(p->p_racct, filter);
1448 RACCT_UNLOCK();
1449 if (found)
1450 return (0);
1451 return (ESRCH);
1452 }
1453
1454 loginclass_racct_foreach(rctl_rule_remove_callback,
1455 rctl_rule_pre_callback, rctl_rule_post_callback,
1456 filter, (void *)&found);
1457 ui_racct_foreach(rctl_rule_remove_callback,
1458 rctl_rule_pre_callback, rctl_rule_post_callback,
1459 filter, (void *)&found);
1460 prison_racct_foreach(rctl_rule_remove_callback,
1461 rctl_rule_pre_callback, rctl_rule_post_callback,
1462 filter, (void *)&found);
1463
1464 sx_assert(&allproc_lock, SA_LOCKED);
1465 RACCT_LOCK();
1466 FOREACH_PROC_IN_SYSTEM(p) {
1467 found += rctl_racct_remove_rules(p->p_racct, filter);
1468 }
1469 RACCT_UNLOCK();
1470
1471 if (found)
1472 return (0);
1473 return (ESRCH);
1474 }
1475
1476 /*
1477 * Appends a rule to the sbuf.
1478 */
1479 static void
rctl_rule_to_sbuf(struct sbuf * sb,const struct rctl_rule * rule)1480 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1481 {
1482 int64_t amount;
1483
1484 ASSERT_RACCT_ENABLED();
1485
1486 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1487
1488 switch (rule->rr_subject_type) {
1489 case RCTL_SUBJECT_TYPE_PROCESS:
1490 if (rule->rr_subject.rs_proc == NULL)
1491 sbuf_printf(sb, ":");
1492 else
1493 sbuf_printf(sb, "%d:",
1494 rule->rr_subject.rs_proc->p_pid);
1495 break;
1496 case RCTL_SUBJECT_TYPE_USER:
1497 if (rule->rr_subject.rs_uip == NULL)
1498 sbuf_printf(sb, ":");
1499 else
1500 sbuf_printf(sb, "%d:",
1501 rule->rr_subject.rs_uip->ui_uid);
1502 break;
1503 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1504 if (rule->rr_subject.rs_loginclass == NULL)
1505 sbuf_printf(sb, ":");
1506 else
1507 sbuf_printf(sb, "%s:",
1508 rule->rr_subject.rs_loginclass->lc_name);
1509 break;
1510 case RCTL_SUBJECT_TYPE_JAIL:
1511 if (rule->rr_subject.rs_prison_racct == NULL)
1512 sbuf_printf(sb, ":");
1513 else
1514 sbuf_printf(sb, "%s:",
1515 rule->rr_subject.rs_prison_racct->prr_name);
1516 break;
1517 default:
1518 panic("rctl_rule_to_sbuf: unknown subject type %d",
1519 rule->rr_subject_type);
1520 }
1521
1522 amount = rule->rr_amount;
1523 if (amount != RCTL_AMOUNT_UNDEFINED &&
1524 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1525 amount /= 1000000;
1526
1527 sbuf_printf(sb, "%s:%s=%jd",
1528 rctl_resource_name(rule->rr_resource),
1529 rctl_action_name(rule->rr_action),
1530 amount);
1531
1532 if (rule->rr_per != rule->rr_subject_type)
1533 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1534 }
1535
1536 /*
1537 * Routine used by RCTL syscalls to read in input string.
1538 */
1539 static int
rctl_read_inbuf(char ** inputstr,const char * inbufp,size_t inbuflen)1540 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1541 {
1542 char *str;
1543 int error;
1544
1545 ASSERT_RACCT_ENABLED();
1546
1547 if (inbuflen <= 0)
1548 return (EINVAL);
1549 if (inbuflen > RCTL_MAX_INBUFSIZE)
1550 return (E2BIG);
1551
1552 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1553 error = copyinstr(inbufp, str, inbuflen, NULL);
1554 if (error != 0) {
1555 free(str, M_RCTL);
1556 return (error);
1557 }
1558
1559 *inputstr = str;
1560
1561 return (0);
1562 }
1563
1564 /*
1565 * Routine used by RCTL syscalls to write out output string.
1566 */
1567 static int
rctl_write_outbuf(struct sbuf * outputsbuf,char * outbufp,size_t outbuflen)1568 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1569 {
1570 int error;
1571
1572 ASSERT_RACCT_ENABLED();
1573
1574 if (outputsbuf == NULL)
1575 return (0);
1576
1577 sbuf_finish(outputsbuf);
1578 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1579 sbuf_delete(outputsbuf);
1580 return (ERANGE);
1581 }
1582 error = copyout(sbuf_data(outputsbuf), outbufp,
1583 sbuf_len(outputsbuf) + 1);
1584 sbuf_delete(outputsbuf);
1585 return (error);
1586 }
1587
1588 static struct sbuf *
rctl_racct_to_sbuf(struct racct * racct,int sloppy)1589 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1590 {
1591 struct sbuf *sb;
1592 int64_t amount;
1593 int i;
1594
1595 ASSERT_RACCT_ENABLED();
1596
1597 sb = sbuf_new_auto();
1598 for (i = 0; i <= RACCT_MAX; i++) {
1599 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1600 continue;
1601 RACCT_LOCK();
1602 amount = racct->r_resources[i];
1603 RACCT_UNLOCK();
1604 if (RACCT_IS_IN_MILLIONS(i))
1605 amount /= 1000000;
1606 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1607 }
1608 sbuf_setpos(sb, sbuf_len(sb) - 1);
1609 return (sb);
1610 }
1611
1612 int
sys_rctl_get_racct(struct thread * td,struct rctl_get_racct_args * uap)1613 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1614 {
1615 struct rctl_rule *filter;
1616 struct sbuf *outputsbuf = NULL;
1617 struct proc *p;
1618 struct uidinfo *uip;
1619 struct loginclass *lc;
1620 struct prison_racct *prr;
1621 char *inputstr;
1622 int error;
1623
1624 if (!racct_enable)
1625 return (ENOSYS);
1626
1627 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1628 if (error != 0)
1629 return (error);
1630
1631 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1632 if (error != 0)
1633 return (error);
1634
1635 sx_slock(&allproc_lock);
1636 error = rctl_string_to_rule(inputstr, &filter);
1637 free(inputstr, M_RCTL);
1638 if (error != 0) {
1639 sx_sunlock(&allproc_lock);
1640 return (error);
1641 }
1642
1643 switch (filter->rr_subject_type) {
1644 case RCTL_SUBJECT_TYPE_PROCESS:
1645 p = filter->rr_subject.rs_proc;
1646 if (p == NULL) {
1647 error = EINVAL;
1648 goto out;
1649 }
1650 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1651 break;
1652 case RCTL_SUBJECT_TYPE_USER:
1653 uip = filter->rr_subject.rs_uip;
1654 if (uip == NULL) {
1655 error = EINVAL;
1656 goto out;
1657 }
1658 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1659 break;
1660 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1661 lc = filter->rr_subject.rs_loginclass;
1662 if (lc == NULL) {
1663 error = EINVAL;
1664 goto out;
1665 }
1666 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1667 break;
1668 case RCTL_SUBJECT_TYPE_JAIL:
1669 prr = filter->rr_subject.rs_prison_racct;
1670 if (prr == NULL) {
1671 error = EINVAL;
1672 goto out;
1673 }
1674 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1675 break;
1676 default:
1677 error = EINVAL;
1678 }
1679 out:
1680 rctl_rule_release(filter);
1681 sx_sunlock(&allproc_lock);
1682 if (error != 0)
1683 return (error);
1684
1685 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1686
1687 return (error);
1688 }
1689
1690 static void
rctl_get_rules_callback(struct racct * racct,void * arg2,void * arg3)1691 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1692 {
1693 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1694 struct rctl_rule_link *link;
1695 struct sbuf *sb = (struct sbuf *)arg3;
1696
1697 ASSERT_RACCT_ENABLED();
1698 RACCT_LOCK_ASSERT();
1699
1700 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1701 if (!rctl_rule_matches(link->rrl_rule, filter))
1702 continue;
1703 rctl_rule_to_sbuf(sb, link->rrl_rule);
1704 sbuf_printf(sb, ",");
1705 }
1706 }
1707
1708 int
sys_rctl_get_rules(struct thread * td,struct rctl_get_rules_args * uap)1709 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1710 {
1711 struct sbuf *sb;
1712 struct rctl_rule *filter;
1713 struct rctl_rule_link *link;
1714 struct proc *p;
1715 char *inputstr, *buf;
1716 size_t bufsize;
1717 int error;
1718
1719 if (!racct_enable)
1720 return (ENOSYS);
1721
1722 error = priv_check(td, PRIV_RCTL_GET_RULES);
1723 if (error != 0)
1724 return (error);
1725
1726 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1727 if (error != 0)
1728 return (error);
1729
1730 sx_slock(&allproc_lock);
1731 error = rctl_string_to_rule(inputstr, &filter);
1732 free(inputstr, M_RCTL);
1733 if (error != 0) {
1734 sx_sunlock(&allproc_lock);
1735 return (error);
1736 }
1737
1738 bufsize = uap->outbuflen;
1739 if (bufsize > rctl_maxbufsize) {
1740 sx_sunlock(&allproc_lock);
1741 return (E2BIG);
1742 }
1743
1744 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1745 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1746 KASSERT(sb != NULL, ("sbuf_new failed"));
1747
1748 FOREACH_PROC_IN_SYSTEM(p) {
1749 RACCT_LOCK();
1750 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1751 /*
1752 * Non-process rules will be added to the buffer later.
1753 * Adding them here would result in duplicated output.
1754 */
1755 if (link->rrl_rule->rr_subject_type !=
1756 RCTL_SUBJECT_TYPE_PROCESS)
1757 continue;
1758 if (!rctl_rule_matches(link->rrl_rule, filter))
1759 continue;
1760 rctl_rule_to_sbuf(sb, link->rrl_rule);
1761 sbuf_printf(sb, ",");
1762 }
1763 RACCT_UNLOCK();
1764 }
1765
1766 loginclass_racct_foreach(rctl_get_rules_callback,
1767 rctl_rule_pre_callback, rctl_rule_post_callback,
1768 filter, sb);
1769 ui_racct_foreach(rctl_get_rules_callback,
1770 rctl_rule_pre_callback, rctl_rule_post_callback,
1771 filter, sb);
1772 prison_racct_foreach(rctl_get_rules_callback,
1773 rctl_rule_pre_callback, rctl_rule_post_callback,
1774 filter, sb);
1775 if (sbuf_error(sb) == ENOMEM) {
1776 error = ERANGE;
1777 goto out;
1778 }
1779
1780 /*
1781 * Remove trailing ",".
1782 */
1783 if (sbuf_len(sb) > 0)
1784 sbuf_setpos(sb, sbuf_len(sb) - 1);
1785
1786 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1787 out:
1788 rctl_rule_release(filter);
1789 sx_sunlock(&allproc_lock);
1790 free(buf, M_RCTL);
1791 return (error);
1792 }
1793
1794 int
sys_rctl_get_limits(struct thread * td,struct rctl_get_limits_args * uap)1795 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1796 {
1797 struct sbuf *sb;
1798 struct rctl_rule *filter;
1799 struct rctl_rule_link *link;
1800 char *inputstr, *buf;
1801 size_t bufsize;
1802 int error;
1803
1804 if (!racct_enable)
1805 return (ENOSYS);
1806
1807 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1808 if (error != 0)
1809 return (error);
1810
1811 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1812 if (error != 0)
1813 return (error);
1814
1815 sx_slock(&allproc_lock);
1816 error = rctl_string_to_rule(inputstr, &filter);
1817 free(inputstr, M_RCTL);
1818 if (error != 0) {
1819 sx_sunlock(&allproc_lock);
1820 return (error);
1821 }
1822
1823 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1824 rctl_rule_release(filter);
1825 sx_sunlock(&allproc_lock);
1826 return (EINVAL);
1827 }
1828 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1829 rctl_rule_release(filter);
1830 sx_sunlock(&allproc_lock);
1831 return (EOPNOTSUPP);
1832 }
1833 if (filter->rr_subject.rs_proc == NULL) {
1834 rctl_rule_release(filter);
1835 sx_sunlock(&allproc_lock);
1836 return (EINVAL);
1837 }
1838
1839 bufsize = uap->outbuflen;
1840 if (bufsize > rctl_maxbufsize) {
1841 rctl_rule_release(filter);
1842 sx_sunlock(&allproc_lock);
1843 return (E2BIG);
1844 }
1845
1846 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1847 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1848 KASSERT(sb != NULL, ("sbuf_new failed"));
1849
1850 RACCT_LOCK();
1851 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1852 rrl_next) {
1853 rctl_rule_to_sbuf(sb, link->rrl_rule);
1854 sbuf_printf(sb, ",");
1855 }
1856 RACCT_UNLOCK();
1857 if (sbuf_error(sb) == ENOMEM) {
1858 error = ERANGE;
1859 sbuf_delete(sb);
1860 goto out;
1861 }
1862
1863 /*
1864 * Remove trailing ",".
1865 */
1866 if (sbuf_len(sb) > 0)
1867 sbuf_setpos(sb, sbuf_len(sb) - 1);
1868
1869 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1870 out:
1871 rctl_rule_release(filter);
1872 sx_sunlock(&allproc_lock);
1873 free(buf, M_RCTL);
1874 return (error);
1875 }
1876
1877 int
sys_rctl_add_rule(struct thread * td,struct rctl_add_rule_args * uap)1878 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1879 {
1880 struct rctl_rule *rule;
1881 char *inputstr;
1882 int error;
1883
1884 if (!racct_enable)
1885 return (ENOSYS);
1886
1887 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1888 if (error != 0)
1889 return (error);
1890
1891 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1892 if (error != 0)
1893 return (error);
1894
1895 sx_slock(&allproc_lock);
1896 error = rctl_string_to_rule(inputstr, &rule);
1897 free(inputstr, M_RCTL);
1898 if (error != 0) {
1899 sx_sunlock(&allproc_lock);
1900 return (error);
1901 }
1902 /*
1903 * The 'per' part of a rule is optional.
1904 */
1905 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1906 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1907 rule->rr_per = rule->rr_subject_type;
1908
1909 if (!rctl_rule_fully_specified(rule)) {
1910 error = EINVAL;
1911 goto out;
1912 }
1913
1914 error = rctl_rule_add(rule);
1915
1916 out:
1917 rctl_rule_release(rule);
1918 sx_sunlock(&allproc_lock);
1919 return (error);
1920 }
1921
1922 int
sys_rctl_remove_rule(struct thread * td,struct rctl_remove_rule_args * uap)1923 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1924 {
1925 struct rctl_rule *filter;
1926 char *inputstr;
1927 int error;
1928
1929 if (!racct_enable)
1930 return (ENOSYS);
1931
1932 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1933 if (error != 0)
1934 return (error);
1935
1936 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1937 if (error != 0)
1938 return (error);
1939
1940 sx_slock(&allproc_lock);
1941 error = rctl_string_to_rule(inputstr, &filter);
1942 free(inputstr, M_RCTL);
1943 if (error != 0) {
1944 sx_sunlock(&allproc_lock);
1945 return (error);
1946 }
1947
1948 error = rctl_rule_remove(filter);
1949 rctl_rule_release(filter);
1950 sx_sunlock(&allproc_lock);
1951
1952 return (error);
1953 }
1954
1955 /*
1956 * Update RCTL rule list after credential change.
1957 */
1958 void
rctl_proc_ucred_changed(struct proc * p,struct ucred * newcred)1959 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1960 {
1961 LIST_HEAD(, rctl_rule_link) newrules;
1962 struct rctl_rule_link *link, *newlink;
1963 struct uidinfo *newuip;
1964 struct loginclass *newlc;
1965 struct prison_racct *newprr;
1966 int rulecnt, i;
1967
1968 if (!racct_enable)
1969 return;
1970
1971 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1972
1973 newuip = newcred->cr_ruidinfo;
1974 newlc = newcred->cr_loginclass;
1975 newprr = newcred->cr_prison->pr_prison_racct;
1976
1977 LIST_INIT(&newrules);
1978
1979 again:
1980 /*
1981 * First, count the rules that apply to the process with new
1982 * credentials.
1983 */
1984 rulecnt = 0;
1985 RACCT_LOCK();
1986 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1987 if (link->rrl_rule->rr_subject_type ==
1988 RCTL_SUBJECT_TYPE_PROCESS)
1989 rulecnt++;
1990 }
1991 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1992 rulecnt++;
1993 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1994 rulecnt++;
1995 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1996 rulecnt++;
1997 RACCT_UNLOCK();
1998
1999 /*
2000 * Create temporary list. We've dropped the rctl_lock in order
2001 * to use M_WAITOK.
2002 */
2003 for (i = 0; i < rulecnt; i++) {
2004 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
2005 newlink->rrl_rule = NULL;
2006 newlink->rrl_exceeded = 0;
2007 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2008 }
2009
2010 newlink = LIST_FIRST(&newrules);
2011
2012 /*
2013 * Assign rules to the newly allocated list entries.
2014 */
2015 RACCT_LOCK();
2016 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2017 if (link->rrl_rule->rr_subject_type ==
2018 RCTL_SUBJECT_TYPE_PROCESS) {
2019 if (newlink == NULL)
2020 goto goaround;
2021 rctl_rule_acquire(link->rrl_rule);
2022 newlink->rrl_rule = link->rrl_rule;
2023 newlink->rrl_exceeded = link->rrl_exceeded;
2024 newlink = LIST_NEXT(newlink, rrl_next);
2025 rulecnt--;
2026 }
2027 }
2028
2029 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2030 if (newlink == NULL)
2031 goto goaround;
2032 rctl_rule_acquire(link->rrl_rule);
2033 newlink->rrl_rule = link->rrl_rule;
2034 newlink->rrl_exceeded = link->rrl_exceeded;
2035 newlink = LIST_NEXT(newlink, rrl_next);
2036 rulecnt--;
2037 }
2038
2039 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2040 if (newlink == NULL)
2041 goto goaround;
2042 rctl_rule_acquire(link->rrl_rule);
2043 newlink->rrl_rule = link->rrl_rule;
2044 newlink->rrl_exceeded = link->rrl_exceeded;
2045 newlink = LIST_NEXT(newlink, rrl_next);
2046 rulecnt--;
2047 }
2048
2049 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2050 if (newlink == NULL)
2051 goto goaround;
2052 rctl_rule_acquire(link->rrl_rule);
2053 newlink->rrl_rule = link->rrl_rule;
2054 newlink->rrl_exceeded = link->rrl_exceeded;
2055 newlink = LIST_NEXT(newlink, rrl_next);
2056 rulecnt--;
2057 }
2058
2059 if (rulecnt == 0) {
2060 /*
2061 * Free the old rule list.
2062 */
2063 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2064 link = LIST_FIRST(&p->p_racct->r_rule_links);
2065 LIST_REMOVE(link, rrl_next);
2066 rctl_rule_release(link->rrl_rule);
2067 uma_zfree(rctl_rule_link_zone, link);
2068 }
2069
2070 /*
2071 * Replace lists and we're done.
2072 *
2073 * XXX: Is there any way to switch list heads instead
2074 * of iterating here?
2075 */
2076 while (!LIST_EMPTY(&newrules)) {
2077 newlink = LIST_FIRST(&newrules);
2078 LIST_REMOVE(newlink, rrl_next);
2079 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2080 newlink, rrl_next);
2081 }
2082
2083 RACCT_UNLOCK();
2084
2085 return;
2086 }
2087
2088 goaround:
2089 RACCT_UNLOCK();
2090
2091 /*
2092 * Rule list changed while we were not holding the rctl_lock.
2093 * Free the new list and try again.
2094 */
2095 while (!LIST_EMPTY(&newrules)) {
2096 newlink = LIST_FIRST(&newrules);
2097 LIST_REMOVE(newlink, rrl_next);
2098 if (newlink->rrl_rule != NULL)
2099 rctl_rule_release(newlink->rrl_rule);
2100 uma_zfree(rctl_rule_link_zone, newlink);
2101 }
2102
2103 goto again;
2104 }
2105
2106 /*
2107 * Assign RCTL rules to the newly created process.
2108 */
2109 int
rctl_proc_fork(struct proc * parent,struct proc * child)2110 rctl_proc_fork(struct proc *parent, struct proc *child)
2111 {
2112 struct rctl_rule *rule;
2113 struct rctl_rule_link *link;
2114 int error;
2115
2116 ASSERT_RACCT_ENABLED();
2117 RACCT_LOCK_ASSERT();
2118 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2119
2120 LIST_INIT(&child->p_racct->r_rule_links);
2121
2122 /*
2123 * Go through limits applicable to the parent and assign them
2124 * to the child. Rules with 'process' subject have to be duplicated
2125 * in order to make their rr_subject point to the new process.
2126 */
2127 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2128 if (link->rrl_rule->rr_subject_type ==
2129 RCTL_SUBJECT_TYPE_PROCESS) {
2130 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2131 if (rule == NULL)
2132 goto fail;
2133 KASSERT(rule->rr_subject.rs_proc == parent,
2134 ("rule->rr_subject.rs_proc != parent"));
2135 rule->rr_subject.rs_proc = child;
2136 error = rctl_racct_add_rule_locked(child->p_racct,
2137 rule);
2138 rctl_rule_release(rule);
2139 if (error != 0)
2140 goto fail;
2141 } else {
2142 error = rctl_racct_add_rule_locked(child->p_racct,
2143 link->rrl_rule);
2144 if (error != 0)
2145 goto fail;
2146 }
2147 }
2148
2149 return (0);
2150
2151 fail:
2152 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2153 link = LIST_FIRST(&child->p_racct->r_rule_links);
2154 LIST_REMOVE(link, rrl_next);
2155 rctl_rule_release(link->rrl_rule);
2156 uma_zfree(rctl_rule_link_zone, link);
2157 }
2158
2159 return (EAGAIN);
2160 }
2161
2162 /*
2163 * Release rules attached to the racct.
2164 */
2165 void
rctl_racct_release(struct racct * racct)2166 rctl_racct_release(struct racct *racct)
2167 {
2168 struct rctl_rule_link *link;
2169
2170 ASSERT_RACCT_ENABLED();
2171 RACCT_LOCK_ASSERT();
2172
2173 while (!LIST_EMPTY(&racct->r_rule_links)) {
2174 link = LIST_FIRST(&racct->r_rule_links);
2175 LIST_REMOVE(link, rrl_next);
2176 rctl_rule_release(link->rrl_rule);
2177 uma_zfree(rctl_rule_link_zone, link);
2178 }
2179 }
2180
2181 static void
rctl_init(void)2182 rctl_init(void)
2183 {
2184
2185 if (!racct_enable)
2186 return;
2187
2188 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2189 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2190 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2191 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2192 UMA_ALIGN_PTR, 0);
2193
2194 /*
2195 * Set default values, making sure not to overwrite the ones
2196 * fetched from tunables. Most of those could be set at the
2197 * declaration, except for the rctl_throttle_max - we cannot
2198 * set it there due to hz not being compile time constant.
2199 */
2200 if (rctl_throttle_min < 1)
2201 rctl_throttle_min = 1;
2202 if (rctl_throttle_max < rctl_throttle_min)
2203 rctl_throttle_max = 2 * hz;
2204 if (rctl_throttle_pct < 0)
2205 rctl_throttle_pct = 100;
2206 if (rctl_throttle_pct2 < 0)
2207 rctl_throttle_pct2 = 100;
2208 }
2209
2210 #else /* !RCTL */
2211
/*
 * Stub used when RCTL is not compiled in: the system call always fails
 * with ENOSYS.  Both arguments are ignored.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{
	const int error = ENOSYS;

	return (error);
}
2218
/*
 * Stub used when RCTL is not compiled in: the system call always fails
 * with ENOSYS.  Both arguments are ignored.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{
	const int error = ENOSYS;

	return (error);
}
2225
/*
 * Stub used when RCTL is not compiled in: the system call always fails
 * with ENOSYS.  Both arguments are ignored.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{
	const int error = ENOSYS;

	return (error);
}
2232
/*
 * Stub used when RCTL is not compiled in: the system call always fails
 * with ENOSYS.  Both arguments are ignored.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{
	const int error = ENOSYS;

	return (error);
}
2239
/*
 * Stub used when RCTL is not compiled in: the system call always fails
 * with ENOSYS.  Both arguments are ignored.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{
	const int error = ENOSYS;

	return (error);
}
2246
2247 #endif /* !RCTL */
2248