#ifndef _LINUX_CGROUP_H
#define _LINUX_CGROUP_H
/*
 * cgroup interface
 *
 * Copyright (C) 2003 BULL SA
 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
 *
 */

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
#include <linux/cgroupstats.h>
#include <linux/prio_heap.h>
#include <linux/rwsem.h>
#include <linux/idr.h>

#ifdef CONFIG_CGROUPS

struct cgroupfs_root;
struct cgroup_subsys;
struct inode;
struct cgroup;
struct css_id;

extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_lock(void);
extern int cgroup_lock_is_held(void);
extern bool cgroup_lock_live_group(struct cgroup *cgrp);
extern void cgroup_unlock(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_fork_callbacks(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p);
extern void cgroup_exit(struct task_struct *p, int run_callbacks);
extern int cgroupstats_build(struct cgroupstats *stats,
				struct dentry *dentry);

extern const struct file_operations proc_cgroup_operations;

/* Define the enumeration of all cgroup subsystems */
#define SUBSYS(_x) _x ## _subsys_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT
};
#undef SUBSYS

/* Per-subsystem/per-cgroup state maintained by the system. */
struct cgroup_subsys_state {
	/*
	 * The cgroup that this subsystem is attached to. Useful
	 * for subsystems that want to know about the cgroup
	 * hierarchy structure
	 */
	struct cgroup *cgroup;

	/*
	 * State maintained by the cgroup system to allow subsystems
	 * to be "busy". Should be accessed via css_get(),
	 * css_tryget() and css_put().
	 */

	atomic_t refcnt;

	unsigned long flags;
	/* ID for this css, if possible */
	struct css_id *id;
};

/* bits in struct cgroup_subsys_state flags field */
enum {
	CSS_ROOT, /* This CSS is the root of the subsystem */
	CSS_REMOVED, /* This CSS is dead */
};

/*
 * Call css_get() to hold a reference on the css; it can be used
 * for a reference obtained via:
 * - an existing ref-counted reference to the css
 * - task->cgroups for a locked task
 */

static inline void css_get(struct cgroup_subsys_state *css)
{
	/* We don't need to reference count the root state */
	if (!test_bit(CSS_ROOT, &css->flags))
		atomic_inc(&css->refcnt);
}

static inline bool css_is_removed(struct cgroup_subsys_state *css)
{
	return test_bit(CSS_REMOVED, &css->flags);
}

/*
 * Call css_tryget() to take a reference on a css if your existing
 * (known-valid) reference isn't already ref-counted. Returns false if
 * the css has been destroyed.
 */

static inline bool css_tryget(struct cgroup_subsys_state *css)
{
	if (test_bit(CSS_ROOT, &css->flags))
		return true;
	while (!atomic_inc_not_zero(&css->refcnt)) {
		if (test_bit(CSS_REMOVED, &css->flags))
			return false;
		cpu_relax();
	}
	return true;
}

/*
 * css_put() should be called to release a reference taken by
 * css_get() or css_tryget()
 */

extern void __css_put(struct cgroup_subsys_state *css);
static inline void css_put(struct cgroup_subsys_state *css)
{
	if (!test_bit(CSS_ROOT, &css->flags))
		__css_put(css);
}
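
/*
 * Usage sketch (illustrative only, not part of this interface): pinning a
 * css for which the caller does not yet hold a counted reference, and
 * releasing it again. "example_pin_css" is a hypothetical helper shown
 * purely to demonstrate the css_tryget()/css_put() pairing described above.
 *
 *	static bool example_pin_css(struct cgroup_subsys_state *css)
 *	{
 *		if (!css_tryget(css))
 *			return false;
 *		pr_debug("css %p pinned, cgroup %p\n", css, css->cgroup);
 *		css_put(css);
 *		return true;
 *	}
 */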

/* bits in struct cgroup flags field */
enum {
	/* Control Group is dead */
	CGRP_REMOVED,
	/*
	 * Control Group has previously had a child cgroup or a task,
	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
	 */
	CGRP_RELEASABLE,
	/* Control Group requires release notifications to userspace */
	CGRP_NOTIFY_ON_RELEASE,
	/*
	 * A thread in rmdir() is waiting for this cgroup.
	 */
	CGRP_WAIT_ON_RMDIR,
};

/* which pidlist file are we talking about? */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,
	CGROUP_FILE_TASKS,
};

/*
 * A pidlist is a list of pids that virtually represents the contents of one
 * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
 * a pair (one each for procs, tasks) for each pid namespace that's relevant
 * to the cgroup.
 */
struct cgroup_pidlist {
	/*
	 * used to find which pidlist is wanted. doesn't change as long as
	 * this particular list stays in the list.
	 */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* array of xids */
	pid_t *list;
	/* how many elements the above list has */
	int length;
	/* how many files are using the current array */
	int use_count;
	/* each of these stored in a list by its cgroup */
	struct list_head links;
	/* pointer to the cgroup we belong to, for list removal purposes */
	struct cgroup *owner;
	/* protects the other fields */
	struct rw_semaphore mutex;
};

struct cgroup {
	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * count users of this cgroup. >0 means busy, but doesn't
	 * necessarily indicate the number of tasks in the cgroup
	 */
	atomic_t count;

	/*
	 * We link our 'sibling' struct into our parent's 'children'.
	 * Our children link their 'sibling' into our 'children'.
	 */
	struct list_head sibling;	/* my parent's children */
	struct list_head children;	/* my children */

	struct cgroup *parent;		/* my parent */
	struct dentry *dentry;		/* cgroup fs entry, RCU protected */

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

	struct cgroupfs_root *root;
	struct cgroup *top_cgroup;

	/*
	 * List of cg_cgroup_links pointing at css_sets with
	 * tasks in this cgroup. Protected by css_set_lock
	 */
	struct list_head css_sets;

	/*
	 * Linked list running through all cgroups that can
	 * potentially be reaped by the release agent. Protected by
	 * release_list_lock
	 */
	struct list_head release_list;

	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
	 */
	struct list_head pidlists;
	struct mutex pidlist_mutex;

	/* For RCU-protected deletion */
	struct rcu_head rcu_head;
};
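
/*
 * Illustrative sketch (not part of this interface): because cgrp->flags is
 * an unsigned long, the CGRP_* bits above can be queried with the standard
 * bitops. "example_notify_on_release" is a hypothetical helper shown only
 * to demonstrate the flag layout.
 *
 *	static bool example_notify_on_release(const struct cgroup *cgrp)
 *	{
 *		return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 *	}
 */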

/*
 * A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects. This saves space in the task struct
 * object and speeds up fork()/exit(), since a single inc/dec and a
 * list_add()/del() can bump the reference count on the entire cgroup
 * set for a task.
 */

struct css_set {

	/* Reference count */
	atomic_t refcount;

	/*
	 * List running through all cgroup groups in the same hash
	 * slot. Protected by css_set_lock
	 */
	struct hlist_node hlist;

	/*
	 * List running through all tasks using this cgroup
	 * group. Protected by css_set_lock
	 */
	struct list_head tasks;

	/*
	 * List of cg_cgroup_link objects on link chains from
	 * cgroups referenced from this css_set. Protected by
	 * css_set_lock
	 */
	struct list_head cg_links;

	/*
	 * Set of subsystem states, one for each subsystem. This array
	 * is immutable after creation apart from the init_css_set
	 * during subsystem registration (at boot time).
	 */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

	/* For RCU-protected deletion */
	struct rcu_head rcu_head;
};

/*
 * cgroup_map_cb is an abstract callback API for reporting map-valued
 * control files
 */

struct cgroup_map_cb {
	int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value);
	void *state;
};

/*
 * struct cftype: handler definitions for cgroup control files
 *
 * When reading/writing to a file:
 *	- the cgroup to use is file->f_dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is file->f_dentry->d_fsdata
 */

#define MAX_CFTYPE_NAME 64
struct cftype {
	/*
	 * By convention, the name should begin with the name of the
	 * subsystem, followed by a period
	 */
	char name[MAX_CFTYPE_NAME];
	int private;
	/*
	 * If not 0, file mode is set to this value, otherwise it will
	 * be figured out automatically
	 */
	mode_t mode;

	/*
	 * If non-zero, defines the maximum length of string that can
	 * be passed to write_string; defaults to 64
	 */
	size_t max_write_len;

	int (*open)(struct inode *inode, struct file *file);
	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
			struct file *file,
			char __user *buf, size_t nbytes, loff_t *ppos);
	/*
	 * read_u64() is a shortcut for the common case of returning a
	 * single integer. Use it in place of read()
	 */
	u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
	/*
	 * read_s64() is a signed version of read_u64()
	 */
	s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
	/*
	 * read_map() is used for defining a map of key/value
	 * pairs. It should call cb->fill(cb, key, value) for each
	 * entry. The key/value pairs (and their ordering) should not
	 * change between reboots.
	 */
	int (*read_map)(struct cgroup *cont, struct cftype *cft,
			struct cgroup_map_cb *cb);
	/*
	 * read_seq_string() is used for outputting a simple sequence
	 * using seqfile.
	 */
	int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
			       struct seq_file *m);

	ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
			 struct file *file,
			 const char __user *buf, size_t nbytes, loff_t *ppos);

	/*
	 * write_u64() is a shortcut for the common case of accepting
	 * a single integer (as parsed by simple_strtoull) from
	 * userspace. Use in place of write(); return 0 or error.
	 */
	int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
	/*
	 * write_s64() is a signed version of write_u64()
	 */
	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);

	/*
	 * write_string() is passed a nul-terminated kernelspace
	 * buffer of maximum length determined by max_write_len.
	 * Returns 0 or -ve error code.
	 */
	int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
			    const char *buffer);
	/*
	 * The trigger() callback can be used to get a kick from
	 * userspace when the actual string written is not important
	 * at all. The private field can be used to determine the
	 * kick type for multiplexing.
	 */
	int (*trigger)(struct cgroup *cgrp, unsigned int event);

	int (*release)(struct inode *inode, struct file *file);
};
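
/*
 * Illustrative sketch (not part of this interface): a minimal control file
 * using the read_u64()/write_u64() shortcuts above. The handlers, the
 * "example_files" array and the global variable are hypothetical and are
 * shown only to demonstrate how a cftype is typically filled in; a real
 * subsystem would keep the value in its per-cgroup state rather than in a
 * global.
 *
 *	static u64 example_weight;
 *
 *	static u64 example_read_weight(struct cgroup *cgrp, struct cftype *cft)
 *	{
 *		return example_weight;
 *	}
 *
 *	static int example_write_weight(struct cgroup *cgrp, struct cftype *cft,
 *					u64 val)
 *	{
 *		example_weight = val;
 *		return 0;
 *	}
 *
 *	static struct cftype example_files[] = {
 *		{
 *			.name = "weight",
 *			.read_u64 = example_read_weight,
 *			.write_u64 = example_write_weight,
 *		},
 *	};
 */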

struct cgroup_scanner {
	struct cgroup *cg;
	int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
	void (*process_task)(struct task_struct *p,
			struct cgroup_scanner *scan);
	struct ptr_heap *heap;
	void *data;
};

/*
 * Add a new file to the given cgroup directory. Should only be
 * called by subsystems from within a populate() method
 */
int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
		       const struct cftype *cft);

/*
 * Add a set of new files to the given cgroup directory. Should
 * only be called by subsystems from within a populate() method
 */
int cgroup_add_files(struct cgroup *cgrp,
			struct cgroup_subsys *subsys,
			const struct cftype cft[],
			int count);

int cgroup_is_removed(const struct cgroup *cgrp);

int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);

int cgroup_task_count(const struct cgroup *cgrp);

/* Return true if cgrp is a descendant of the task's cgroup */
int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);

/*
 * When the subsys has to access a css and may add a permanent refcnt to it,
 * it should take care of race conditions with rmdir(). The following
 * functions are for stopping/restarting rmdir if necessary.
 * Because these will call css_get()/css_put(), "css" should be an alive css.
 *
 *	cgroup_exclude_rmdir();
 *	...do some jobs which may access an arbitrary empty cgroup
 *	cgroup_release_and_wakeup_rmdir();
 *
 * When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
 * it sleeps and cgroup_release_and_wakeup_rmdir() will wake it up.
 */

void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
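
/*
 * Illustrative sketch (not part of this interface): registering control
 * files from a subsystem's populate() callback (see struct cgroup_subsys
 * below). "example_files" is a hypothetical cftype array like the one
 * sketched above, and "example_populate" a hypothetical callback.
 *
 *	static int example_populate(struct cgroup_subsys *ss,
 *				    struct cgroup *cgrp)
 *	{
 *		return cgroup_add_files(cgrp, ss, example_files,
 *					ARRAY_SIZE(example_files));
 *	}
 */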

/*
 * Control Group subsystem type.
 * See Documentation/cgroups/cgroups.txt for details
 */

struct cgroup_subsys {
	struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
						  struct cgroup *cgrp);
	int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
	int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
			  struct task_struct *tsk, bool threadgroup);
	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
			struct cgroup *old_cgrp, struct task_struct *tsk,
			bool threadgroup);
	void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
	void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
	int (*populate)(struct cgroup_subsys *ss,
			struct cgroup *cgrp);
	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);

	int subsys_id;
	int active;
	int disabled;
	int early_init;
	/*
	 * True if this subsys uses ID. ID is not available before cgroup_init()
	 * (not available at early_init time.)
	 */
	bool use_id;
#define MAX_CGROUP_TYPE_NAMELEN 32
	const char *name;

	/*
	 * Protects sibling/children links of cgroups in this
	 * hierarchy, plus protects which hierarchy (or none) the
	 * subsystem is a part of (i.e. root/sibling). To avoid
	 * potential deadlocks, the following operations should not be
	 * undertaken while holding any hierarchy_mutex:
	 *
	 * - allocating memory
	 * - initiating hotplug events
	 */
	struct mutex hierarchy_mutex;
	struct lock_class_key subsys_key;

	/*
	 * Link to parent, and list entry in parent's children.
	 * Protected by this->hierarchy_mutex and cgroup_lock()
	 */
	struct cgroupfs_root *root;
	struct list_head sibling;
	/* used when use_id == true */
	struct idr idr;
	spinlock_t id_lock;
};

#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
#include <linux/cgroup_subsys.h>
#undef SUBSYS

static inline struct cgroup_subsys_state *cgroup_subsys_state(
	struct cgroup *cgrp, int subsys_id)
{
	return cgrp->subsys[subsys_id];
}

static inline struct cgroup_subsys_state *task_subsys_state(
	struct task_struct *task, int subsys_id)
{
	return rcu_dereference_check(task->cgroups->subsys[subsys_id],
				     rcu_read_lock_held() ||
				     cgroup_lock_is_held());
}

static inline struct cgroup* task_cgroup(struct task_struct *task,
					 int subsys_id)
{
	return task_subsys_state(task, subsys_id)->cgroup;
}

int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
		 char *nodename);

/* A cgroup_iter should be treated as an opaque object */
struct cgroup_iter {
	struct list_head *cg_link;
	struct list_head *task;
};

/*
 * To iterate across the tasks in a cgroup:
 *
 * 1) call cgroup_iter_start to initialize an iterator
 *
 * 2) call cgroup_iter_next() to retrieve member tasks until it
 *    returns NULL or until you want to end the iteration
 *
 * 3) call cgroup_iter_end() to destroy the iterator.
 *
 * Or, call cgroup_scan_tasks() to iterate through every task in a
 * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling
 * the test_task() callback, but not while calling the process_task()
 * callback.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
					struct cgroup_iter *it);
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
int cgroup_scan_tasks(struct cgroup_scanner *scan);
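
/*
 * Illustrative sketch (not part of this interface): walking the member
 * tasks of a cgroup with the iterator API described above.
 * "example_count_tasks" is a hypothetical helper shown only to demonstrate
 * the start/next/end pattern.
 *
 *	static int example_count_tasks(struct cgroup *cgrp)
 *	{
 *		struct cgroup_iter it;
 *		struct task_struct *tsk;
 *		int n = 0;
 *
 *		cgroup_iter_start(cgrp, &it);
 *		while ((tsk = cgroup_iter_next(cgrp, &it)) != NULL)
 *			n++;
 *		cgroup_iter_end(cgrp, &it);
 *		return n;
 *	}
 */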
int cgroup_attach_task(struct cgroup *, struct task_struct *);

/*
 * CSS ID is an ID for cgroup_subsys_state structs under a subsys. This only
 * works if cgroup_subsys.use_id == true. It can be used for looking up and
 * scanning. A CSS ID is assigned automatically at cgroup allocation (create)
 * and removed when the subsys calls free_css_id(), because the lifetime of a
 * cgroup_subsys_state is the subsys's responsibility.
 *
 * Lookup and scanning functions should be called under rcu_read_lock().
 * Taking cgroup_mutex()/hierarchy_mutex() is not necessary for the following
 * calls. But the css returned by these routines can be "not populated yet"
 * or "being destroyed". The caller should check the css and the cgroup's
 * status.
 */

/*
 * Typically called at ->destroy(), or somewhere the subsys frees
 * cgroup_subsys_state.
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);

/* Find a cgroup_subsys_state which has given ID */

struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);

/*
 * Get a cgroup whose id is greater than or equal to id under tree of root.
 * Returns a cgroup_subsys_state or NULL.
 */
struct cgroup_subsys_state *css_get_next(struct cgroup_subsys *ss, int id,
		struct cgroup_subsys_state *root, int *foundid);

/* Returns true if root is ancestor of cg */
bool css_is_ancestor(struct cgroup_subsys_state *cg,
		     const struct cgroup_subsys_state *root);

/* Get id and depth of css */
unsigned short css_id(struct cgroup_subsys_state *css);
unsigned short css_depth(struct cgroup_subsys_state *css);

#else /* !CONFIG_CGROUPS */

static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {}
static inline void cgroup_fork_callbacks(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p, int callbacks) {}

static inline void cgroup_lock(void) {}
static inline void cgroup_unlock(void) {}
static inline int cgroupstats_build(struct cgroupstats *stats,
					struct dentry *dentry)
{
	return -EINVAL;
}

#endif /* !CONFIG_CGROUPS */

#endif /* _LINUX_CGROUP_H */