1 /* 2 * linux/cgroup-defs.h - basic definitions for cgroup 3 * 4 * This file provides basic type and interface. Include this file directly 5 * only if necessary to avoid cyclic dependencies. 6 */ 7 #ifndef _LINUX_CGROUP_DEFS_H 8 #define _LINUX_CGROUP_DEFS_H 9 10 #include <linux/limits.h> 11 #include <linux/list.h> 12 #include <linux/idr.h> 13 #include <linux/wait.h> 14 #include <linux/mutex.h> 15 #include <linux/rcupdate.h> 16 #include <linux/percpu-refcount.h> 17 #include <linux/percpu-rwsem.h> 18 #include <linux/workqueue.h> 19 20 #ifdef CONFIG_CGROUPS 21 22 struct cgroup; 23 struct cgroup_root; 24 struct cgroup_subsys; 25 struct cgroup_taskset; 26 struct kernfs_node; 27 struct kernfs_ops; 28 struct kernfs_open_file; 29 struct seq_file; 30 31 #define MAX_CGROUP_TYPE_NAMELEN 32 32 #define MAX_CGROUP_ROOT_NAMELEN 64 33 #define MAX_CFTYPE_NAME 64 34 35 /* define the enumeration of all cgroup subsystems */ 36 #define SUBSYS(_x) _x ## _cgrp_id, 37 #define SUBSYS_TAG(_t) CGROUP_ ## _t, \ 38 __unused_tag_ ## _t = CGROUP_ ## _t - 1, 39 enum cgroup_subsys_id { 40 #include <linux/cgroup_subsys.h> 41 CGROUP_SUBSYS_COUNT, 42 }; 43 #undef SUBSYS_TAG 44 #undef SUBSYS 45 46 #define CGROUP_CANFORK_COUNT (CGROUP_CANFORK_END - CGROUP_CANFORK_START) 47 48 /* bits in struct cgroup_subsys_state flags field */ 49 enum { 50 CSS_NO_REF = (1 << 0), /* no reference counting for this css */ 51 CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ 52 CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ 53 }; 54 55 /* bits in struct cgroup flags field */ 56 enum { 57 /* Control Group requires release notifications to userspace */ 58 CGRP_NOTIFY_ON_RELEASE, 59 /* 60 * Clone the parent's configuration when creating a new child 61 * cpuset cgroup. For historical reasons, this option can be 62 * specified at mount time and thus is implemented here. 63 */ 64 CGRP_CPUSET_CLONE_CHILDREN, 65 }; 66 67 /* cgroup_root->flags */ 68 enum { 69 CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ 70 CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ 71 CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ 72 }; 73 74 /* cftype->flags */ 75 enum { 76 CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ 77 CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ 78 CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ 79 CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ 80 81 /* internal flags, do not use outside cgroup core proper */ 82 __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ 83 __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ 84 }; 85 86 /* 87 * cgroup_file is the handle for a file instance created in a cgroup which 88 * is used, for example, to generate file changed notifications. This can 89 * be obtained by setting cftype->file_offset. 90 */ 91 struct cgroup_file { 92 /* do not access any fields from outside cgroup core */ 93 struct list_head node; /* anchored at css->files */ 94 struct kernfs_node *kn; 95 }; 96 97 /* 98 * Per-subsystem/per-cgroup state maintained by the system. This is the 99 * fundamental structural building block that controllers deal with. 100 * 101 * Fields marked with "PI:" are public and immutable and may be accessed 102 * directly without synchronization. 103 */ 104 struct cgroup_subsys_state { 105 /* PI: the cgroup that this css is attached to */ 106 struct cgroup *cgroup; 107 108 /* PI: the cgroup subsystem that this css is attached to */ 109 struct cgroup_subsys *ss; 110 111 /* reference count - access via css_[try]get() and css_put() */ 112 struct percpu_ref refcnt; 113 114 /* PI: the parent css */ 115 struct cgroup_subsys_state *parent; 116 117 /* siblings list anchored at the parent's ->children */ 118 struct list_head sibling; 119 struct list_head children; 120 121 /* 122 * PI: Subsys-unique ID. 0 is unused and root is always 1. The 123 * matching css can be looked up using css_from_id(). 124 */ 125 int id; 126 127 unsigned int flags; 128 129 /* 130 * Monotonically increasing unique serial number which defines a 131 * uniform order among all csses. It's guaranteed that all 132 * ->children lists are in the ascending order of ->serial_nr and 133 * used to allow interrupting and resuming iterations. 134 */ 135 u64 serial_nr; 136 137 /* all cgroup_files associated with this css */ 138 struct list_head files; 139 140 /* percpu_ref killing and RCU release */ 141 struct rcu_head rcu_head; 142 struct work_struct destroy_work; 143 }; 144 145 /* 146 * A css_set is a structure holding pointers to a set of 147 * cgroup_subsys_state objects. This saves space in the task struct 148 * object and speeds up fork()/exit(), since a single inc/dec and a 149 * list_add()/del() can bump the reference count on the entire cgroup 150 * set for a task. 151 */ 152 struct css_set { 153 /* Reference count */ 154 atomic_t refcount; 155 156 /* 157 * List running through all cgroup groups in the same hash 158 * slot. Protected by css_set_lock 159 */ 160 struct hlist_node hlist; 161 162 /* 163 * Lists running through all tasks using this cgroup group. 164 * mg_tasks lists tasks which belong to this cset but are in the 165 * process of being migrated out or in. Protected by 166 * css_set_rwsem, but, during migration, once tasks are moved to 167 * mg_tasks, it can be read safely while holding cgroup_mutex. 168 */ 169 struct list_head tasks; 170 struct list_head mg_tasks; 171 172 /* 173 * List of cgrp_cset_links pointing at cgroups referenced from this 174 * css_set. Protected by css_set_lock. 175 */ 176 struct list_head cgrp_links; 177 178 /* the default cgroup associated with this css_set */ 179 struct cgroup *dfl_cgrp; 180 181 /* 182 * Set of subsystem states, one for each subsystem. This array is 183 * immutable after creation apart from the init_css_set during 184 * subsystem registration (at boot time). 185 */ 186 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 187 188 /* 189 * List of csets participating in the on-going migration either as 190 * source or destination. Protected by cgroup_mutex. 191 */ 192 struct list_head mg_preload_node; 193 struct list_head mg_node; 194 195 /* 196 * If this cset is acting as the source of migration the following 197 * two fields are set. mg_src_cgrp is the source cgroup of the 198 * on-going migration and mg_dst_cset is the destination cset the 199 * target tasks on this cset should be migrated to. Protected by 200 * cgroup_mutex. 201 */ 202 struct cgroup *mg_src_cgrp; 203 struct css_set *mg_dst_cset; 204 205 /* 206 * On the default hierarhcy, ->subsys[ssid] may point to a css 207 * attached to an ancestor instead of the cgroup this css_set is 208 * associated with. The following node is anchored at 209 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to 210 * iterate through all css's attached to a given cgroup. 211 */ 212 struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; 213 214 /* all css_task_iters currently walking this cset */ 215 struct list_head task_iters; 216 217 /* For RCU-protected deletion */ 218 struct rcu_head rcu_head; 219 }; 220 221 struct cgroup { 222 /* self css with NULL ->ss, points back to this cgroup */ 223 struct cgroup_subsys_state self; 224 225 unsigned long flags; /* "unsigned long" so bitops work */ 226 227 /* 228 * idr allocated in-hierarchy ID. 229 * 230 * ID 0 is not used, the ID of the root cgroup is always 1, and a 231 * new cgroup will be assigned with a smallest available ID. 232 * 233 * Allocating/Removing ID must be protected by cgroup_mutex. 234 */ 235 int id; 236 237 /* 238 * Each non-empty css_set associated with this cgroup contributes 239 * one to populated_cnt. All children with non-zero popuplated_cnt 240 * of their own contribute one. The count is zero iff there's no 241 * task in this cgroup or its subtree. 242 */ 243 int populated_cnt; 244 245 struct kernfs_node *kn; /* cgroup kernfs entry */ 246 struct cgroup_file procs_file; /* handle for "cgroup.procs" */ 247 struct cgroup_file events_file; /* handle for "cgroup.events" */ 248 249 /* 250 * The bitmask of subsystems enabled on the child cgroups. 251 * ->subtree_control is the one configured through 252 * "cgroup.subtree_control" while ->child_subsys_mask is the 253 * effective one which may have more subsystems enabled. 254 * Controller knobs are made available iff it's enabled in 255 * ->subtree_control. 256 */ 257 unsigned int subtree_control; 258 unsigned int child_subsys_mask; 259 260 /* Private pointers for each registered subsystem */ 261 struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; 262 263 struct cgroup_root *root; 264 265 /* 266 * List of cgrp_cset_links pointing at css_sets with tasks in this 267 * cgroup. Protected by css_set_lock. 268 */ 269 struct list_head cset_links; 270 271 /* 272 * On the default hierarchy, a css_set for a cgroup with some 273 * susbsys disabled will point to css's which are associated with 274 * the closest ancestor which has the subsys enabled. The 275 * following lists all css_sets which point to this cgroup's css 276 * for the given subsystem. 277 */ 278 struct list_head e_csets[CGROUP_SUBSYS_COUNT]; 279 280 /* 281 * list of pidlists, up to two for each namespace (one for procs, one 282 * for tasks); created on demand. 283 */ 284 struct list_head pidlists; 285 struct mutex pidlist_mutex; 286 287 /* used to wait for offlining of csses */ 288 wait_queue_head_t offline_waitq; 289 290 /* used to schedule release agent */ 291 struct work_struct release_agent_work; 292 }; 293 294 /* 295 * A cgroup_root represents the root of a cgroup hierarchy, and may be 296 * associated with a kernfs_root to form an active hierarchy. This is 297 * internal to cgroup core. Don't access directly from controllers. 298 */ 299 struct cgroup_root { 300 struct kernfs_root *kf_root; 301 302 /* The bitmask of subsystems attached to this hierarchy */ 303 unsigned int subsys_mask; 304 305 /* Unique id for this hierarchy. */ 306 int hierarchy_id; 307 308 /* The root cgroup. Root is destroyed on its release. */ 309 struct cgroup cgrp; 310 311 /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ 312 atomic_t nr_cgrps; 313 314 /* A list running through the active hierarchies */ 315 struct list_head root_list; 316 317 /* Hierarchy-specific flags */ 318 unsigned int flags; 319 320 /* IDs for cgroups in this hierarchy */ 321 struct idr cgroup_idr; 322 323 /* The path to use for release notifications. */ 324 char release_agent_path[PATH_MAX]; 325 326 /* The name for this hierarchy - may be empty */ 327 char name[MAX_CGROUP_ROOT_NAMELEN]; 328 }; 329 330 /* 331 * struct cftype: handler definitions for cgroup control files 332 * 333 * When reading/writing to a file: 334 * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata 335 * - the 'cftype' of the file is file->f_path.dentry->d_fsdata 336 */ 337 struct cftype { 338 /* 339 * By convention, the name should begin with the name of the 340 * subsystem, followed by a period. Zero length string indicates 341 * end of cftype array. 342 */ 343 char name[MAX_CFTYPE_NAME]; 344 unsigned long private; 345 346 /* 347 * The maximum length of string, excluding trailing nul, that can 348 * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. 349 */ 350 size_t max_write_len; 351 352 /* CFTYPE_* flags */ 353 unsigned int flags; 354 355 /* 356 * If non-zero, should contain the offset from the start of css to 357 * a struct cgroup_file field. cgroup will record the handle of 358 * the created file into it. The recorded handle can be used as 359 * long as the containing css remains accessible. 360 */ 361 unsigned int file_offset; 362 363 /* 364 * Fields used for internal bookkeeping. Initialized automatically 365 * during registration. 366 */ 367 struct cgroup_subsys *ss; /* NULL for cgroup core files */ 368 struct list_head node; /* anchored at ss->cfts */ 369 struct kernfs_ops *kf_ops; 370 371 /* 372 * read_u64() is a shortcut for the common case of returning a 373 * single integer. Use it in place of read() 374 */ 375 u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); 376 /* 377 * read_s64() is a signed version of read_u64() 378 */ 379 s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); 380 381 /* generic seq_file read interface */ 382 int (*seq_show)(struct seq_file *sf, void *v); 383 384 /* optional ops, implement all or none */ 385 void *(*seq_start)(struct seq_file *sf, loff_t *ppos); 386 void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); 387 void (*seq_stop)(struct seq_file *sf, void *v); 388 389 /* 390 * write_u64() is a shortcut for the common case of accepting 391 * a single integer (as parsed by simple_strtoull) from 392 * userspace. Use in place of write(); return 0 or error. 393 */ 394 int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, 395 u64 val); 396 /* 397 * write_s64() is a signed version of write_u64() 398 */ 399 int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, 400 s64 val); 401 402 /* 403 * write() is the generic write callback which maps directly to 404 * kernfs write operation and overrides all other operations. 405 * Maximum write size is determined by ->max_write_len. Use 406 * of_css/cft() to access the associated css and cft. 407 */ 408 ssize_t (*write)(struct kernfs_open_file *of, 409 char *buf, size_t nbytes, loff_t off); 410 411 #ifdef CONFIG_DEBUG_LOCK_ALLOC 412 struct lock_class_key lockdep_key; 413 #endif 414 }; 415 416 /* 417 * Control Group subsystem type. 418 * See Documentation/cgroups/cgroups.txt for details 419 */ 420 struct cgroup_subsys { 421 struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); 422 int (*css_online)(struct cgroup_subsys_state *css); 423 void (*css_offline)(struct cgroup_subsys_state *css); 424 void (*css_released)(struct cgroup_subsys_state *css); 425 void (*css_free)(struct cgroup_subsys_state *css); 426 void (*css_reset)(struct cgroup_subsys_state *css); 427 void (*css_e_css_changed)(struct cgroup_subsys_state *css); 428 429 int (*can_attach)(struct cgroup_subsys_state *css, 430 struct cgroup_taskset *tset); 431 void (*cancel_attach)(struct cgroup_subsys_state *css, 432 struct cgroup_taskset *tset); 433 void (*attach)(struct cgroup_subsys_state *css, 434 struct cgroup_taskset *tset); 435 int (*can_fork)(struct task_struct *task, void **priv_p); 436 void (*cancel_fork)(struct task_struct *task, void *priv); 437 void (*fork)(struct task_struct *task, void *priv); 438 void (*exit)(struct task_struct *task); 439 void (*free)(struct task_struct *task); 440 void (*bind)(struct cgroup_subsys_state *root_css); 441 442 int early_init; 443 444 /* 445 * If %false, this subsystem is properly hierarchical - 446 * configuration, resource accounting and restriction on a parent 447 * cgroup cover those of its children. If %true, hierarchy support 448 * is broken in some ways - some subsystems ignore hierarchy 449 * completely while others are only implemented half-way. 450 * 451 * It's now disallowed to create nested cgroups if the subsystem is 452 * broken and cgroup core will emit a warning message on such 453 * cases. Eventually, all subsystems will be made properly 454 * hierarchical and this will go away. 455 */ 456 bool broken_hierarchy; 457 bool warned_broken_hierarchy; 458 459 /* the following two fields are initialized automtically during boot */ 460 int id; 461 const char *name; 462 463 /* optional, initialized automatically during boot if not set */ 464 const char *legacy_name; 465 466 /* link to parent, protected by cgroup_lock() */ 467 struct cgroup_root *root; 468 469 /* idr for css->id */ 470 struct idr css_idr; 471 472 /* 473 * List of cftypes. Each entry is the first entry of an array 474 * terminated by zero length name. 475 */ 476 struct list_head cfts; 477 478 /* 479 * Base cftypes which are automatically registered. The two can 480 * point to the same array. 481 */ 482 struct cftype *dfl_cftypes; /* for the default hierarchy */ 483 struct cftype *legacy_cftypes; /* for the legacy hierarchies */ 484 485 /* 486 * A subsystem may depend on other subsystems. When such subsystem 487 * is enabled on a cgroup, the depended-upon subsystems are enabled 488 * together if available. Subsystems enabled due to dependency are 489 * not visible to userland until explicitly enabled. The following 490 * specifies the mask of subsystems that this one depends on. 491 */ 492 unsigned int depends_on; 493 }; 494 495 extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; 496 497 /** 498 * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups 499 * @tsk: target task 500 * 501 * Called from threadgroup_change_begin() and allows cgroup operations to 502 * synchronize against threadgroup changes using a percpu_rw_semaphore. 503 */ 504 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) 505 { 506 percpu_down_read(&cgroup_threadgroup_rwsem); 507 } 508 509 /** 510 * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups 511 * @tsk: target task 512 * 513 * Called from threadgroup_change_end(). Counterpart of 514 * cgroup_threadcgroup_change_begin(). 515 */ 516 static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) 517 { 518 percpu_up_read(&cgroup_threadgroup_rwsem); 519 } 520 521 #else /* CONFIG_CGROUPS */ 522 523 #define CGROUP_CANFORK_COUNT 0 524 #define CGROUP_SUBSYS_COUNT 0 525 526 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} 527 static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} 528 529 #endif /* CONFIG_CGROUPS */ 530 531 #endif /* _LINUX_CGROUP_DEFS_H */ 532