/*
 * linux/cgroup-defs.h - basic definitions for cgroup
 *
 * This file provides basic types and interfaces.  Include this file
 * directly only if necessary to avoid cyclic dependencies.
 */
#ifndef _LINUX_CGROUP_DEFS_H
#define _LINUX_CGROUP_DEFS_H

#include <linux/limits.h>
#include <linux/list.h>
#include <linux/idr.h>
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
#include <linux/bpf-cgroup.h>

#ifdef CONFIG_CGROUPS

struct cgroup;
struct cgroup_root;
struct cgroup_subsys;
struct cgroup_taskset;
struct kernfs_node;
struct kernfs_ops;
struct kernfs_open_file;
struct seq_file;

#define MAX_CGROUP_TYPE_NAMELEN	32
#define MAX_CGROUP_ROOT_NAMELEN	64
#define MAX_CFTYPE_NAME		64

/*
 * Define the enumeration of all cgroup subsystems.  Each SUBSYS(_x) line
 * in cgroup_subsys.h expands to an "_x ## _cgrp_id" enumerator here,
 * e.g. SUBSYS(cpu) becomes cpu_cgrp_id.
 */
#define SUBSYS(_x) _x ## _cgrp_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT,
};
#undef SUBSYS

/* bits in struct cgroup_subsys_state flags field */
enum {
	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
	CSS_VISIBLE	= (1 << 3), /* css is visible to userland */
	CSS_DYING	= (1 << 4), /* css is dying */
};

/* bits in struct cgroup flags field */
enum {
	/* Control Group requires release notifications to userspace */
	CGRP_NOTIFY_ON_RELEASE,
	/*
	 * Clone the parent's configuration when creating a new child
	 * cpuset cgroup.  For historical reasons, this option can be
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,
};

/* cgroup_root->flags */
enum {
	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */

	/*
	 * Consider namespaces as delegation boundaries.  If this flag is
	 * set, controller specific interface files in a namespace root
	 * aren't writeable from inside the namespace.
	 */
	CGRP_ROOT_NS_DELEGATE	= (1 << 3),

	/*
	 * Enable cpuset controller in v1 cgroup to use v2 behavior.
	 */
	CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
};

/* cftype->flags */
enum {
	CFTYPE_ONLY_ON_ROOT	= (1 << 0),	/* only create on root cgrp */
	CFTYPE_NOT_ON_ROOT	= (1 << 1),	/* don't create on root cgrp */
	CFTYPE_NS_DELEGATABLE	= (1 << 2),	/* writeable beyond delegation boundaries */

	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
	CFTYPE_WORLD_WRITABLE	= (1 << 4),	/* (DON'T USE FOR NEW FILES) S_IWUGO */

	/* internal flags, do not use outside cgroup core proper */
	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
};

/*
 * cgroup_file is the handle for a file instance created in a cgroup which
 * is used, for example, to generate file changed notifications.  This can
 * be obtained by setting cftype->file_offset.
 */
struct cgroup_file {
	/* do not access any fields from outside cgroup core */
	struct kernfs_node *kn;
};
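/*
 * Usage sketch (illustrative only, not part of this header): a controller
 * can embed a cgroup_file in its css wrapper and point cftype->file_offset
 * at it so that cgroup core records the file handle, which can later be
 * handed to cgroup_file_notify().  "my_state", "my_files" and
 * "my_events_show" are hypothetical names; the css is placed first so the
 * offset from the css matches the offset from the wrapper.  A zero-length
 * name terminates the cftype array.
 *
 *	struct my_state {
 *		struct cgroup_subsys_state css;
 *		struct cgroup_file events_file;
 *	};
 *
 *	static struct cftype my_files[] = {
 *		{
 *			.name		= "events",
 *			.file_offset	= offsetof(struct my_state, events_file),
 *			.seq_show	= my_events_show,
 *		},
 *		{ }
 *	};
 */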
/*
 * Per-subsystem/per-cgroup state maintained by the system.  This is the
 * fundamental structural building block that controllers deal with.
 *
 * Fields marked with "PI:" are public and immutable and may be accessed
 * directly without synchronization.
 */
struct cgroup_subsys_state {
	/* PI: the cgroup that this css is attached to */
	struct cgroup *cgroup;

	/* PI: the cgroup subsystem that this css is attached to */
	struct cgroup_subsys *ss;

	/* reference count - access via css_[try]get() and css_put() */
	struct percpu_ref refcnt;

	/* siblings list anchored at the parent's ->children */
	struct list_head sibling;
	struct list_head children;

	/*
	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
	 * matching css can be looked up using css_from_id().
	 */
	int id;

	unsigned int flags;

	/*
	 * Monotonically increasing unique serial number which defines a
	 * uniform order among all csses.  It's guaranteed that all
	 * ->children lists are in the ascending order of ->serial_nr,
	 * which is used to allow interrupting and resuming iterations.
	 */
	u64 serial_nr;

	/*
	 * Incremented by online self and children.  Used to guarantee that
	 * parents are not offlined before their children.
	 */
	atomic_t online_cnt;

	/* percpu_ref killing and RCU release */
	struct rcu_head rcu_head;
	struct work_struct destroy_work;

	/*
	 * PI: the parent css.  Placed here for cache proximity to following
	 * fields of the containing structure.
	 */
	struct cgroup_subsys_state *parent;
};
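/*
 * Refcounting sketch (illustrative only): a css looked up under RCU must
 * be pinned with css_tryget() (declared in linux/cgroup.h) before it is
 * used outside the RCU read section.  "my_cgrp_id" and "my_work_on" are
 * hypothetical names.
 *
 *	rcu_read_lock();
 *	css = task_css(task, my_cgrp_id);
 *	if (!css_tryget(css))
 *		css = NULL;
 *	rcu_read_unlock();
 *
 *	if (css) {
 *		my_work_on(css);
 *		css_put(css);
 *	}
 */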
/*
 * A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects.  This saves space in the task struct
 * object and speeds up fork()/exit(), since a single inc/dec and a
 * list_add()/del() can bump the reference count on the entire cgroup
 * set for a task.
 */
struct css_set {
	/*
	 * Set of subsystem states, one for each subsystem.  This array is
	 * immutable after creation apart from the init_css_set during
	 * subsystem registration (at boot time).
	 */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

	/* reference count */
	refcount_t refcount;

	/*
	 * For a domain cgroup, the following points to self.  If threaded,
	 * to the matching cset of the nearest domain ancestor.  The
	 * dom_cset provides access to the domain cgroup and its csses to
	 * which domain level resource consumptions should be charged.
	 */
	struct css_set *dom_cset;

	/* the default cgroup associated with this css_set */
	struct cgroup *dfl_cgrp;

	/* internal task count, protected by css_set_lock */
	int nr_tasks;

	/*
	 * Lists running through all tasks using this css_set.  mg_tasks
	 * lists tasks which belong to this cset but are in the process of
	 * being migrated out or in.  Protected by css_set_lock, but, during
	 * migration, once tasks are moved to mg_tasks, it can be read
	 * safely while holding cgroup_mutex.
	 */
	struct list_head tasks;
	struct list_head mg_tasks;

	/* all css_task_iters currently walking this cset */
	struct list_head task_iters;

	/*
	 * On the default hierarchy, ->subsys[ssid] may point to a css
	 * attached to an ancestor instead of the cgroup this css_set is
	 * associated with.  The following node is anchored at
	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
	 * iterate through all css's attached to a given cgroup.
	 */
	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];

	/* all threaded csets whose ->dom_cset points to this cset */
	struct list_head threaded_csets;
	struct list_head threaded_csets_node;

	/*
	 * List running through all css_sets in the same hash slot.
	 * Protected by css_set_lock.
	 */
	struct hlist_node hlist;

	/*
	 * List of cgrp_cset_links pointing at cgroups referenced from this
	 * css_set.  Protected by css_set_lock.
	 */
	struct list_head cgrp_links;

	/*
	 * List of csets participating in the on-going migration either as
	 * source or destination.  Protected by cgroup_mutex.
	 */
	struct list_head mg_preload_node;
	struct list_head mg_node;

	/*
	 * If this cset is acting as the source of migration the following
	 * two fields are set.  mg_src_cgrp and mg_dst_cgrp are
	 * respectively the source and destination cgroups of the on-going
	 * migration.  mg_dst_cset is the destination cset the target tasks
	 * on this cset should be migrated to.  Protected by cgroup_mutex.
	 */
	struct cgroup *mg_src_cgrp;
	struct cgroup *mg_dst_cgrp;
	struct css_set *mg_dst_cset;

	/* dead and being drained, ignore for migration */
	bool dead;

	/* For RCU-protected deletion */
	struct rcu_head rcu_head;
};
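/*
 * Iteration sketch (illustrative only): css_task_iter_start/next/end from
 * linux/cgroup.h walk the csets linked to a css, which is how "all tasks
 * in a cgroup" is enumerated without a per-cgroup task list.  The exact
 * signature varies across kernel versions; "my_visit" is hypothetical.
 *
 *	struct css_task_iter it;
 *	struct task_struct *task;
 *
 *	css_task_iter_start(css, 0, &it);
 *	while ((task = css_task_iter_next(&it)))
 *		my_visit(task);
 *	css_task_iter_end(&it);
 */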
struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * idr allocated in-hierarchy ID.
	 *
	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
	 * new cgroup is assigned the smallest available ID.
	 *
	 * Allocating/Removing ID must be protected by cgroup_mutex.
	 */
	int id;

	/*
	 * The depth this cgroup is at.  The root is at depth zero and each
	 * step down the hierarchy increments the level.  This along with
	 * ancestor_ids[] can determine whether a given cgroup is a
	 * descendant of another without traversing the hierarchy.
	 */
	int level;

	/* Maximum allowed descendant tree depth */
	int max_depth;

	/*
	 * Keep track of total numbers of visible and dying descendant
	 * cgroups.  Dying cgroups are cgroups which were deleted by a
	 * user, but still exist because someone else is holding a
	 * reference.  max_descendants is the maximum allowed number of
	 * descendant cgroups.
	 */
	int nr_descendants;
	int nr_dying_descendants;
	int max_descendants;

	/*
	 * Each non-empty css_set associated with this cgroup contributes
	 * one to nr_populated_csets.  The counter is zero iff this cgroup
	 * doesn't have any tasks.
	 *
	 * All children which have non-zero nr_populated_csets and/or
	 * nr_populated_children of their own contribute one to either
	 * nr_populated_domain_children or nr_populated_threaded_children
	 * depending on their type.  Each counter is zero iff all cgroups
	 * of the type in the subtree proper don't have any tasks.
	 */
	int nr_populated_csets;
	int nr_populated_domain_children;
	int nr_populated_threaded_children;

	int nr_threaded_children;	/* # of live threaded child cgroups */

	struct kernfs_node *kn;		/* cgroup kernfs entry */
	struct cgroup_file procs_file;	/* handle for "cgroup.procs" */
	struct cgroup_file events_file;	/* handle for "cgroup.events" */

	/*
	 * The bitmask of subsystems enabled on the child cgroups.
	 * ->subtree_control is the one configured through
	 * "cgroup.subtree_control" while ->subtree_ss_mask is the
	 * effective one which may have more subsystems enabled.
	 * Controller knobs are made available iff it's enabled in
	 * ->subtree_control.
	 */
	u16 subtree_control;
	u16 subtree_ss_mask;
	u16 old_subtree_control;
	u16 old_subtree_ss_mask;

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];

	struct cgroup_root *root;

	/*
	 * List of cgrp_cset_links pointing at css_sets with tasks in this
	 * cgroup.  Protected by css_set_lock.
	 */
	struct list_head cset_links;

	/*
	 * On the default hierarchy, a css_set for a cgroup with some
	 * subsys disabled will point to css's which are associated with
	 * the closest ancestor which has the subsys enabled.  The
	 * following lists all css_sets which point to this cgroup's css
	 * for the given subsystem.
	 */
	struct list_head e_csets[CGROUP_SUBSYS_COUNT];

	/*
	 * If !threaded, self.  If threaded, it points to the nearest
	 * domain ancestor.  Inside a threaded subtree, cgroups are exempt
	 * from process granularity and the no-internal-task constraint.
	 * Domain level resource consumptions which aren't tied to a
	 * specific task are charged to the dom_cgrp.
	 */
	struct cgroup *dom_cgrp;

	/*
	 * list of pidlists, up to two for each namespace (one for procs,
	 * one for tasks); created on demand.
	 */
	struct list_head pidlists;
	struct mutex pidlist_mutex;

	/* used to wait for offlining of csses */
	wait_queue_head_t offline_waitq;

	/* used to schedule release agent */
	struct work_struct release_agent_work;

	/* used to store eBPF programs */
	struct cgroup_bpf bpf;

	/* ids of the ancestors at each level including self */
	int ancestor_ids[];
};
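/*
 * Descendant-test sketch (illustrative only): because every cgroup caches
 * the IDs of all its ancestors in ancestor_ids[], ancestry can be checked
 * in O(1) as described above.  This mirrors what cgroup core does;
 * "my_is_descendant" is a hypothetical name, the real helper lives in
 * linux/cgroup.h.
 *
 *	static bool my_is_descendant(struct cgroup *cgrp,
 *				     struct cgroup *ancestor)
 *	{
 *		if (cgrp->level < ancestor->level)
 *			return false;
 *		return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 *	}
 */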
/*
 * A cgroup_root represents the root of a cgroup hierarchy, and may be
 * associated with a kernfs_root to form an active hierarchy.  This is
 * internal to cgroup core.  Don't access directly from controllers.
 */
struct cgroup_root {
	struct kernfs_root *kf_root;

	/* The bitmask of subsystems attached to this hierarchy */
	unsigned int subsys_mask;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The root cgroup.  Root is destroyed on its release. */
	struct cgroup cgrp;

	/* for cgrp->ancestor_ids[0] */
	int cgrp_ancestor_id_storage;

	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
	atomic_t nr_cgrps;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags */
	unsigned int flags;

	/* IDs for cgroups in this hierarchy */
	struct idr cgroup_idr;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};

/*
 * struct cftype: handler definitions for cgroup control files
 *
 * When reading/writing to a file:
 *	- the cgroup to use is file->f_path.dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
 */
struct cftype {
	/*
	 * By convention, the name should begin with the name of the
	 * subsystem, followed by a period.  A zero length string indicates
	 * the end of the cftype array.
	 */
	char name[MAX_CFTYPE_NAME];
	unsigned long private;

	/*
	 * The maximum length of string, excluding trailing nul, that can
	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
	 */
	size_t max_write_len;

	/* CFTYPE_* flags */
	unsigned int flags;

	/*
	 * If non-zero, should contain the offset from the start of css to
	 * a struct cgroup_file field.  cgroup core will record the handle
	 * of the created file into it.  The recorded handle can be used as
	 * long as the containing css remains accessible.
	 */
	unsigned int file_offset;

	/*
	 * Fields used for internal bookkeeping.  Initialized automatically
	 * during registration.
	 */
	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
	struct list_head node;		/* anchored at ss->cfts */
	struct kernfs_ops *kf_ops;

	int (*open)(struct kernfs_open_file *of);
	void (*release)(struct kernfs_open_file *of);

	/*
	 * read_u64() is a shortcut for the common case of returning a
	 * single integer.  Use it in place of read().
	 */
	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
	/*
	 * read_s64() is a signed version of read_u64().
	 */
	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);

	/* generic seq_file read interface */
	int (*seq_show)(struct seq_file *sf, void *v);

	/* optional ops, implement all or none */
	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
	void (*seq_stop)(struct seq_file *sf, void *v);

	/*
	 * write_u64() is a shortcut for the common case of accepting
	 * a single integer (as parsed by simple_strtoull) from
	 * userspace.  Use in place of write(); return 0 or error.
	 */
	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 u64 val);
	/*
	 * write_s64() is a signed version of write_u64().
	 */
	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 s64 val);

	/*
	 * write() is the generic write callback which maps directly to
	 * the kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.  Use
	 * of_css/cft() to access the associated css and cft.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lock_class_key lockdep_key;
#endif
};
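/*
 * Handler sketch (illustrative only): a minimal integer knob using the
 * read_u64/write_u64 shortcuts described above.  "my_state",
 * "css_to_my_state", the "my_weight_*" names and the bounds are all
 * hypothetical; the empty entry terminates the array.
 *
 *	static u64 my_weight_read(struct cgroup_subsys_state *css,
 *				  struct cftype *cft)
 *	{
 *		return css_to_my_state(css)->weight;
 *	}
 *
 *	static int my_weight_write(struct cgroup_subsys_state *css,
 *				   struct cftype *cft, u64 val)
 *	{
 *		if (val < 1 || val > 10000)
 *			return -EINVAL;
 *		css_to_my_state(css)->weight = val;
 *		return 0;
 *	}
 *
 *	static struct cftype my_files[] = {
 *		{
 *			.name		= "weight",
 *			.read_u64	= my_weight_read,
 *			.write_u64	= my_weight_write,
 *		},
 *		{ }
 *	};
 */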
/*
 * Control Group subsystem type.
 * See Documentation/cgroups/cgroups.txt for details.
 */
struct cgroup_subsys {
	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
	int (*css_online)(struct cgroup_subsys_state *css);
	void (*css_offline)(struct cgroup_subsys_state *css);
	void (*css_released)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);

	int (*can_attach)(struct cgroup_taskset *tset);
	void (*cancel_attach)(struct cgroup_taskset *tset);
	void (*attach)(struct cgroup_taskset *tset);
	void (*post_attach)(void);
	int (*can_fork)(struct task_struct *task);
	void (*cancel_fork)(struct task_struct *task);
	void (*fork)(struct task_struct *task);
	void (*exit)(struct task_struct *task);
	void (*free)(struct task_struct *task);
	void (*bind)(struct cgroup_subsys_state *root_css);

	bool early_init:1;

	/*
	 * If %true, the controller, on the default hierarchy, doesn't show
	 * up in "cgroup.controllers" or "cgroup.subtree_control", is
	 * implicitly enabled on all cgroups on the default hierarchy, and
	 * bypasses the "no internal process" constraint.  This is for
	 * utility type controllers which are transparent to userland.
	 *
	 * An implicit controller can be stolen from the default hierarchy
	 * anytime and thus must be okay with offline csses from previous
	 * hierarchies coexisting with csses for the current one.
	 */
	bool implicit_on_dfl:1;

	/*
	 * If %true, the controller supports threaded mode on the default
	 * hierarchy.  In a threaded subtree, both process granularity and
	 * the no-internal-process constraint are ignored and a threaded
	 * controller should be able to handle that.
	 *
	 * Note that as an implicit controller is automatically enabled on
	 * all cgroups on the default hierarchy, it should also be
	 * threaded.  implicit && !threaded is not supported.
	 */
	bool threaded:1;

	/*
	 * If %false, this subsystem is properly hierarchical -
	 * configuration, resource accounting and restriction on a parent
	 * cgroup cover those of its children.  If %true, hierarchy support
	 * is broken in some ways - some subsystems ignore hierarchy
	 * completely while others are only implemented half-way.
	 *
	 * It's now disallowed to create nested cgroups if the subsystem is
	 * broken and cgroup core will emit a warning message in such
	 * cases.  Eventually, all subsystems will be made properly
	 * hierarchical and this will go away.
	 */
	bool broken_hierarchy:1;
	bool warned_broken_hierarchy:1;

	/* the following two fields are initialized automatically during boot */
	int id;
	const char *name;

	/* optional, initialized automatically during boot if not set */
	const char *legacy_name;

	/* link to parent, protected by cgroup_lock() */
	struct cgroup_root *root;

	/* idr for css->id */
	struct idr css_idr;

	/*
	 * List of cftypes.  Each entry is the first entry of an array
	 * terminated by zero length name.
	 */
	struct list_head cfts;

	/*
	 * Base cftypes which are automatically registered.  The two can
	 * point to the same array.
	 */
	struct cftype *dfl_cftypes;	/* for the default hierarchy */
	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */

	/*
	 * A subsystem may depend on other subsystems.  When such a
	 * subsystem is enabled on a cgroup, the depended-upon subsystems
	 * are enabled together if available.  Subsystems enabled due to
	 * dependency are not visible to userland until explicitly enabled.
	 * The following specifies the mask of subsystems that this one
	 * depends on.
	 */
	unsigned int depends_on;
};
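/*
 * Registration sketch (illustrative only): the minimal lifecycle pair a
 * controller implements is css_alloc/css_free; everything else is
 * optional.  "my_state", "my_files" and "my_cgrp_subsys" are hypothetical
 * names (real instances, e.g. freezer_cgrp_subsys, are wired up through
 * cgroup_subsys.h).
 *
 *	static struct cgroup_subsys_state *
 *	my_css_alloc(struct cgroup_subsys_state *parent_css)
 *	{
 *		struct my_state *st = kzalloc(sizeof(*st), GFP_KERNEL);
 *
 *		if (!st)
 *			return ERR_PTR(-ENOMEM);
 *		return &st->css;
 *	}
 *
 *	static void my_css_free(struct cgroup_subsys_state *css)
 *	{
 *		kfree(container_of(css, struct my_state, css));
 *	}
 *
 *	struct cgroup_subsys my_cgrp_subsys = {
 *		.css_alloc	= my_css_alloc,
 *		.css_free	= my_css_free,
 *		.dfl_cftypes	= my_files,
 *	};
 */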
extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;

/**
 * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
 * @tsk: target task
 *
 * Allows cgroup operations to synchronize against threadgroup changes
 * using a percpu_rw_semaphore.
 */
static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
{
	percpu_down_read(&cgroup_threadgroup_rwsem);
}

/**
 * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
 * @tsk: target task
 *
 * Counterpart of cgroup_threadgroup_change_begin().
 */
static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
{
	percpu_up_read(&cgroup_threadgroup_rwsem);
}

#else	/* CONFIG_CGROUPS */

#define CGROUP_SUBSYS_COUNT 0

static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
{
	might_sleep();
}

static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}

#endif	/* CONFIG_CGROUPS */

#ifdef CONFIG_SOCK_CGROUP_DATA

/*
 * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
 * per-socket cgroup information except for memcg association.
 *
 * On legacy hierarchies, net_prio and net_cls controllers directly set
 * attributes on each sock which can then be tested by the network layer.
 * On the default hierarchy, each sock is associated with the cgroup it was
 * created in and the networking layer can match the cgroup directly.
 *
 * To avoid carrying all three cgroup related fields separately in sock,
 * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
 * On boot, sock_cgroup_data records the cgroup that the sock was created
 * in so that cgroup2 matches can be made; however, once either net_prio or
 * net_cls starts being used, the area is overridden to carry prioidx
 * and/or classid.  The two modes are distinguished by whether the lowest
 * bit is set.  A clear bit indicates a cgroup pointer while a set bit
 * indicates prioidx and classid.
 *
 * While userland may start using net_prio or net_cls at any time, once
 * either is used, cgroup2 matching no longer works.  There is no reason to
 * mix the two and this is in line with how legacy and v2 compatibility is
 * handled.  On mode switch, cgroup references which are already being
 * pointed to by socks may be leaked.  While this can be remedied by adding
 * synchronization around sock_cgroup_data, given that the number of leaked
 * cgroups is bounded and highly unlikely to be high, this seems to be the
 * better trade-off.
 */
struct sock_cgroup_data {
	union {
#ifdef __LITTLE_ENDIAN
		struct {
			u8	is_data;
			u8	padding;
			u16	prioidx;
			u32	classid;
		} __packed;
#else
		struct {
			u32	classid;
			u16	prioidx;
			u8	padding;
			u8	is_data;
		} __packed;
#endif
		u64	val;
	};
};
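/*
 * Mode-discrimination sketch (illustrative only): the low bit of ->val
 * selects the interpretation of the word, as described above.  The real
 * pointer-mode read is done by sock_cgroup_ptr() in linux/cgroup.h; the
 * logic is shown here simplified.
 *
 *	u64 v = READ_ONCE(skcd->val);
 *
 *	if (v & 1)
 *		... is_data mode: use ->prioidx and ->classid ...
 *	else
 *		... pointer mode: (struct cgroup *)(unsigned long)v ...
 */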
/*
 * There's a theoretical window where the following accessors race with
 * updaters and return part of the previous pointer as the prioidx or
 * classid.  Such races are short-lived and the result isn't critical.
 */
static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
{
	/* fallback to 1 which is always the ID of the root cgroup */
	return (skcd->is_data & 1) ? skcd->prioidx : 1;
}

static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
{
	/* fallback to 0 which is the unconfigured default classid */
	return (skcd->is_data & 1) ? skcd->classid : 0;
}

/*
 * If invoked concurrently, the updaters may clobber each other.  The
 * caller is responsible for synchronization.
 */
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
					   u16 prioidx)
{
	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};

	if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
		return;

	if (!(skcd_buf.is_data & 1)) {
		skcd_buf.val = 0;
		skcd_buf.is_data = 1;
	}

	skcd_buf.prioidx = prioidx;
	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
}

static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
					   u32 classid)
{
	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};

	if (sock_cgroup_classid(&skcd_buf) == classid)
		return;

	if (!(skcd_buf.is_data & 1)) {
		skcd_buf.val = 0;
		skcd_buf.is_data = 1;
	}

	skcd_buf.classid = classid;
	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
}

#else	/* CONFIG_SOCK_CGROUP_DATA */

struct sock_cgroup_data {
};

#endif	/* CONFIG_SOCK_CGROUP_DATA */

#endif	/* _LINUX_CGROUP_DEFS_H */