1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * linux/kernel/acct.c
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds * BSD Process Accounting for Linux
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds * Author: Marco van Wieringen <[email protected]>
81da177e4SLinus Torvalds *
91da177e4SLinus Torvalds * Some code based on ideas and code from:
101da177e4SLinus Torvalds * Thomas K. Dyas <[email protected]>
111da177e4SLinus Torvalds *
121da177e4SLinus Torvalds * This file implements BSD-style process accounting. Whenever any
131da177e4SLinus Torvalds * process exits, an accounting record of type "struct acct" is
141da177e4SLinus Torvalds * written to the file specified with the acct() system call. It is
151da177e4SLinus Torvalds * up to user-level programs to do useful things with the accounting
161da177e4SLinus Torvalds * log. The kernel just provides the raw accounting information.
171da177e4SLinus Torvalds *
181da177e4SLinus Torvalds * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
191da177e4SLinus Torvalds *
201da177e4SLinus Torvalds * Plugged two leaks. 1) It didn't return acct_file into the free_filps if
211da177e4SLinus Torvalds * the file happened to be read-only. 2) If the accounting was suspended
221da177e4SLinus Torvalds * due to the lack of space it happily allowed to reopen it and completely
231da177e4SLinus Torvalds * lost the old acct_file. 3/10/98, Al Viro.
241da177e4SLinus Torvalds *
251da177e4SLinus Torvalds * Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
261da177e4SLinus Torvalds * XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
271da177e4SLinus Torvalds *
287b7b8a2cSRandy Dunlap * Fixed a nasty interaction with sys_umount(). If the accounting
291da177e4SLinus Torvalds * was suspended we failed to stop it on umount(). Messy.
301da177e4SLinus Torvalds * Another one: remount to readonly didn't stop accounting.
311da177e4SLinus Torvalds * Question: what should we do if we have CAP_SYS_ADMIN but not
321da177e4SLinus Torvalds * CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
331da177e4SLinus Torvalds * unless we are messing with the root. In that case we are getting a
341da177e4SLinus Torvalds * real mess with do_remount_sb(). 9/11/98, AV.
351da177e4SLinus Torvalds *
361da177e4SLinus Torvalds * Fixed a bunch of races (and pair of leaks). Probably not the best way,
371da177e4SLinus Torvalds * but this one obviously doesn't introduce deadlocks. Later. BTW, found
381da177e4SLinus Torvalds * one race (and leak) in BSD implementation.
391da177e4SLinus Torvalds * OK, that's better. ANOTHER race and leak in BSD variant. There always
401da177e4SLinus Torvalds * is one more bug... 10/11/98, AV.
411da177e4SLinus Torvalds *
421da177e4SLinus Torvalds * Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
43c1e8d7c6SMichel Lespinasse * ->mmap_lock to walk the vma list of current->mm. Nasty, since it leaks
441da177e4SLinus Torvalds * a struct file opened for write. Fixed. 2/6/2000, AV.
451da177e4SLinus Torvalds */
461da177e4SLinus Torvalds
471da177e4SLinus Torvalds #include <linux/mm.h>
481da177e4SLinus Torvalds #include <linux/slab.h>
491da177e4SLinus Torvalds #include <linux/acct.h>
50c59ede7bSRandy.Dunlap #include <linux/capability.h>
511da177e4SLinus Torvalds #include <linux/file.h>
521da177e4SLinus Torvalds #include <linux/tty.h>
531da177e4SLinus Torvalds #include <linux/security.h>
541da177e4SLinus Torvalds #include <linux/vfs.h>
551da177e4SLinus Torvalds #include <linux/jiffies.h>
561da177e4SLinus Torvalds #include <linux/times.h>
571da177e4SLinus Torvalds #include <linux/syscalls.h>
587b7b1aceSAl Viro #include <linux/mount.h>
597153e402SPaul McQuade #include <linux/uaccess.h>
6032ef5517SIngo Molnar #include <linux/sched/cputime.h>
6132ef5517SIngo Molnar
621da177e4SLinus Torvalds #include <asm/div64.h>
635f7b703fSPavel Emelyanov #include <linux/pid_namespace.h>
64efb170c2SAl Viro #include <linux/fs_pin.h>
651da177e4SLinus Torvalds
/*
 * Tunables for the free-space watchdog: the free-space percentages at
 * which accounting is resumed and suspended, and the delay in seconds
 * between two consecutive free-space checks.
 * Turned into sysctl-controllable parameters. AV, 12/11/98
 */
static int acct_parm[3] = {
	[0] = 4,	/* RESUME */
	[1] = 2,	/* SUSPEND */
	[2] = 30,	/* ACCT_TIMEOUT */
};
#define RESUME		(acct_parm[0])	/* >foo% free space - resume */
#define SUSPEND		(acct_parm[1])	/* <foo% free space - suspend */
#define ACCT_TIMEOUT	(acct_parm[2])	/* foo second timeout between checks */
771da177e4SLinus Torvalds
#ifdef CONFIG_SYSCTL
/* Exposes the three acct_parm[] integers as a single "acct" sysctl entry. */
static const struct ctl_table kern_acct_table[] = {
	{
		.procname	= "acct",
		.data		= &acct_parm,
		.maxlen		= sizeof(acct_parm),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

/* Register the table under "kernel" at late-initcall time. */
static __init int kernel_acct_sysctls_init(void)
{
	register_sysctl_init("kernel", kern_acct_table);
	return 0;
}
late_initcall(kernel_acct_sysctls_init);
#endif /* CONFIG_SYSCTL */
96801b5014Stangmeng
971da177e4SLinus Torvalds /*
981da177e4SLinus Torvalds * External references and all of the globals.
991da177e4SLinus Torvalds */
1001da177e4SLinus Torvalds
1011629d0ebSAl Viro struct bsd_acct_struct {
1021629d0ebSAl Viro struct fs_pin pin;
10334cece2eSAl Viro atomic_long_t count;
10434cece2eSAl Viro struct rcu_head rcu;
105b8f00e6bSAl Viro struct mutex lock;
10656d5f3ebSChristian Brauner bool active;
10756d5f3ebSChristian Brauner bool check_space;
10832dc7308SAl Viro unsigned long needcheck;
1091da177e4SLinus Torvalds struct file *file;
1105f7b703fSPavel Emelyanov struct pid_namespace *ns;
11117c0a5aaSAl Viro struct work_struct work;
11217c0a5aaSAl Viro struct completion done;
11356d5f3ebSChristian Brauner acct_t ac;
1141da177e4SLinus Torvalds };
1151da177e4SLinus Torvalds
11656d5f3ebSChristian Brauner static void fill_ac(struct bsd_acct_struct *acct);
11756d5f3ebSChristian Brauner static void acct_write_process(struct bsd_acct_struct *acct);
11859eda0e0SAl Viro
1191da177e4SLinus Torvalds /*
1201da177e4SLinus Torvalds * Check the amount of free space and suspend/resume accordingly.
1211da177e4SLinus Torvalds */
check_free_space(struct bsd_acct_struct * acct)12256d5f3ebSChristian Brauner static bool check_free_space(struct bsd_acct_struct *acct)
1231da177e4SLinus Torvalds {
1241da177e4SLinus Torvalds struct kstatfs sbuf;
1251da177e4SLinus Torvalds
12656d5f3ebSChristian Brauner if (!acct->check_space)
12756d5f3ebSChristian Brauner return acct->active;
1281da177e4SLinus Torvalds
1291da177e4SLinus Torvalds /* May block */
13054a4d58aSAl Viro if (vfs_statfs(&acct->file->f_path, &sbuf))
13156d5f3ebSChristian Brauner return acct->active;
1321da177e4SLinus Torvalds
1336248b1b3SPavel Emelyanov if (acct->active) {
13454a4d58aSAl Viro u64 suspend = sbuf.f_blocks * SUSPEND;
13554a4d58aSAl Viro do_div(suspend, 100);
13654a4d58aSAl Viro if (sbuf.f_bavail <= suspend) {
13756d5f3ebSChristian Brauner acct->active = false;
1382577d92eSIonut Alexa pr_info("Process accounting paused\n");
1391da177e4SLinus Torvalds }
1401da177e4SLinus Torvalds } else {
14154a4d58aSAl Viro u64 resume = sbuf.f_blocks * RESUME;
14254a4d58aSAl Viro do_div(resume, 100);
14354a4d58aSAl Viro if (sbuf.f_bavail >= resume) {
14456d5f3ebSChristian Brauner acct->active = true;
1452577d92eSIonut Alexa pr_info("Process accounting resumed\n");
1461da177e4SLinus Torvalds }
1471da177e4SLinus Torvalds }
1481da177e4SLinus Torvalds
14932dc7308SAl Viro acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
15054a4d58aSAl Viro return acct->active;
1511da177e4SLinus Torvalds }
1521da177e4SLinus Torvalds
acct_put(struct bsd_acct_struct * p)1539e251d02SAl Viro static void acct_put(struct bsd_acct_struct *p)
1549e251d02SAl Viro {
15534cece2eSAl Viro if (atomic_long_dec_and_test(&p->count))
15634cece2eSAl Viro kfree_rcu(p, rcu);
1579e251d02SAl Viro }
1589e251d02SAl Viro
to_acct(struct fs_pin * p)15959eda0e0SAl Viro static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
16059eda0e0SAl Viro {
16159eda0e0SAl Viro return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;
16259eda0e0SAl Viro }
16359eda0e0SAl Viro
acct_get(struct pid_namespace * ns)164215752fcSAl Viro static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
165215752fcSAl Viro {
166215752fcSAl Viro struct bsd_acct_struct *res;
167215752fcSAl Viro again:
1682798d4ceSAl Viro smp_rmb();
1692798d4ceSAl Viro rcu_read_lock();
1706aa7de05SMark Rutland res = to_acct(READ_ONCE(ns->bacct));
1712798d4ceSAl Viro if (!res) {
1722798d4ceSAl Viro rcu_read_unlock();
173215752fcSAl Viro return NULL;
174215752fcSAl Viro }
17534cece2eSAl Viro if (!atomic_long_inc_not_zero(&res->count)) {
176efb170c2SAl Viro rcu_read_unlock();
177efb170c2SAl Viro cpu_relax();
178b8f00e6bSAl Viro goto again;
179efb170c2SAl Viro }
180efb170c2SAl Viro rcu_read_unlock();
181efb170c2SAl Viro mutex_lock(&res->lock);
1826aa7de05SMark Rutland if (res != to_acct(READ_ONCE(ns->bacct))) {
183efb170c2SAl Viro mutex_unlock(&res->lock);
1849e251d02SAl Viro acct_put(res);
185efb170c2SAl Viro goto again;
186efb170c2SAl Viro }
187b8f00e6bSAl Viro return res;
188b8f00e6bSAl Viro }
189b8f00e6bSAl Viro
acct_pin_kill(struct fs_pin * pin)19059eda0e0SAl Viro static void acct_pin_kill(struct fs_pin *pin)
19159eda0e0SAl Viro {
19259eda0e0SAl Viro struct bsd_acct_struct *acct = to_acct(pin);
19359eda0e0SAl Viro mutex_lock(&acct->lock);
19456d5f3ebSChristian Brauner /*
19556d5f3ebSChristian Brauner * Fill the accounting struct with the exiting task's info
19656d5f3ebSChristian Brauner * before punting to the workqueue.
19756d5f3ebSChristian Brauner */
19856d5f3ebSChristian Brauner fill_ac(acct);
19959eda0e0SAl Viro schedule_work(&acct->work);
20059eda0e0SAl Viro wait_for_completion(&acct->done);
20159eda0e0SAl Viro cmpxchg(&acct->ns->bacct, pin, NULL);
20259eda0e0SAl Viro mutex_unlock(&acct->lock);
20359eda0e0SAl Viro pin_remove(pin);
20459eda0e0SAl Viro acct_put(acct);
20559eda0e0SAl Viro }
20659eda0e0SAl Viro
close_work(struct work_struct * work)20717c0a5aaSAl Viro static void close_work(struct work_struct *work)
20817c0a5aaSAl Viro {
20917c0a5aaSAl Viro struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
21017c0a5aaSAl Viro struct file *file = acct->file;
21156d5f3ebSChristian Brauner
21256d5f3ebSChristian Brauner /* We were fired by acct_pin_kill() which holds acct->lock. */
21356d5f3ebSChristian Brauner acct_write_process(acct);
21417c0a5aaSAl Viro if (file->f_op->flush)
21517c0a5aaSAl Viro file->f_op->flush(file, NULL);
21617c0a5aaSAl Viro __fput_sync(file);
21717c0a5aaSAl Viro complete(&acct->done);
21817c0a5aaSAl Viro }
21917c0a5aaSAl Viro
acct_on(struct filename * pathname)220669abf4eSJeff Layton static int acct_on(struct filename *pathname)
2217b7b1aceSAl Viro {
2227b7b1aceSAl Viro struct file *file;
2233064c356SAl Viro struct vfsmount *mnt, *internal;
224b8f00e6bSAl Viro struct pid_namespace *ns = task_active_pid_ns(current);
22559eda0e0SAl Viro struct bsd_acct_struct *acct;
22659eda0e0SAl Viro struct fs_pin *old;
2273064c356SAl Viro int err;
228b8f00e6bSAl Viro
229b8f00e6bSAl Viro acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
230b8f00e6bSAl Viro if (!acct)
231b8f00e6bSAl Viro return -ENOMEM;
2327b7b1aceSAl Viro
2337b7b1aceSAl Viro /* Difference from BSD - they don't do O_APPEND */
234669abf4eSJeff Layton file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
235b8f00e6bSAl Viro if (IS_ERR(file)) {
236b8f00e6bSAl Viro kfree(acct);
2377b7b1aceSAl Viro return PTR_ERR(file);
238b8f00e6bSAl Viro }
2397b7b1aceSAl Viro
240496ad9aaSAl Viro if (!S_ISREG(file_inode(file)->i_mode)) {
241b8f00e6bSAl Viro kfree(acct);
2427b7b1aceSAl Viro filp_close(file, NULL);
2437b7b1aceSAl Viro return -EACCES;
2447b7b1aceSAl Viro }
2457b7b1aceSAl Viro
246*890ed45bSChristian Brauner /* Exclude kernel kernel internal filesystems. */
247*890ed45bSChristian Brauner if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) {
248*890ed45bSChristian Brauner kfree(acct);
249*890ed45bSChristian Brauner filp_close(file, NULL);
250*890ed45bSChristian Brauner return -EINVAL;
251*890ed45bSChristian Brauner }
252*890ed45bSChristian Brauner
253*890ed45bSChristian Brauner /* Exclude procfs and sysfs. */
254*890ed45bSChristian Brauner if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) {
255*890ed45bSChristian Brauner kfree(acct);
256*890ed45bSChristian Brauner filp_close(file, NULL);
257*890ed45bSChristian Brauner return -EINVAL;
258*890ed45bSChristian Brauner }
259*890ed45bSChristian Brauner
260d0f88f8dSAl Viro if (!(file->f_mode & FMODE_CAN_WRITE)) {
261b8f00e6bSAl Viro kfree(acct);
2627b7b1aceSAl Viro filp_close(file, NULL);
2637b7b1aceSAl Viro return -EIO;
2647b7b1aceSAl Viro }
2653064c356SAl Viro internal = mnt_clone_internal(&file->f_path);
2663064c356SAl Viro if (IS_ERR(internal)) {
2673064c356SAl Viro kfree(acct);
2683064c356SAl Viro filp_close(file, NULL);
2693064c356SAl Viro return PTR_ERR(internal);
2703064c356SAl Viro }
2713e15dcf7SAmir Goldstein err = mnt_get_write_access(internal);
2723064c356SAl Viro if (err) {
2733064c356SAl Viro mntput(internal);
2743064c356SAl Viro kfree(acct);
2753064c356SAl Viro filp_close(file, NULL);
2763064c356SAl Viro return err;
2773064c356SAl Viro }
2783064c356SAl Viro mnt = file->f_path.mnt;
2793064c356SAl Viro file->f_path.mnt = internal;
2807b7b1aceSAl Viro
28134cece2eSAl Viro atomic_long_set(&acct->count, 1);
28259eda0e0SAl Viro init_fs_pin(&acct->pin, acct_pin_kill);
283b8f00e6bSAl Viro acct->file = file;
284b8f00e6bSAl Viro acct->needcheck = jiffies;
285b8f00e6bSAl Viro acct->ns = ns;
286b8f00e6bSAl Viro mutex_init(&acct->lock);
28759eda0e0SAl Viro INIT_WORK(&acct->work, close_work);
28859eda0e0SAl Viro init_completion(&acct->done);
289efb170c2SAl Viro mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
290efb170c2SAl Viro pin_insert(&acct->pin, mnt);
291b8f00e6bSAl Viro
29259eda0e0SAl Viro rcu_read_lock();
29359eda0e0SAl Viro old = xchg(&ns->bacct, &acct->pin);
2942798d4ceSAl Viro mutex_unlock(&acct->lock);
29559eda0e0SAl Viro pin_kill(old);
2963e15dcf7SAmir Goldstein mnt_put_write_access(mnt);
2973064c356SAl Viro mntput(mnt);
2987b7b1aceSAl Viro return 0;
2997b7b1aceSAl Viro }
3007b7b1aceSAl Viro
3019df7fa16SAl Viro static DEFINE_MUTEX(acct_on_mutex);
3029df7fa16SAl Viro
303417ef531SRandy Dunlap /**
304417ef531SRandy Dunlap * sys_acct - enable/disable process accounting
305417ef531SRandy Dunlap * @name: file name for accounting records or NULL to shutdown accounting
306417ef531SRandy Dunlap *
3071da177e4SLinus Torvalds * sys_acct() is the only system call needed to implement process
3081da177e4SLinus Torvalds * accounting. It takes the name of the file where accounting records
3091da177e4SLinus Torvalds * should be written. If the filename is NULL, accounting will be
3101da177e4SLinus Torvalds * shutdown.
311b7621ebfSRandy Dunlap *
312b7621ebfSRandy Dunlap * Returns: 0 for success or negative errno values for failure.
3131da177e4SLinus Torvalds */
SYSCALL_DEFINE1(acct,const char __user *,name)314b290ebe2SHeiko Carstens SYSCALL_DEFINE1(acct, const char __user *, name)
3151da177e4SLinus Torvalds {
31605b90496SEric Paris int error = 0;
3171da177e4SLinus Torvalds
3181da177e4SLinus Torvalds if (!capable(CAP_SYS_PACCT))
3191da177e4SLinus Torvalds return -EPERM;
3201da177e4SLinus Torvalds
3211da177e4SLinus Torvalds if (name) {
32291a27b2aSJeff Layton struct filename *tmp = getname(name);
3232577d92eSIonut Alexa
3247b7b1aceSAl Viro if (IS_ERR(tmp))
32546c0a8caSPaul McQuade return PTR_ERR(tmp);
3269df7fa16SAl Viro mutex_lock(&acct_on_mutex);
327669abf4eSJeff Layton error = acct_on(tmp);
3289df7fa16SAl Viro mutex_unlock(&acct_on_mutex);
3291da177e4SLinus Torvalds putname(tmp);
3307b7b1aceSAl Viro } else {
33159eda0e0SAl Viro rcu_read_lock();
33259eda0e0SAl Viro pin_kill(task_active_pid_ns(current)->bacct);
3331da177e4SLinus Torvalds }
33405b90496SEric Paris
3351da177e4SLinus Torvalds return error;
3361da177e4SLinus Torvalds }
3371da177e4SLinus Torvalds
acct_exit_ns(struct pid_namespace * ns)3380b6b030fSPavel Emelyanov void acct_exit_ns(struct pid_namespace *ns)
3390b6b030fSPavel Emelyanov {
34059eda0e0SAl Viro rcu_read_lock();
34159eda0e0SAl Viro pin_kill(ns->bacct);
3421da177e4SLinus Torvalds }
3431da177e4SLinus Torvalds
3441da177e4SLinus Torvalds /*
345457139f1SZheng Yejian * encode an u64 into a comp_t
3461da177e4SLinus Torvalds *
3471da177e4SLinus Torvalds * This routine has been adopted from the encode_comp_t() function in
3481da177e4SLinus Torvalds * the kern_acct.c file of the FreeBSD operating system. The encoding
3491da177e4SLinus Torvalds * is a 13-bit fraction with a 3-bit (base 8) exponent.
3501da177e4SLinus Torvalds */
3511da177e4SLinus Torvalds
3521da177e4SLinus Torvalds #define MANTSIZE 13 /* 13 bit mantissa. */
3531da177e4SLinus Torvalds #define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
3541da177e4SLinus Torvalds #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
3551da177e4SLinus Torvalds
encode_comp_t(u64 value)356457139f1SZheng Yejian static comp_t encode_comp_t(u64 value)
3571da177e4SLinus Torvalds {
3581da177e4SLinus Torvalds int exp, rnd;
3591da177e4SLinus Torvalds
3601da177e4SLinus Torvalds exp = rnd = 0;
3611da177e4SLinus Torvalds while (value > MAXFRACT) {
3621da177e4SLinus Torvalds rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
3631da177e4SLinus Torvalds value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
3641da177e4SLinus Torvalds exp++;
3651da177e4SLinus Torvalds }
3661da177e4SLinus Torvalds
3671da177e4SLinus Torvalds /*
3681da177e4SLinus Torvalds * If we need to round up, do it (and handle overflow correctly).
3691da177e4SLinus Torvalds */
3701da177e4SLinus Torvalds if (rnd && (++value > MAXFRACT)) {
3711da177e4SLinus Torvalds value >>= EXPSIZE;
3721da177e4SLinus Torvalds exp++;
3731da177e4SLinus Torvalds }
3741da177e4SLinus Torvalds
375c5f31c65SZheng Yejian if (exp > (((comp_t) ~0U) >> MANTSIZE))
376c5f31c65SZheng Yejian return (comp_t) ~0U;
3771da177e4SLinus Torvalds /*
3781da177e4SLinus Torvalds * Clean it up and polish it off.
3791da177e4SLinus Torvalds */
3801da177e4SLinus Torvalds exp <<= MANTSIZE; /* Shift the exponent into place */
3811da177e4SLinus Torvalds exp += value; /* and add on the mantissa. */
3821da177e4SLinus Torvalds return exp;
3831da177e4SLinus Torvalds }
3841da177e4SLinus Torvalds
3851da177e4SLinus Torvalds #if ACCT_VERSION == 1 || ACCT_VERSION == 2
3861da177e4SLinus Torvalds /*
3871da177e4SLinus Torvalds * encode an u64 into a comp2_t (24 bits)
3881da177e4SLinus Torvalds *
3891da177e4SLinus Torvalds * Format: 5 bit base 2 exponent, 20 bits mantissa.
3901da177e4SLinus Torvalds * The leading bit of the mantissa is not stored, but implied for
3911da177e4SLinus Torvalds * non-zero exponents.
3921da177e4SLinus Torvalds * Largest encodable value is 50 bits.
3931da177e4SLinus Torvalds */
3941da177e4SLinus Torvalds
3951da177e4SLinus Torvalds #define MANTSIZE2 20 /* 20 bit mantissa. */
3961da177e4SLinus Torvalds #define EXPSIZE2 5 /* 5 bit base 2 exponent. */
3971da177e4SLinus Torvalds #define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
3981da177e4SLinus Torvalds #define MAXEXP2 ((1 << EXPSIZE2) - 1) /* Maximum exponent. */
3991da177e4SLinus Torvalds
encode_comp2_t(u64 value)4001da177e4SLinus Torvalds static comp2_t encode_comp2_t(u64 value)
4011da177e4SLinus Torvalds {
4021da177e4SLinus Torvalds int exp, rnd;
4031da177e4SLinus Torvalds
4041da177e4SLinus Torvalds exp = (value > (MAXFRACT2>>1));
4051da177e4SLinus Torvalds rnd = 0;
4061da177e4SLinus Torvalds while (value > MAXFRACT2) {
4071da177e4SLinus Torvalds rnd = value & 1;
4081da177e4SLinus Torvalds value >>= 1;
4091da177e4SLinus Torvalds exp++;
4101da177e4SLinus Torvalds }
4111da177e4SLinus Torvalds
4121da177e4SLinus Torvalds /*
4131da177e4SLinus Torvalds * If we need to round up, do it (and handle overflow correctly).
4141da177e4SLinus Torvalds */
4151da177e4SLinus Torvalds if (rnd && (++value > MAXFRACT2)) {
4161da177e4SLinus Torvalds value >>= 1;
4171da177e4SLinus Torvalds exp++;
4181da177e4SLinus Torvalds }
4191da177e4SLinus Torvalds
4201da177e4SLinus Torvalds if (exp > MAXEXP2) {
4211da177e4SLinus Torvalds /* Overflow. Return largest representable number instead. */
4221da177e4SLinus Torvalds return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1;
4231da177e4SLinus Torvalds } else {
4241da177e4SLinus Torvalds return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1));
4251da177e4SLinus Torvalds }
4261da177e4SLinus Torvalds }
42735189b8fSHui Su #elif ACCT_VERSION == 3
4281da177e4SLinus Torvalds /*
4291da177e4SLinus Torvalds * encode an u64 into a 32 bit IEEE float
4301da177e4SLinus Torvalds */
encode_float(u64 value)4311da177e4SLinus Torvalds static u32 encode_float(u64 value)
4321da177e4SLinus Torvalds {
4331da177e4SLinus Torvalds unsigned exp = 190;
4341da177e4SLinus Torvalds unsigned u;
4351da177e4SLinus Torvalds
4362577d92eSIonut Alexa if (value == 0)
4372577d92eSIonut Alexa return 0;
4381da177e4SLinus Torvalds while ((s64)value > 0) {
4391da177e4SLinus Torvalds value <<= 1;
4401da177e4SLinus Torvalds exp--;
4411da177e4SLinus Torvalds }
4421da177e4SLinus Torvalds u = (u32)(value >> 40) & 0x7fffffu;
4431da177e4SLinus Torvalds return u | (exp << 23);
4441da177e4SLinus Torvalds }
4451da177e4SLinus Torvalds #endif
4461da177e4SLinus Torvalds
4471da177e4SLinus Torvalds /*
4481da177e4SLinus Torvalds * Write an accounting entry for an exiting process
4491da177e4SLinus Torvalds *
4501da177e4SLinus Torvalds * The acct_process() call is the workhorse of the process
4511da177e4SLinus Torvalds * accounting system. The struct acct is built here and then written
4521da177e4SLinus Torvalds * into the accounting file. This function should only be called from
453bcbe4a07SIngo Molnar * do_exit() or when switching to a different output file.
4541da177e4SLinus Torvalds */
4551da177e4SLinus Torvalds
fill_ac(struct bsd_acct_struct * acct)45656d5f3ebSChristian Brauner static void fill_ac(struct bsd_acct_struct *acct)
457cdd37e23SAl Viro {
458cdd37e23SAl Viro struct pacct_struct *pacct = ¤t->signal->pacct;
45956d5f3ebSChristian Brauner struct file *file = acct->file;
46056d5f3ebSChristian Brauner acct_t *ac = &acct->ac;
461cdd37e23SAl Viro u64 elapsed, run_time;
4622d602bf2SArnd Bergmann time64_t btime;
463cdd37e23SAl Viro struct tty_struct *tty;
464cdd37e23SAl Viro
46556d5f3ebSChristian Brauner lockdep_assert_held(&acct->lock);
46656d5f3ebSChristian Brauner
46756d5f3ebSChristian Brauner if (time_is_after_jiffies(acct->needcheck)) {
46856d5f3ebSChristian Brauner acct->check_space = false;
46956d5f3ebSChristian Brauner
47056d5f3ebSChristian Brauner /* Don't fill in @ac if nothing will be written. */
47156d5f3ebSChristian Brauner if (!acct->active)
47256d5f3ebSChristian Brauner return;
47356d5f3ebSChristian Brauner } else {
47456d5f3ebSChristian Brauner acct->check_space = true;
47556d5f3ebSChristian Brauner }
47656d5f3ebSChristian Brauner
477cdd37e23SAl Viro /*
478cdd37e23SAl Viro * Fill the accounting struct with the needed info as recorded
479cdd37e23SAl Viro * by the different kernel functions.
480cdd37e23SAl Viro */
481cdd37e23SAl Viro memset(ac, 0, sizeof(acct_t));
482cdd37e23SAl Viro
483cdd37e23SAl Viro ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER;
4844264be50SAzeem Shaikh strscpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm));
485cdd37e23SAl Viro
486cdd37e23SAl Viro /* calculate run_time in nsec*/
487cdd37e23SAl Viro run_time = ktime_get_ns();
488cdd37e23SAl Viro run_time -= current->group_leader->start_time;
489cdd37e23SAl Viro /* convert nsec -> AHZ */
490cdd37e23SAl Viro elapsed = nsec_to_AHZ(run_time);
491cdd37e23SAl Viro #if ACCT_VERSION == 3
492cdd37e23SAl Viro ac->ac_etime = encode_float(elapsed);
493cdd37e23SAl Viro #else
494cdd37e23SAl Viro ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
495cdd37e23SAl Viro (unsigned long) elapsed : (unsigned long) -1l);
496cdd37e23SAl Viro #endif
497cdd37e23SAl Viro #if ACCT_VERSION == 1 || ACCT_VERSION == 2
498cdd37e23SAl Viro {
499cdd37e23SAl Viro /* new enlarged etime field */
500cdd37e23SAl Viro comp2_t etime = encode_comp2_t(elapsed);
5012577d92eSIonut Alexa
502cdd37e23SAl Viro ac->ac_etime_hi = etime >> 16;
503cdd37e23SAl Viro ac->ac_etime_lo = (u16) etime;
504cdd37e23SAl Viro }
505cdd37e23SAl Viro #endif
506cdd37e23SAl Viro do_div(elapsed, AHZ);
5072d602bf2SArnd Bergmann btime = ktime_get_real_seconds() - elapsed;
5082d602bf2SArnd Bergmann ac->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
509cdd37e23SAl Viro #if ACCT_VERSION == 2
510cdd37e23SAl Viro ac->ac_ahz = AHZ;
511cdd37e23SAl Viro #endif
512cdd37e23SAl Viro
513cdd37e23SAl Viro spin_lock_irq(¤t->sighand->siglock);
514cdd37e23SAl Viro tty = current->signal->tty; /* Safe as we hold the siglock */
515cdd37e23SAl Viro ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
516d4bc42afSFrederic Weisbecker ac->ac_utime = encode_comp_t(nsec_to_AHZ(pacct->ac_utime));
517d4bc42afSFrederic Weisbecker ac->ac_stime = encode_comp_t(nsec_to_AHZ(pacct->ac_stime));
518cdd37e23SAl Viro ac->ac_flag = pacct->ac_flag;
519cdd37e23SAl Viro ac->ac_mem = encode_comp_t(pacct->ac_mem);
520cdd37e23SAl Viro ac->ac_minflt = encode_comp_t(pacct->ac_minflt);
521cdd37e23SAl Viro ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
522cdd37e23SAl Viro ac->ac_exitcode = pacct->ac_exitcode;
523cdd37e23SAl Viro spin_unlock_irq(¤t->sighand->siglock);
524d8e180dcSMichal Schmidt
5251da177e4SLinus Torvalds /* we really need to bite the bullet and change layout */
52656d5f3ebSChristian Brauner ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid());
52756d5f3ebSChristian Brauner ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid());
5281da177e4SLinus Torvalds #if ACCT_VERSION == 1 || ACCT_VERSION == 2
5291da177e4SLinus Torvalds /* backward-compatible 16 bit fields */
53056d5f3ebSChristian Brauner ac->ac_uid16 = ac->ac_uid;
53156d5f3ebSChristian Brauner ac->ac_gid16 = ac->ac_gid;
53235189b8fSHui Su #elif ACCT_VERSION == 3
533067b722fSYing Xue {
534067b722fSYing Xue struct pid_namespace *ns = acct->ns;
535067b722fSYing Xue
53656d5f3ebSChristian Brauner ac->ac_pid = task_tgid_nr_ns(current, ns);
537a846a195SPavel Emelyanov rcu_read_lock();
53856d5f3ebSChristian Brauner ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
539a846a195SPavel Emelyanov rcu_read_unlock();
540067b722fSYing Xue }
5411da177e4SLinus Torvalds #endif
54256d5f3ebSChristian Brauner }
54356d5f3ebSChristian Brauner
acct_write_process(struct bsd_acct_struct * acct)54456d5f3ebSChristian Brauner static void acct_write_process(struct bsd_acct_struct *acct)
54556d5f3ebSChristian Brauner {
54656d5f3ebSChristian Brauner struct file *file = acct->file;
54756d5f3ebSChristian Brauner const struct cred *cred;
54856d5f3ebSChristian Brauner acct_t *ac = &acct->ac;
54956d5f3ebSChristian Brauner
55056d5f3ebSChristian Brauner /* Perform file operations on behalf of whoever enabled accounting */
55156d5f3ebSChristian Brauner cred = override_creds(file->f_cred);
55256d5f3ebSChristian Brauner
5531da177e4SLinus Torvalds /*
55456d5f3ebSChristian Brauner * First check to see if there is enough free_space to continue
55556d5f3ebSChristian Brauner * the process accounting system. Then get freeze protection. If
55656d5f3ebSChristian Brauner * the fs is frozen, just skip the write as we could deadlock
55756d5f3ebSChristian Brauner * the system otherwise.
5585ae98f15SJan Kara */
55956d5f3ebSChristian Brauner if (check_free_space(acct) && file_start_write_trylock(file)) {
560ed44724bSAl Viro /* it's been opened O_APPEND, so position is irrelevant */
561ed44724bSAl Viro loff_t pos = 0;
56256d5f3ebSChristian Brauner __kernel_write(file, ac, sizeof(acct_t), &pos);
56303d95eb2SAl Viro file_end_write(file);
564ed44724bSAl Viro }
56556d5f3ebSChristian Brauner
56656d5f3ebSChristian Brauner revert_creds(cred);
56756d5f3ebSChristian Brauner }
56856d5f3ebSChristian Brauner
do_acct_process(struct bsd_acct_struct * acct)56956d5f3ebSChristian Brauner static void do_acct_process(struct bsd_acct_struct *acct)
57056d5f3ebSChristian Brauner {
57156d5f3ebSChristian Brauner unsigned long flim;
57256d5f3ebSChristian Brauner
57356d5f3ebSChristian Brauner /* Accounting records are not subject to resource limits. */
57456d5f3ebSChristian Brauner flim = rlimit(RLIMIT_FSIZE);
57556d5f3ebSChristian Brauner current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
57656d5f3ebSChristian Brauner fill_ac(acct);
57756d5f3ebSChristian Brauner acct_write_process(acct);
578ed44724bSAl Viro current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
5791da177e4SLinus Torvalds }
5801da177e4SLinus Torvalds
581417ef531SRandy Dunlap /**
5820e464814SKaiGai Kohei * acct_collect - collect accounting information into pacct_struct
583f6ec29a4SKaiGai Kohei * @exitcode: task exit code
584f6ec29a4SKaiGai Kohei * @group_dead: not 0, if this thread is the last one in the process.
5850e464814SKaiGai Kohei */
acct_collect(long exitcode,int group_dead)586f6ec29a4SKaiGai Kohei void acct_collect(long exitcode, int group_dead)
5870e464814SKaiGai Kohei {
5880e464814SKaiGai Kohei struct pacct_struct *pacct = ¤t->signal->pacct;
589d4bc42afSFrederic Weisbecker u64 utime, stime;
5900e464814SKaiGai Kohei unsigned long vsize = 0;
5910e464814SKaiGai Kohei
592f6ec29a4SKaiGai Kohei if (group_dead && current->mm) {
593160c8200SMatthew Wilcox (Oracle) struct mm_struct *mm = current->mm;
594160c8200SMatthew Wilcox (Oracle) VMA_ITERATOR(vmi, mm, 0);
5950e464814SKaiGai Kohei struct vm_area_struct *vma;
5962577d92eSIonut Alexa
597160c8200SMatthew Wilcox (Oracle) mmap_read_lock(mm);
598160c8200SMatthew Wilcox (Oracle) for_each_vma(vmi, vma)
5990e464814SKaiGai Kohei vsize += vma->vm_end - vma->vm_start;
600160c8200SMatthew Wilcox (Oracle) mmap_read_unlock(mm);
6010e464814SKaiGai Kohei }
6020e464814SKaiGai Kohei
60377787bfbSKaiGai Kohei spin_lock_irq(¤t->sighand->siglock);
604f6ec29a4SKaiGai Kohei if (group_dead)
6050e464814SKaiGai Kohei pacct->ac_mem = vsize / 1024;
606f6ec29a4SKaiGai Kohei if (thread_group_leader(current)) {
607f6ec29a4SKaiGai Kohei pacct->ac_exitcode = exitcode;
608f6ec29a4SKaiGai Kohei if (current->flags & PF_FORKNOEXEC)
609f6ec29a4SKaiGai Kohei pacct->ac_flag |= AFORK;
610f6ec29a4SKaiGai Kohei }
611f6ec29a4SKaiGai Kohei if (current->flags & PF_SUPERPRIV)
612f6ec29a4SKaiGai Kohei pacct->ac_flag |= ASU;
613f6ec29a4SKaiGai Kohei if (current->flags & PF_DUMPCORE)
614f6ec29a4SKaiGai Kohei pacct->ac_flag |= ACORE;
615f6ec29a4SKaiGai Kohei if (current->flags & PF_SIGNALED)
616f6ec29a4SKaiGai Kohei pacct->ac_flag |= AXSIG;
617d4bc42afSFrederic Weisbecker
618d4bc42afSFrederic Weisbecker task_cputime(current, &utime, &stime);
6196fac4829SFrederic Weisbecker pacct->ac_utime += utime;
6206fac4829SFrederic Weisbecker pacct->ac_stime += stime;
62177787bfbSKaiGai Kohei pacct->ac_minflt += current->min_flt;
62277787bfbSKaiGai Kohei pacct->ac_majflt += current->maj_flt;
62377787bfbSKaiGai Kohei spin_unlock_irq(¤t->sighand->siglock);
6240e464814SKaiGai Kohei }
6250e464814SKaiGai Kohei
slow_acct_process(struct pid_namespace * ns)626e25ff11fSAl Viro static void slow_acct_process(struct pid_namespace *ns)
6271da177e4SLinus Torvalds {
628e25ff11fSAl Viro for ( ; ns; ns = ns->parent) {
629215752fcSAl Viro struct bsd_acct_struct *acct = acct_get(ns);
630b8f00e6bSAl Viro if (acct) {
631b8f00e6bSAl Viro do_acct_process(acct);
632b8f00e6bSAl Viro mutex_unlock(&acct->lock);
6339e251d02SAl Viro acct_put(acct);
6341da177e4SLinus Torvalds }
6351da177e4SLinus Torvalds }
636e25ff11fSAl Viro }
6377d1e1350SPavel Emelyanov
6387d1e1350SPavel Emelyanov /**
639b7621ebfSRandy Dunlap * acct_process - handles process accounting for an exiting task
6407d1e1350SPavel Emelyanov */
acct_process(void)6417d1e1350SPavel Emelyanov void acct_process(void)
6427d1e1350SPavel Emelyanov {
6437d1e1350SPavel Emelyanov struct pid_namespace *ns;
6447d1e1350SPavel Emelyanov
6450c18d7a5SPavel Emelyanov /*
6460c18d7a5SPavel Emelyanov * This loop is safe lockless, since current is still
6470c18d7a5SPavel Emelyanov * alive and holds its namespace, which in turn holds
6480c18d7a5SPavel Emelyanov * its parent.
6490c18d7a5SPavel Emelyanov */
650e25ff11fSAl Viro for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) {
651b8f00e6bSAl Viro if (ns->bacct)
652e25ff11fSAl Viro break;
653e25ff11fSAl Viro }
654e25ff11fSAl Viro if (unlikely(ns))
655e25ff11fSAl Viro slow_acct_process(ns);
6567d1e1350SPavel Emelyanov }
657