xref: /openbmc/linux/kernel/acct.c (revision d37cf9b63113f13d742713881ce691fc615d8b3b)
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/kernel/acct.c
 *
 *  BSD Process Accounting for Linux
 *
 *  Author: Marco van Wieringen <mvw@planets.elm.net>
 *
 *  Some code based on ideas and code from:
 *  Thomas K. Dyas <tdyas@eden.rutgers.edu>
 *
 *  This file implements BSD-style process accounting. Whenever any
 *  process exits, an accounting record of type "struct acct" is
 *  written to the file specified with the acct() system call. It is
 *  up to user-level programs to do useful things with the accounting
 *  log. The kernel just provides the raw accounting information.
 *
 * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
 *
 *  Plugged two leaks. 1) It didn't return acct_file into the free_filps if
 *  the file happened to be read-only. 2) If the accounting was suspended
 *  due to the lack of space it happily allowed to reopen it and completely
 *  lost the old acct_file. 3/10/98, Al Viro.
 *
 *  Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
 *  XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
 *
 *  Fixed a nasty interaction with sys_umount(). If the accounting
 *  was suspended we failed to stop it on umount(). Messy.
 *  Another one: remount to readonly didn't stop accounting.
 *	Question: what should we do if we have CAP_SYS_ADMIN but not
 *  CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
 *  unless we are messing with the root. In that case we are getting a
 *  real mess with do_remount_sb(). 9/11/98, AV.
 *
 *  Fixed a bunch of races (and pair of leaks). Probably not the best way,
 *  but this one obviously doesn't introduce deadlocks. Later. BTW, found
 *  one race (and leak) in BSD implementation.
 *  OK, that's better. ANOTHER race and leak in BSD variant. There always
 *  is one more bug... 10/11/98, AV.
 *
 *	Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
 * ->mmap_lock to walk the vma list of current->mm. Nasty, since it leaks
 * a struct file opened for write. Fixed. 2/6/2000, AV.
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/tty.h>
#include <linux/security.h>
#include <linux/vfs.h>
#include <linux/jiffies.h>
#include <linux/times.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/uaccess.h>
#include <linux/sched/cputime.h>

#include <asm/div64.h>
#include <linux/pid_namespace.h>
#include <linux/fs_pin.h>

/*
 * Free-space thresholds (as a percentage of the filesystem's total
 * blocks) and the polling interval used by check_free_space() to
 * suspend and resume process accounting.
 * Turned into sysctl-controllable parameters. AV, 12/11/98
 */

static int acct_parm[3] = {
	[0] = 4,	/* RESUME */
	[1] = 2,	/* SUSPEND */
	[2] = 30,	/* ACCT_TIMEOUT */
};
#define RESUME		(acct_parm[0])	/* >foo% free space - resume */
#define SUSPEND		(acct_parm[1])	/* <foo% free space - suspend */
#define ACCT_TIMEOUT	(acct_parm[2])	/* foo second timeout between checks */

#ifdef CONFIG_SYSCTL
/* The "acct" sysctl entry: all of acct_parm[] as a vector of ints. */
static struct ctl_table kern_acct_table[] = {
	{
		.procname	= "acct",
		.data		= &acct_parm,
		.maxlen		= sizeof(acct_parm),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

/* Register kern_acct_table under "kernel"; run as a late initcall. */
static __init int kernel_acct_sysctls_init(void)
{
	register_sysctl_init("kernel", kern_acct_table);
	return 0;
}
late_initcall(kernel_acct_sysctls_init);
#endif /* CONFIG_SYSCTL */

981da177e4SLinus Torvalds /*
991da177e4SLinus Torvalds  * External references and all of the globals.
1001da177e4SLinus Torvalds  */
1011da177e4SLinus Torvalds 
1021629d0ebSAl Viro struct bsd_acct_struct {
1031629d0ebSAl Viro 	struct fs_pin		pin;
10434cece2eSAl Viro 	atomic_long_t		count;
10534cece2eSAl Viro 	struct rcu_head		rcu;
106b8f00e6bSAl Viro 	struct mutex		lock;
1075c928e14SChristian Brauner 	bool			active;
1085c928e14SChristian Brauner 	bool			check_space;
10932dc7308SAl Viro 	unsigned long		needcheck;
1101da177e4SLinus Torvalds 	struct file		*file;
1115f7b703fSPavel Emelyanov 	struct pid_namespace	*ns;
11217c0a5aaSAl Viro 	struct work_struct	work;
11317c0a5aaSAl Viro 	struct completion	done;
1145c928e14SChristian Brauner 	acct_t			ac;
1151da177e4SLinus Torvalds };
1161da177e4SLinus Torvalds 
1175c928e14SChristian Brauner static void fill_ac(struct bsd_acct_struct *acct);
1185c928e14SChristian Brauner static void acct_write_process(struct bsd_acct_struct *acct);
11959eda0e0SAl Viro 
1201da177e4SLinus Torvalds /*
1211da177e4SLinus Torvalds  * Check the amount of free space and suspend/resume accordingly.
1221da177e4SLinus Torvalds  */
check_free_space(struct bsd_acct_struct * acct)1235c928e14SChristian Brauner static bool check_free_space(struct bsd_acct_struct *acct)
1241da177e4SLinus Torvalds {
1251da177e4SLinus Torvalds 	struct kstatfs sbuf;
1261da177e4SLinus Torvalds 
1275c928e14SChristian Brauner 	if (!acct->check_space)
1285c928e14SChristian Brauner 		return acct->active;
1291da177e4SLinus Torvalds 
1301da177e4SLinus Torvalds 	/* May block */
13154a4d58aSAl Viro 	if (vfs_statfs(&acct->file->f_path, &sbuf))
1325c928e14SChristian Brauner 		return acct->active;
1331da177e4SLinus Torvalds 
1346248b1b3SPavel Emelyanov 	if (acct->active) {
13554a4d58aSAl Viro 		u64 suspend = sbuf.f_blocks * SUSPEND;
13654a4d58aSAl Viro 		do_div(suspend, 100);
13754a4d58aSAl Viro 		if (sbuf.f_bavail <= suspend) {
1385c928e14SChristian Brauner 			acct->active = false;
1392577d92eSIonut Alexa 			pr_info("Process accounting paused\n");
1401da177e4SLinus Torvalds 		}
1411da177e4SLinus Torvalds 	} else {
14254a4d58aSAl Viro 		u64 resume = sbuf.f_blocks * RESUME;
14354a4d58aSAl Viro 		do_div(resume, 100);
14454a4d58aSAl Viro 		if (sbuf.f_bavail >= resume) {
1455c928e14SChristian Brauner 			acct->active = true;
1462577d92eSIonut Alexa 			pr_info("Process accounting resumed\n");
1471da177e4SLinus Torvalds 		}
1481da177e4SLinus Torvalds 	}
1491da177e4SLinus Torvalds 
15032dc7308SAl Viro 	acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
15154a4d58aSAl Viro 	return acct->active;
1521da177e4SLinus Torvalds }
1531da177e4SLinus Torvalds 
acct_put(struct bsd_acct_struct * p)1549e251d02SAl Viro static void acct_put(struct bsd_acct_struct *p)
1559e251d02SAl Viro {
15634cece2eSAl Viro 	if (atomic_long_dec_and_test(&p->count))
15734cece2eSAl Viro 		kfree_rcu(p, rcu);
1589e251d02SAl Viro }
1599e251d02SAl Viro 
to_acct(struct fs_pin * p)16059eda0e0SAl Viro static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
16159eda0e0SAl Viro {
16259eda0e0SAl Viro 	return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;
16359eda0e0SAl Viro }
16459eda0e0SAl Viro 
acct_get(struct pid_namespace * ns)165215752fcSAl Viro static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
166215752fcSAl Viro {
167215752fcSAl Viro 	struct bsd_acct_struct *res;
168215752fcSAl Viro again:
1692798d4ceSAl Viro 	smp_rmb();
1702798d4ceSAl Viro 	rcu_read_lock();
1716aa7de05SMark Rutland 	res = to_acct(READ_ONCE(ns->bacct));
1722798d4ceSAl Viro 	if (!res) {
1732798d4ceSAl Viro 		rcu_read_unlock();
174215752fcSAl Viro 		return NULL;
175215752fcSAl Viro 	}
17634cece2eSAl Viro 	if (!atomic_long_inc_not_zero(&res->count)) {
177efb170c2SAl Viro 		rcu_read_unlock();
178efb170c2SAl Viro 		cpu_relax();
179b8f00e6bSAl Viro 		goto again;
180efb170c2SAl Viro 	}
181efb170c2SAl Viro 	rcu_read_unlock();
182efb170c2SAl Viro 	mutex_lock(&res->lock);
1836aa7de05SMark Rutland 	if (res != to_acct(READ_ONCE(ns->bacct))) {
184efb170c2SAl Viro 		mutex_unlock(&res->lock);
1859e251d02SAl Viro 		acct_put(res);
186efb170c2SAl Viro 		goto again;
187efb170c2SAl Viro 	}
188b8f00e6bSAl Viro 	return res;
189b8f00e6bSAl Viro }
190b8f00e6bSAl Viro 
acct_pin_kill(struct fs_pin * pin)19159eda0e0SAl Viro static void acct_pin_kill(struct fs_pin *pin)
19259eda0e0SAl Viro {
19359eda0e0SAl Viro 	struct bsd_acct_struct *acct = to_acct(pin);
19459eda0e0SAl Viro 	mutex_lock(&acct->lock);
1955c928e14SChristian Brauner 	/*
1965c928e14SChristian Brauner 	 * Fill the accounting struct with the exiting task's info
1975c928e14SChristian Brauner 	 * before punting to the workqueue.
1985c928e14SChristian Brauner 	 */
1995c928e14SChristian Brauner 	fill_ac(acct);
20059eda0e0SAl Viro 	schedule_work(&acct->work);
20159eda0e0SAl Viro 	wait_for_completion(&acct->done);
20259eda0e0SAl Viro 	cmpxchg(&acct->ns->bacct, pin, NULL);
20359eda0e0SAl Viro 	mutex_unlock(&acct->lock);
20459eda0e0SAl Viro 	pin_remove(pin);
20559eda0e0SAl Viro 	acct_put(acct);
20659eda0e0SAl Viro }
20759eda0e0SAl Viro 
close_work(struct work_struct * work)20817c0a5aaSAl Viro static void close_work(struct work_struct *work)
20917c0a5aaSAl Viro {
21017c0a5aaSAl Viro 	struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
21117c0a5aaSAl Viro 	struct file *file = acct->file;
2125c928e14SChristian Brauner 
2135c928e14SChristian Brauner 	/* We were fired by acct_pin_kill() which holds acct->lock. */
2145c928e14SChristian Brauner 	acct_write_process(acct);
21517c0a5aaSAl Viro 	if (file->f_op->flush)
21617c0a5aaSAl Viro 		file->f_op->flush(file, NULL);
21717c0a5aaSAl Viro 	__fput_sync(file);
21817c0a5aaSAl Viro 	complete(&acct->done);
21917c0a5aaSAl Viro }
22017c0a5aaSAl Viro 
acct_on(struct filename * pathname)221669abf4eSJeff Layton static int acct_on(struct filename *pathname)
2227b7b1aceSAl Viro {
2237b7b1aceSAl Viro 	struct file *file;
2243064c356SAl Viro 	struct vfsmount *mnt, *internal;
225b8f00e6bSAl Viro 	struct pid_namespace *ns = task_active_pid_ns(current);
22659eda0e0SAl Viro 	struct bsd_acct_struct *acct;
22759eda0e0SAl Viro 	struct fs_pin *old;
2283064c356SAl Viro 	int err;
229b8f00e6bSAl Viro 
230b8f00e6bSAl Viro 	acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
231b8f00e6bSAl Viro 	if (!acct)
232b8f00e6bSAl Viro 		return -ENOMEM;
2337b7b1aceSAl Viro 
2347b7b1aceSAl Viro 	/* Difference from BSD - they don't do O_APPEND */
235669abf4eSJeff Layton 	file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
236b8f00e6bSAl Viro 	if (IS_ERR(file)) {
237b8f00e6bSAl Viro 		kfree(acct);
2387b7b1aceSAl Viro 		return PTR_ERR(file);
239b8f00e6bSAl Viro 	}
2407b7b1aceSAl Viro 
241496ad9aaSAl Viro 	if (!S_ISREG(file_inode(file)->i_mode)) {
242b8f00e6bSAl Viro 		kfree(acct);
2437b7b1aceSAl Viro 		filp_close(file, NULL);
2447b7b1aceSAl Viro 		return -EACCES;
2457b7b1aceSAl Viro 	}
2467b7b1aceSAl Viro 
247*28d23f13SChristian Brauner 	/* Exclude kernel kernel internal filesystems. */
248*28d23f13SChristian Brauner 	if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) {
249*28d23f13SChristian Brauner 		kfree(acct);
250*28d23f13SChristian Brauner 		filp_close(file, NULL);
251*28d23f13SChristian Brauner 		return -EINVAL;
252*28d23f13SChristian Brauner 	}
253*28d23f13SChristian Brauner 
254*28d23f13SChristian Brauner 	/* Exclude procfs and sysfs. */
255*28d23f13SChristian Brauner 	if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) {
256*28d23f13SChristian Brauner 		kfree(acct);
257*28d23f13SChristian Brauner 		filp_close(file, NULL);
258*28d23f13SChristian Brauner 		return -EINVAL;
259*28d23f13SChristian Brauner 	}
260*28d23f13SChristian Brauner 
261d0f88f8dSAl Viro 	if (!(file->f_mode & FMODE_CAN_WRITE)) {
262b8f00e6bSAl Viro 		kfree(acct);
2637b7b1aceSAl Viro 		filp_close(file, NULL);
2647b7b1aceSAl Viro 		return -EIO;
2657b7b1aceSAl Viro 	}
2663064c356SAl Viro 	internal = mnt_clone_internal(&file->f_path);
2673064c356SAl Viro 	if (IS_ERR(internal)) {
2683064c356SAl Viro 		kfree(acct);
2693064c356SAl Viro 		filp_close(file, NULL);
2703064c356SAl Viro 		return PTR_ERR(internal);
2713064c356SAl Viro 	}
2729419a319SAl Viro 	err = __mnt_want_write(internal);
2733064c356SAl Viro 	if (err) {
2743064c356SAl Viro 		mntput(internal);
2753064c356SAl Viro 		kfree(acct);
2763064c356SAl Viro 		filp_close(file, NULL);
2773064c356SAl Viro 		return err;
2783064c356SAl Viro 	}
2793064c356SAl Viro 	mnt = file->f_path.mnt;
2803064c356SAl Viro 	file->f_path.mnt = internal;
2817b7b1aceSAl Viro 
28234cece2eSAl Viro 	atomic_long_set(&acct->count, 1);
28359eda0e0SAl Viro 	init_fs_pin(&acct->pin, acct_pin_kill);
284b8f00e6bSAl Viro 	acct->file = file;
285b8f00e6bSAl Viro 	acct->needcheck = jiffies;
286b8f00e6bSAl Viro 	acct->ns = ns;
287b8f00e6bSAl Viro 	mutex_init(&acct->lock);
28859eda0e0SAl Viro 	INIT_WORK(&acct->work, close_work);
28959eda0e0SAl Viro 	init_completion(&acct->done);
290efb170c2SAl Viro 	mutex_lock_nested(&acct->lock, 1);	/* nobody has seen it yet */
291efb170c2SAl Viro 	pin_insert(&acct->pin, mnt);
292b8f00e6bSAl Viro 
29359eda0e0SAl Viro 	rcu_read_lock();
29459eda0e0SAl Viro 	old = xchg(&ns->bacct, &acct->pin);
2952798d4ceSAl Viro 	mutex_unlock(&acct->lock);
29659eda0e0SAl Viro 	pin_kill(old);
2979419a319SAl Viro 	__mnt_drop_write(mnt);
2983064c356SAl Viro 	mntput(mnt);
2997b7b1aceSAl Viro 	return 0;
3007b7b1aceSAl Viro }
3017b7b1aceSAl Viro 
3029df7fa16SAl Viro static DEFINE_MUTEX(acct_on_mutex);
3039df7fa16SAl Viro 
304417ef531SRandy Dunlap /**
305417ef531SRandy Dunlap  * sys_acct - enable/disable process accounting
306417ef531SRandy Dunlap  * @name: file name for accounting records or NULL to shutdown accounting
307417ef531SRandy Dunlap  *
3081da177e4SLinus Torvalds  * sys_acct() is the only system call needed to implement process
3091da177e4SLinus Torvalds  * accounting. It takes the name of the file where accounting records
3101da177e4SLinus Torvalds  * should be written. If the filename is NULL, accounting will be
3111da177e4SLinus Torvalds  * shutdown.
312b7621ebfSRandy Dunlap  *
313b7621ebfSRandy Dunlap  * Returns: 0 for success or negative errno values for failure.
3141da177e4SLinus Torvalds  */
SYSCALL_DEFINE1(acct,const char __user *,name)315b290ebe2SHeiko Carstens SYSCALL_DEFINE1(acct, const char __user *, name)
3161da177e4SLinus Torvalds {
31705b90496SEric Paris 	int error = 0;
3181da177e4SLinus Torvalds 
3191da177e4SLinus Torvalds 	if (!capable(CAP_SYS_PACCT))
3201da177e4SLinus Torvalds 		return -EPERM;
3211da177e4SLinus Torvalds 
3221da177e4SLinus Torvalds 	if (name) {
32391a27b2aSJeff Layton 		struct filename *tmp = getname(name);
3242577d92eSIonut Alexa 
3257b7b1aceSAl Viro 		if (IS_ERR(tmp))
32646c0a8caSPaul McQuade 			return PTR_ERR(tmp);
3279df7fa16SAl Viro 		mutex_lock(&acct_on_mutex);
328669abf4eSJeff Layton 		error = acct_on(tmp);
3299df7fa16SAl Viro 		mutex_unlock(&acct_on_mutex);
3301da177e4SLinus Torvalds 		putname(tmp);
3317b7b1aceSAl Viro 	} else {
33259eda0e0SAl Viro 		rcu_read_lock();
33359eda0e0SAl Viro 		pin_kill(task_active_pid_ns(current)->bacct);
3341da177e4SLinus Torvalds 	}
33505b90496SEric Paris 
3361da177e4SLinus Torvalds 	return error;
3371da177e4SLinus Torvalds }
3381da177e4SLinus Torvalds 
acct_exit_ns(struct pid_namespace * ns)3390b6b030fSPavel Emelyanov void acct_exit_ns(struct pid_namespace *ns)
3400b6b030fSPavel Emelyanov {
34159eda0e0SAl Viro 	rcu_read_lock();
34259eda0e0SAl Viro 	pin_kill(ns->bacct);
3431da177e4SLinus Torvalds }
3441da177e4SLinus Torvalds 
3451da177e4SLinus Torvalds /*
346457139f1SZheng Yejian  *  encode an u64 into a comp_t
3471da177e4SLinus Torvalds  *
3481da177e4SLinus Torvalds  *  This routine has been adopted from the encode_comp_t() function in
3491da177e4SLinus Torvalds  *  the kern_acct.c file of the FreeBSD operating system. The encoding
3501da177e4SLinus Torvalds  *  is a 13-bit fraction with a 3-bit (base 8) exponent.
3511da177e4SLinus Torvalds  */
3521da177e4SLinus Torvalds 
3531da177e4SLinus Torvalds #define	MANTSIZE	13			/* 13 bit mantissa. */
3541da177e4SLinus Torvalds #define	EXPSIZE		3			/* Base 8 (3 bit) exponent. */
3551da177e4SLinus Torvalds #define	MAXFRACT	((1 << MANTSIZE) - 1)	/* Maximum fractional value. */
3561da177e4SLinus Torvalds 
encode_comp_t(u64 value)357457139f1SZheng Yejian static comp_t encode_comp_t(u64 value)
3581da177e4SLinus Torvalds {
3591da177e4SLinus Torvalds 	int exp, rnd;
3601da177e4SLinus Torvalds 
3611da177e4SLinus Torvalds 	exp = rnd = 0;
3621da177e4SLinus Torvalds 	while (value > MAXFRACT) {
3631da177e4SLinus Torvalds 		rnd = value & (1 << (EXPSIZE - 1));	/* Round up? */
3641da177e4SLinus Torvalds 		value >>= EXPSIZE;	/* Base 8 exponent == 3 bit shift. */
3651da177e4SLinus Torvalds 		exp++;
3661da177e4SLinus Torvalds 	}
3671da177e4SLinus Torvalds 
3681da177e4SLinus Torvalds 	/*
3691da177e4SLinus Torvalds 	 * If we need to round up, do it (and handle overflow correctly).
3701da177e4SLinus Torvalds 	 */
3711da177e4SLinus Torvalds 	if (rnd && (++value > MAXFRACT)) {
3721da177e4SLinus Torvalds 		value >>= EXPSIZE;
3731da177e4SLinus Torvalds 		exp++;
3741da177e4SLinus Torvalds 	}
3751da177e4SLinus Torvalds 
376c5f31c65SZheng Yejian 	if (exp > (((comp_t) ~0U) >> MANTSIZE))
377c5f31c65SZheng Yejian 		return (comp_t) ~0U;
3781da177e4SLinus Torvalds 	/*
3791da177e4SLinus Torvalds 	 * Clean it up and polish it off.
3801da177e4SLinus Torvalds 	 */
3811da177e4SLinus Torvalds 	exp <<= MANTSIZE;		/* Shift the exponent into place */
3821da177e4SLinus Torvalds 	exp += value;			/* and add on the mantissa. */
3831da177e4SLinus Torvalds 	return exp;
3841da177e4SLinus Torvalds }
3851da177e4SLinus Torvalds 
#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/*
 * encode an u64 into a comp2_t (24 bits)
 *
 * Format: 5 bit base 2 exponent, 20 bits mantissa.
 * The leading bit of the mantissa is not stored, but implied for
 * non-zero exponents.
 * Largest encodable value is 50 bits.
 */

#define MANTSIZE2       20                      /* 20 bit mantissa. */
#define EXPSIZE2        5                       /* 5 bit base 2 exponent. */
#define MAXFRACT2       ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
#define MAXEXP2         ((1 << EXPSIZE2) - 1)    /* Maximum exponent. */

static comp2_t encode_comp2_t(u64 value)
{
	/* The exponent is non-zero as soon as the top mantissa bit is in use. */
	int exponent = (value > (MAXFRACT2>>1)) ? 1 : 0;
	int round_up = 0;

	/* Halve until the mantissa fits, remembering the last bit lost. */
	for (; value > MAXFRACT2; exponent++) {
		round_up = value & 1;
		value >>= 1;
	}

	/*
	 * If we need to round up, do it (and handle overflow correctly).
	 */
	if (round_up) {
		value++;
		if (value > MAXFRACT2) {
			value >>= 1;
			exponent++;
		}
	}

	/* Overflow. Return largest representable number instead. */
	if (exponent > MAXEXP2)
		return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1;

	/* Drop the implied leading mantissa bit and place the exponent above. */
	return (value & (MAXFRACT2>>1)) | (exponent << (MANTSIZE2-1));
}
#elif ACCT_VERSION == 3
/*
 * encode an u64 into a 32 bit IEEE float
 */
static u32 encode_float(u64 value)
{
	unsigned exp = 190;
	unsigned mantissa;

	if (value == 0)
		return 0;

	/* Shift left until the top bit is set, adjusting the exponent. */
	for (; (s64)value > 0; value <<= 1)
		exp--;

	/* Keep the 23 bits below the leading one; exponent goes above them. */
	mantissa = (u32)(value >> 40) & 0x7fffffu;
	return mantissa | (exp << 23);
}
#endif

4481da177e4SLinus Torvalds /*
4491da177e4SLinus Torvalds  *  Write an accounting entry for an exiting process
4501da177e4SLinus Torvalds  *
4511da177e4SLinus Torvalds  *  The acct_process() call is the workhorse of the process
4521da177e4SLinus Torvalds  *  accounting system. The struct acct is built here and then written
4531da177e4SLinus Torvalds  *  into the accounting file. This function should only be called from
454bcbe4a07SIngo Molnar  *  do_exit() or when switching to a different output file.
4551da177e4SLinus Torvalds  */
4561da177e4SLinus Torvalds 
fill_ac(struct bsd_acct_struct * acct)4575c928e14SChristian Brauner static void fill_ac(struct bsd_acct_struct *acct)
458cdd37e23SAl Viro {
459cdd37e23SAl Viro 	struct pacct_struct *pacct = &current->signal->pacct;
4605c928e14SChristian Brauner 	struct file *file = acct->file;
4615c928e14SChristian Brauner 	acct_t *ac = &acct->ac;
462cdd37e23SAl Viro 	u64 elapsed, run_time;
4632d602bf2SArnd Bergmann 	time64_t btime;
464cdd37e23SAl Viro 	struct tty_struct *tty;
465cdd37e23SAl Viro 
4665c928e14SChristian Brauner 	lockdep_assert_held(&acct->lock);
4675c928e14SChristian Brauner 
4685c928e14SChristian Brauner 	if (time_is_after_jiffies(acct->needcheck)) {
4695c928e14SChristian Brauner 		acct->check_space = false;
4705c928e14SChristian Brauner 
4715c928e14SChristian Brauner 		/* Don't fill in @ac if nothing will be written. */
4725c928e14SChristian Brauner 		if (!acct->active)
4735c928e14SChristian Brauner 			return;
4745c928e14SChristian Brauner 	} else {
4755c928e14SChristian Brauner 		acct->check_space = true;
4765c928e14SChristian Brauner 	}
4775c928e14SChristian Brauner 
478cdd37e23SAl Viro 	/*
479cdd37e23SAl Viro 	 * Fill the accounting struct with the needed info as recorded
480cdd37e23SAl Viro 	 * by the different kernel functions.
481cdd37e23SAl Viro 	 */
482cdd37e23SAl Viro 	memset(ac, 0, sizeof(acct_t));
483cdd37e23SAl Viro 
484cdd37e23SAl Viro 	ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER;
4854264be50SAzeem Shaikh 	strscpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm));
486cdd37e23SAl Viro 
487cdd37e23SAl Viro 	/* calculate run_time in nsec*/
488cdd37e23SAl Viro 	run_time = ktime_get_ns();
489cdd37e23SAl Viro 	run_time -= current->group_leader->start_time;
490cdd37e23SAl Viro 	/* convert nsec -> AHZ */
491cdd37e23SAl Viro 	elapsed = nsec_to_AHZ(run_time);
492cdd37e23SAl Viro #if ACCT_VERSION == 3
493cdd37e23SAl Viro 	ac->ac_etime = encode_float(elapsed);
494cdd37e23SAl Viro #else
495cdd37e23SAl Viro 	ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
496cdd37e23SAl Viro 				(unsigned long) elapsed : (unsigned long) -1l);
497cdd37e23SAl Viro #endif
498cdd37e23SAl Viro #if ACCT_VERSION == 1 || ACCT_VERSION == 2
499cdd37e23SAl Viro 	{
500cdd37e23SAl Viro 		/* new enlarged etime field */
501cdd37e23SAl Viro 		comp2_t etime = encode_comp2_t(elapsed);
5022577d92eSIonut Alexa 
503cdd37e23SAl Viro 		ac->ac_etime_hi = etime >> 16;
504cdd37e23SAl Viro 		ac->ac_etime_lo = (u16) etime;
505cdd37e23SAl Viro 	}
506cdd37e23SAl Viro #endif
507cdd37e23SAl Viro 	do_div(elapsed, AHZ);
5082d602bf2SArnd Bergmann 	btime = ktime_get_real_seconds() - elapsed;
5092d602bf2SArnd Bergmann 	ac->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
510cdd37e23SAl Viro #if ACCT_VERSION == 2
511cdd37e23SAl Viro 	ac->ac_ahz = AHZ;
512cdd37e23SAl Viro #endif
513cdd37e23SAl Viro 
514cdd37e23SAl Viro 	spin_lock_irq(&current->sighand->siglock);
515cdd37e23SAl Viro 	tty = current->signal->tty;	/* Safe as we hold the siglock */
516cdd37e23SAl Viro 	ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
517d4bc42afSFrederic Weisbecker 	ac->ac_utime = encode_comp_t(nsec_to_AHZ(pacct->ac_utime));
518d4bc42afSFrederic Weisbecker 	ac->ac_stime = encode_comp_t(nsec_to_AHZ(pacct->ac_stime));
519cdd37e23SAl Viro 	ac->ac_flag = pacct->ac_flag;
520cdd37e23SAl Viro 	ac->ac_mem = encode_comp_t(pacct->ac_mem);
521cdd37e23SAl Viro 	ac->ac_minflt = encode_comp_t(pacct->ac_minflt);
522cdd37e23SAl Viro 	ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
523cdd37e23SAl Viro 	ac->ac_exitcode = pacct->ac_exitcode;
524cdd37e23SAl Viro 	spin_unlock_irq(&current->sighand->siglock);
525d8e180dcSMichal Schmidt 
5261da177e4SLinus Torvalds 	/* we really need to bite the bullet and change layout */
5275c928e14SChristian Brauner 	ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid());
5285c928e14SChristian Brauner 	ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid());
5291da177e4SLinus Torvalds #if ACCT_VERSION == 1 || ACCT_VERSION == 2
5301da177e4SLinus Torvalds 	/* backward-compatible 16 bit fields */
5315c928e14SChristian Brauner 	ac->ac_uid16 = ac->ac_uid;
5325c928e14SChristian Brauner 	ac->ac_gid16 = ac->ac_gid;
53335189b8fSHui Su #elif ACCT_VERSION == 3
534067b722fSYing Xue 	{
535067b722fSYing Xue 		struct pid_namespace *ns = acct->ns;
536067b722fSYing Xue 
5375c928e14SChristian Brauner 		ac->ac_pid = task_tgid_nr_ns(current, ns);
538a846a195SPavel Emelyanov 		rcu_read_lock();
5395c928e14SChristian Brauner 		ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
540a846a195SPavel Emelyanov 		rcu_read_unlock();
541067b722fSYing Xue 	}
5421da177e4SLinus Torvalds #endif
5435c928e14SChristian Brauner }
5445c928e14SChristian Brauner 
acct_write_process(struct bsd_acct_struct * acct)5455c928e14SChristian Brauner static void acct_write_process(struct bsd_acct_struct *acct)
5465c928e14SChristian Brauner {
5475c928e14SChristian Brauner 	struct file *file = acct->file;
5485c928e14SChristian Brauner 	const struct cred *cred;
5495c928e14SChristian Brauner 	acct_t *ac = &acct->ac;
5505c928e14SChristian Brauner 
5515c928e14SChristian Brauner 	/* Perform file operations on behalf of whoever enabled accounting */
5525c928e14SChristian Brauner 	cred = override_creds(file->f_cred);
5535c928e14SChristian Brauner 
5541da177e4SLinus Torvalds 	/*
5555c928e14SChristian Brauner 	 * First check to see if there is enough free_space to continue
5565c928e14SChristian Brauner 	 * the process accounting system. Then get freeze protection. If
5575c928e14SChristian Brauner 	 * the fs is frozen, just skip the write as we could deadlock
5585c928e14SChristian Brauner 	 * the system otherwise.
5595ae98f15SJan Kara 	 */
5605c928e14SChristian Brauner 	if (check_free_space(acct) && file_start_write_trylock(file)) {
561ed44724bSAl Viro 		/* it's been opened O_APPEND, so position is irrelevant */
562ed44724bSAl Viro 		loff_t pos = 0;
5635c928e14SChristian Brauner 		__kernel_write(file, ac, sizeof(acct_t), &pos);
56403d95eb2SAl Viro 		file_end_write(file);
565ed44724bSAl Viro 	}
5665c928e14SChristian Brauner 
5675c928e14SChristian Brauner 	revert_creds(cred);
5685c928e14SChristian Brauner }
5695c928e14SChristian Brauner 
do_acct_process(struct bsd_acct_struct * acct)5705c928e14SChristian Brauner static void do_acct_process(struct bsd_acct_struct *acct)
5715c928e14SChristian Brauner {
5725c928e14SChristian Brauner 	unsigned long flim;
5735c928e14SChristian Brauner 
5745c928e14SChristian Brauner 	/* Accounting records are not subject to resource limits. */
5755c928e14SChristian Brauner 	flim = rlimit(RLIMIT_FSIZE);
5765c928e14SChristian Brauner 	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
5775c928e14SChristian Brauner 	fill_ac(acct);
5785c928e14SChristian Brauner 	acct_write_process(acct);
579ed44724bSAl Viro 	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
5801da177e4SLinus Torvalds }
5811da177e4SLinus Torvalds 
582417ef531SRandy Dunlap /**
5830e464814SKaiGai Kohei  * acct_collect - collect accounting information into pacct_struct
584f6ec29a4SKaiGai Kohei  * @exitcode: task exit code
585f6ec29a4SKaiGai Kohei  * @group_dead: not 0, if this thread is the last one in the process.
5860e464814SKaiGai Kohei  */
acct_collect(long exitcode,int group_dead)587f6ec29a4SKaiGai Kohei void acct_collect(long exitcode, int group_dead)
5880e464814SKaiGai Kohei {
5890e464814SKaiGai Kohei 	struct pacct_struct *pacct = &current->signal->pacct;
590d4bc42afSFrederic Weisbecker 	u64 utime, stime;
5910e464814SKaiGai Kohei 	unsigned long vsize = 0;
5920e464814SKaiGai Kohei 
593f6ec29a4SKaiGai Kohei 	if (group_dead && current->mm) {
594160c8200SMatthew Wilcox (Oracle) 		struct mm_struct *mm = current->mm;
595160c8200SMatthew Wilcox (Oracle) 		VMA_ITERATOR(vmi, mm, 0);
5960e464814SKaiGai Kohei 		struct vm_area_struct *vma;
5972577d92eSIonut Alexa 
598160c8200SMatthew Wilcox (Oracle) 		mmap_read_lock(mm);
599160c8200SMatthew Wilcox (Oracle) 		for_each_vma(vmi, vma)
6000e464814SKaiGai Kohei 			vsize += vma->vm_end - vma->vm_start;
601160c8200SMatthew Wilcox (Oracle) 		mmap_read_unlock(mm);
6020e464814SKaiGai Kohei 	}
6030e464814SKaiGai Kohei 
60477787bfbSKaiGai Kohei 	spin_lock_irq(&current->sighand->siglock);
605f6ec29a4SKaiGai Kohei 	if (group_dead)
6060e464814SKaiGai Kohei 		pacct->ac_mem = vsize / 1024;
607f6ec29a4SKaiGai Kohei 	if (thread_group_leader(current)) {
608f6ec29a4SKaiGai Kohei 		pacct->ac_exitcode = exitcode;
609f6ec29a4SKaiGai Kohei 		if (current->flags & PF_FORKNOEXEC)
610f6ec29a4SKaiGai Kohei 			pacct->ac_flag |= AFORK;
611f6ec29a4SKaiGai Kohei 	}
612f6ec29a4SKaiGai Kohei 	if (current->flags & PF_SUPERPRIV)
613f6ec29a4SKaiGai Kohei 		pacct->ac_flag |= ASU;
614f6ec29a4SKaiGai Kohei 	if (current->flags & PF_DUMPCORE)
615f6ec29a4SKaiGai Kohei 		pacct->ac_flag |= ACORE;
616f6ec29a4SKaiGai Kohei 	if (current->flags & PF_SIGNALED)
617f6ec29a4SKaiGai Kohei 		pacct->ac_flag |= AXSIG;
618d4bc42afSFrederic Weisbecker 
619d4bc42afSFrederic Weisbecker 	task_cputime(current, &utime, &stime);
6206fac4829SFrederic Weisbecker 	pacct->ac_utime += utime;
6216fac4829SFrederic Weisbecker 	pacct->ac_stime += stime;
62277787bfbSKaiGai Kohei 	pacct->ac_minflt += current->min_flt;
62377787bfbSKaiGai Kohei 	pacct->ac_majflt += current->maj_flt;
62477787bfbSKaiGai Kohei 	spin_unlock_irq(&current->sighand->siglock);
6250e464814SKaiGai Kohei }
6260e464814SKaiGai Kohei 
slow_acct_process(struct pid_namespace * ns)627e25ff11fSAl Viro static void slow_acct_process(struct pid_namespace *ns)
6281da177e4SLinus Torvalds {
629e25ff11fSAl Viro 	for ( ; ns; ns = ns->parent) {
630215752fcSAl Viro 		struct bsd_acct_struct *acct = acct_get(ns);
631b8f00e6bSAl Viro 		if (acct) {
632b8f00e6bSAl Viro 			do_acct_process(acct);
633b8f00e6bSAl Viro 			mutex_unlock(&acct->lock);
6349e251d02SAl Viro 			acct_put(acct);
6351da177e4SLinus Torvalds 		}
6361da177e4SLinus Torvalds 	}
637e25ff11fSAl Viro }
6387d1e1350SPavel Emelyanov 
6397d1e1350SPavel Emelyanov /**
640b7621ebfSRandy Dunlap  * acct_process - handles process accounting for an exiting task
6417d1e1350SPavel Emelyanov  */
acct_process(void)6427d1e1350SPavel Emelyanov void acct_process(void)
6437d1e1350SPavel Emelyanov {
6447d1e1350SPavel Emelyanov 	struct pid_namespace *ns;
6457d1e1350SPavel Emelyanov 
6460c18d7a5SPavel Emelyanov 	/*
6470c18d7a5SPavel Emelyanov 	 * This loop is safe lockless, since current is still
6480c18d7a5SPavel Emelyanov 	 * alive and holds its namespace, which in turn holds
6490c18d7a5SPavel Emelyanov 	 * its parent.
6500c18d7a5SPavel Emelyanov 	 */
651e25ff11fSAl Viro 	for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) {
652b8f00e6bSAl Viro 		if (ns->bacct)
653e25ff11fSAl Viro 			break;
654e25ff11fSAl Viro 	}
655e25ff11fSAl Viro 	if (unlikely(ns))
656e25ff11fSAl Viro 		slow_acct_process(ns);
6577d1e1350SPavel Emelyanov }
658