--- fork.c (cbb245239282870bc6f54d5137dfe0f84b48ea72)
+++ fork.c (a8ea6fc9b089156d9230bfeef964dd9be101a4a9)
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/kernel/fork.c
  *
  * Copyright (C) 1991, 1992  Linus Torvalds
  */

 /*
--- 82 unchanged lines hidden ---
 #include <linux/sysctl.h>
 #include <linux/kcov.h>
 #include <linux/livepatch.h>
 #include <linux/thread_info.h>
 #include <linux/stackleak.h>
 #include <linux/kasan.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/bpf.h>

 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>

 #include <trace/events/sched.h>
--- 267 unchanged lines hidden ---
 	kmem_cache_free(vm_area_cachep, vma);
 }

 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
 	void *stack = task_stack_page(tsk);
 	struct vm_struct *vm = task_stack_vm_area(tsk);

-
-	/* All stack pages are in the same node. */
-	if (vm)
-		mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
-				      account * (THREAD_SIZE / 1024));
-	else
+	if (vm) {
+		int i;
+
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+			mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
+					      account * (PAGE_SIZE / 1024));
+	} else {
+		/* All stack pages are in the same node. */
 		mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
 				      account * (THREAD_SIZE / 1024));
+	}
 }

 static int memcg_charge_kernel_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
 	struct vm_struct *vm = task_stack_vm_area(tsk);
 	int ret;

--- 331 unchanged lines hidden ---
 	WARN_ON(!tsk->exit_state);
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);

 	io_uring_free(tsk);
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
 	security_task_free(tsk);
+	bpf_task_storage_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
+	sched_core_free(tsk);

 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
 EXPORT_SYMBOL_GPL(__put_task_struct);

 void __init __weak arch_task_cache_init(void) { }

--- 174 unchanged lines hidden ---
 	/* One for the rcu users */
 	refcount_set(&tsk->usage, 1);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	tsk->btrace_seq = 0;
 #endif
 	tsk->splice_pipe = NULL;
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
+	tsk->pf_io_worker = NULL;

 	account_kernel_stack(tsk, 1);

 	kcov_task_init(tsk);
 	kmap_local_fork(tsk);

 #ifdef CONFIG_FAULT_INJECTION
 	tsk->fail_nth = 0;
--- 196 unchanged lines hidden ---
  * set_mm_exe_file - change a reference to the mm's executable file
  *
  * This changes mm's executable file (shown as symlink /proc/[pid]/exe).
  *
  * Main users are mmput() and sys_execve(). Callers prevent concurrent
  * invocations: in mmput() nobody alive left, in execve task is single
  * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
  * mm->exe_file, but does so without using set_mm_exe_file() in order
- * to do avoid the need for any locks.
+ * to avoid the need for any locks.
  */
 void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
 	struct file *old_exe_file;

 	/*
 	 * It is safe to dereference the exe_file without RCU as
 	 * this function is only called if nobody else can access
--- 234 unchanged lines hidden ---

 fail_nomem:
 	return NULL;
 }

 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct mm_struct *mm, *oldmm;
-	int retval;

 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
 #ifdef CONFIG_DETECT_HUNG_TASK
 	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
 	tsk->last_switch_time = 0;
 #endif

--- 10 unchanged lines hidden ---
 		return 0;

 	/* initialize the new vmacache entries */
 	vmacache_flush(tsk);

 	if (clone_flags & CLONE_VM) {
 		mmget(oldmm);
 		mm = oldmm;
-		goto good_mm;
+	} else {
+		mm = dup_mm(tsk, current->mm);
+		if (!mm)
+			return -ENOMEM;
 	}

-	retval = -ENOMEM;
-	mm = dup_mm(tsk, current->mm);
-	if (!mm)
-		goto fail_nomem;
-
-good_mm:
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
-
-fail_nomem:
-	return retval;
 }

 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct fs_struct *fs = current->fs;
 	if (clone_flags & CLONE_FS) {
 		/* tsk->fs is already what we want */
 		spin_lock(&fs->lock);
--- 289 unchanged lines hidden ---
  * Pid field and the first entry in the NSpid field will be identical.
  * If the pid namespace of the process is not a descendant of the pid
  * namespace of the procfs instance 0 will be shown as its first NSpid
  * entry and no others will be shown.
  * Note that this differs from the Pid and NSpid fields in
  * /proc/<pid>/status where Pid and NSpid are always shown relative to
  * the pid namespace of the procfs instance. The difference becomes
  * obvious when sending around a pidfd between pid namespaces from a
- * different branch of the tree, i.e. where no ancestoral relation is
+ * different branch of the tree, i.e. where no ancestral relation is
  * present between the pid namespaces:
  * - create two new pid namespaces ns1 and ns2 in the initial pid
  *   namespace (also take care to create new mount namespaces in the
  *   new pid namespace and mount procfs)
  * - create a process with a pidfd in ns1
  * - send pidfd from ns1 to ns2
  * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
  *   have exactly one entry, which is 0
--- 187 unchanged lines hidden ---
 	INIT_HLIST_NODE(&delayed.node);

 	spin_lock_irq(&current->sighand->siglock);
 	if (!(clone_flags & CLONE_THREAD))
 		hlist_add_head(&delayed.node, &current->signal->multiprocess);
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	retval = -ERESTARTNOINTR;
-	if (signal_pending(current))
+	if (task_sigpending(current))
 		goto fork_out;

 	retval = -ENOMEM;
 	p = dup_task_struct(current, node);
 	if (!p)
 		goto fork_out;
 	if (args->io_thread) {
 		/*
--- 42 unchanged lines hidden ---
 	 * triggers too late. This doesn't hurt, the check is only there
 	 * to stop root fork bombs.
 	 */
 	retval = -EAGAIN;
 	if (data_race(nr_threads >= max_threads))
 		goto bad_fork_cleanup_count;

 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
-	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
+	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE | PF_NO_SETAFFINITY);
 	p->flags |= PF_FORKNOEXEC;
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
 	rcu_copy_process(p);
 	p->vfork_done = NULL;
 	spin_lock_init(&p->alloc_lock);

 	init_sigpending(&p->pending);
+	p->sigqueue_cache = NULL;

 	p->utime = p->stime = p->gtime = 0;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 	p->utimescaled = p->stimescaled = 0;
 #endif
 	prev_cputime_init(&p->prev_cputime);

 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
--- 53 unchanged lines hidden ---

 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
 #ifdef CONFIG_BCACHE
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	RCU_INIT_POINTER(p->bpf_storage, NULL);
+#endif

 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
 	if (retval)
 		goto bad_fork_cleanup_policy;

-	retval = perf_event_init_task(p);
+	retval = perf_event_init_task(p, clone_flags);
 	if (retval)
 		goto bad_fork_cleanup_policy;
 	retval = audit_alloc(p);
 	if (retval)
 		goto bad_fork_cleanup_perf;
 	/* copy all the process information */
 	shm_init_task(p);
 	retval = security_task_alloc(p, clone_flags);
--- 146 unchanged lines hidden ---
 	} else {
 		p->real_parent = current;
 		p->parent_exec_id = current->self_exec_id;
 		p->exit_signal = args->exit_signal;
 	}

 	klp_copy_process(p);

+	sched_core_fork(p);
+
 	spin_lock(&current->sighand->siglock);

 	/*
 	 * Copy seccomp details explicitly here, in case they were changed
 	 * before holding sighand lock.
 	 */
 	copy_seccomp(p);

--- 71 unchanged lines hidden ---
 	trace_task_newtask(p, clone_flags);
 	uprobe_copy_process(p, clone_flags);

 	copy_oom_score_adj(clone_flags, p);

 	return p;

 bad_fork_cancel_cgroup:
+	sched_core_free(p);
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	cgroup_cancel_fork(p, args);
 bad_fork_put_pidfd:
 	if (clone_flags & CLONE_PIDFD) {
 		fput(pidfile);
 		put_unused_fd(pidfd);
 	}
--- 55 unchanged lines hidden ---
 	enum pid_type type;

 	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
 		INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
 		init_task_pid(idle, type, &init_struct_pid);
 	}
 }

-struct task_struct *fork_idle(int cpu)
+struct task_struct * __init fork_idle(int cpu)
 {
 	struct task_struct *task;
 	struct kernel_clone_args args = {
 		.flags = CLONE_VM,
 	};

 	task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
 	if (!IS_ERR(task)) {
--- 303 unchanged lines hidden ---
 static bool clone3_args_valid(struct kernel_clone_args *kargs)
 {
 	/* Verify that no unknown flags are passed along. */
 	if (kargs->flags &
 	    ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
 		return false;

 	/*
-	 * - make the CLONE_DETACHED bit reuseable for clone3
-	 * - make the CSIGNAL bits reuseable for clone3
+	 * - make the CLONE_DETACHED bit reusable for clone3
+	 * - make the CSIGNAL bits reusable for clone3
 	 */
 	if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
 		return false;

 	if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) ==
 	    (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND))
 		return false;

--- 372 unchanged lines hidden ---