1*1da177e4SLinus Torvalds /* 2*1da177e4SLinus Torvalds * linux/kernel/fork.c 3*1da177e4SLinus Torvalds * 4*1da177e4SLinus Torvalds * Copyright (C) 1991, 1992 Linus Torvalds 5*1da177e4SLinus Torvalds */ 6*1da177e4SLinus Torvalds 7*1da177e4SLinus Torvalds /* 8*1da177e4SLinus Torvalds * 'fork.c' contains the help-routines for the 'fork' system call 9*1da177e4SLinus Torvalds * (see also entry.S and others). 10*1da177e4SLinus Torvalds * Fork is rather simple, once you get the hang of it, but the memory 11*1da177e4SLinus Torvalds * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' 12*1da177e4SLinus Torvalds */ 13*1da177e4SLinus Torvalds 14*1da177e4SLinus Torvalds #include <linux/config.h> 15*1da177e4SLinus Torvalds #include <linux/slab.h> 16*1da177e4SLinus Torvalds #include <linux/init.h> 17*1da177e4SLinus Torvalds #include <linux/unistd.h> 18*1da177e4SLinus Torvalds #include <linux/smp_lock.h> 19*1da177e4SLinus Torvalds #include <linux/module.h> 20*1da177e4SLinus Torvalds #include <linux/vmalloc.h> 21*1da177e4SLinus Torvalds #include <linux/completion.h> 22*1da177e4SLinus Torvalds #include <linux/namespace.h> 23*1da177e4SLinus Torvalds #include <linux/personality.h> 24*1da177e4SLinus Torvalds #include <linux/mempolicy.h> 25*1da177e4SLinus Torvalds #include <linux/sem.h> 26*1da177e4SLinus Torvalds #include <linux/file.h> 27*1da177e4SLinus Torvalds #include <linux/key.h> 28*1da177e4SLinus Torvalds #include <linux/binfmts.h> 29*1da177e4SLinus Torvalds #include <linux/mman.h> 30*1da177e4SLinus Torvalds #include <linux/fs.h> 31*1da177e4SLinus Torvalds #include <linux/cpu.h> 32*1da177e4SLinus Torvalds #include <linux/cpuset.h> 33*1da177e4SLinus Torvalds #include <linux/security.h> 34*1da177e4SLinus Torvalds #include <linux/swap.h> 35*1da177e4SLinus Torvalds #include <linux/syscalls.h> 36*1da177e4SLinus Torvalds #include <linux/jiffies.h> 37*1da177e4SLinus Torvalds #include <linux/futex.h> 38*1da177e4SLinus Torvalds #include <linux/ptrace.h> 39*1da177e4SLinus Torvalds #include <linux/mount.h> 40*1da177e4SLinus Torvalds #include <linux/audit.h> 41*1da177e4SLinus Torvalds #include <linux/profile.h> 42*1da177e4SLinus Torvalds #include <linux/rmap.h> 43*1da177e4SLinus Torvalds #include <linux/acct.h> 44*1da177e4SLinus Torvalds 45*1da177e4SLinus Torvalds #include <asm/pgtable.h> 46*1da177e4SLinus Torvalds #include <asm/pgalloc.h> 47*1da177e4SLinus Torvalds #include <asm/uaccess.h> 48*1da177e4SLinus Torvalds #include <asm/mmu_context.h> 49*1da177e4SLinus Torvalds #include <asm/cacheflush.h> 50*1da177e4SLinus Torvalds #include <asm/tlbflush.h> 51*1da177e4SLinus Torvalds 52*1da177e4SLinus Torvalds /* 53*1da177e4SLinus Torvalds * Protected counters by write_lock_irq(&tasklist_lock) 54*1da177e4SLinus Torvalds */ 55*1da177e4SLinus Torvalds unsigned long total_forks; /* Handle normal Linux uptimes. */ 56*1da177e4SLinus Torvalds int nr_threads; /* The idle threads do not count.. */ 57*1da177e4SLinus Torvalds 58*1da177e4SLinus Torvalds int max_threads; /* tunable limit on nr_threads */ 59*1da177e4SLinus Torvalds 60*1da177e4SLinus Torvalds DEFINE_PER_CPU(unsigned long, process_counts) = 0; 61*1da177e4SLinus Torvalds 62*1da177e4SLinus Torvalds __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 63*1da177e4SLinus Torvalds 64*1da177e4SLinus Torvalds EXPORT_SYMBOL(tasklist_lock); 65*1da177e4SLinus Torvalds 66*1da177e4SLinus Torvalds int nr_processes(void) 67*1da177e4SLinus Torvalds { 68*1da177e4SLinus Torvalds int cpu; 69*1da177e4SLinus Torvalds int total = 0; 70*1da177e4SLinus Torvalds 71*1da177e4SLinus Torvalds for_each_online_cpu(cpu) 72*1da177e4SLinus Torvalds total += per_cpu(process_counts, cpu); 73*1da177e4SLinus Torvalds 74*1da177e4SLinus Torvalds return total; 75*1da177e4SLinus Torvalds } 76*1da177e4SLinus Torvalds 77*1da177e4SLinus Torvalds #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 78*1da177e4SLinus Torvalds # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) 79*1da177e4SLinus Torvalds # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) 80*1da177e4SLinus Torvalds static kmem_cache_t *task_struct_cachep; 81*1da177e4SLinus Torvalds #endif 82*1da177e4SLinus Torvalds 83*1da177e4SLinus Torvalds /* SLAB cache for signal_struct structures (tsk->signal) */ 84*1da177e4SLinus Torvalds kmem_cache_t *signal_cachep; 85*1da177e4SLinus Torvalds 86*1da177e4SLinus Torvalds /* SLAB cache for sighand_struct structures (tsk->sighand) */ 87*1da177e4SLinus Torvalds kmem_cache_t *sighand_cachep; 88*1da177e4SLinus Torvalds 89*1da177e4SLinus Torvalds /* SLAB cache for files_struct structures (tsk->files) */ 90*1da177e4SLinus Torvalds kmem_cache_t *files_cachep; 91*1da177e4SLinus Torvalds 92*1da177e4SLinus Torvalds /* SLAB cache for fs_struct structures (tsk->fs) */ 93*1da177e4SLinus Torvalds kmem_cache_t *fs_cachep; 94*1da177e4SLinus Torvalds 95*1da177e4SLinus Torvalds /* SLAB cache for vm_area_struct structures */ 96*1da177e4SLinus Torvalds kmem_cache_t *vm_area_cachep; 97*1da177e4SLinus Torvalds 98*1da177e4SLinus Torvalds /* SLAB cache for mm_struct structures (tsk->mm) */ 99*1da177e4SLinus Torvalds static kmem_cache_t *mm_cachep; 100*1da177e4SLinus Torvalds 101*1da177e4SLinus Torvalds void free_task(struct task_struct *tsk) 102*1da177e4SLinus Torvalds { 103*1da177e4SLinus Torvalds free_thread_info(tsk->thread_info); 104*1da177e4SLinus Torvalds free_task_struct(tsk); 105*1da177e4SLinus Torvalds } 106*1da177e4SLinus Torvalds EXPORT_SYMBOL(free_task); 107*1da177e4SLinus Torvalds 108*1da177e4SLinus Torvalds void __put_task_struct(struct task_struct *tsk) 109*1da177e4SLinus Torvalds { 110*1da177e4SLinus Torvalds WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 111*1da177e4SLinus Torvalds WARN_ON(atomic_read(&tsk->usage)); 112*1da177e4SLinus Torvalds WARN_ON(tsk == current); 113*1da177e4SLinus Torvalds 114*1da177e4SLinus Torvalds if (unlikely(tsk->audit_context)) 115*1da177e4SLinus Torvalds audit_free(tsk); 116*1da177e4SLinus Torvalds security_task_free(tsk); 117*1da177e4SLinus Torvalds free_uid(tsk->user); 118*1da177e4SLinus Torvalds put_group_info(tsk->group_info); 119*1da177e4SLinus Torvalds 120*1da177e4SLinus Torvalds if (!profile_handoff_task(tsk)) 121*1da177e4SLinus Torvalds free_task(tsk); 122*1da177e4SLinus Torvalds } 123*1da177e4SLinus Torvalds 124*1da177e4SLinus Torvalds void __init fork_init(unsigned long mempages) 125*1da177e4SLinus Torvalds { 126*1da177e4SLinus Torvalds #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 127*1da177e4SLinus Torvalds #ifndef ARCH_MIN_TASKALIGN 128*1da177e4SLinus Torvalds #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES 129*1da177e4SLinus Torvalds #endif 130*1da177e4SLinus Torvalds /* create a slab on which task_structs can be allocated */ 131*1da177e4SLinus Torvalds task_struct_cachep = 132*1da177e4SLinus Torvalds kmem_cache_create("task_struct", sizeof(struct task_struct), 133*1da177e4SLinus Torvalds ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL); 134*1da177e4SLinus Torvalds #endif 135*1da177e4SLinus Torvalds 136*1da177e4SLinus Torvalds /* 137*1da177e4SLinus Torvalds * The default maximum number of threads is set to a safe 138*1da177e4SLinus Torvalds * value: the thread structures can take up at most half 139*1da177e4SLinus Torvalds * of memory. 140*1da177e4SLinus Torvalds */ 141*1da177e4SLinus Torvalds max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); 142*1da177e4SLinus Torvalds 143*1da177e4SLinus Torvalds /* 144*1da177e4SLinus Torvalds * we need to allow at least 20 threads to boot a system 145*1da177e4SLinus Torvalds */ 146*1da177e4SLinus Torvalds if(max_threads < 20) 147*1da177e4SLinus Torvalds max_threads = 20; 148*1da177e4SLinus Torvalds 149*1da177e4SLinus Torvalds init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; 150*1da177e4SLinus Torvalds init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; 151*1da177e4SLinus Torvalds init_task.signal->rlim[RLIMIT_SIGPENDING] = 152*1da177e4SLinus Torvalds init_task.signal->rlim[RLIMIT_NPROC]; 153*1da177e4SLinus Torvalds } 154*1da177e4SLinus Torvalds 155*1da177e4SLinus Torvalds static struct task_struct *dup_task_struct(struct task_struct *orig) 156*1da177e4SLinus Torvalds { 157*1da177e4SLinus Torvalds struct task_struct *tsk; 158*1da177e4SLinus Torvalds struct thread_info *ti; 159*1da177e4SLinus Torvalds 160*1da177e4SLinus Torvalds prepare_to_copy(orig); 161*1da177e4SLinus Torvalds 162*1da177e4SLinus Torvalds tsk = alloc_task_struct(); 163*1da177e4SLinus Torvalds if (!tsk) 164*1da177e4SLinus Torvalds return NULL; 165*1da177e4SLinus Torvalds 166*1da177e4SLinus Torvalds ti = alloc_thread_info(tsk); 167*1da177e4SLinus Torvalds if (!ti) { 168*1da177e4SLinus Torvalds free_task_struct(tsk); 169*1da177e4SLinus Torvalds return NULL; 170*1da177e4SLinus Torvalds } 171*1da177e4SLinus Torvalds 172*1da177e4SLinus Torvalds *ti = *orig->thread_info; 173*1da177e4SLinus Torvalds *tsk = *orig; 174*1da177e4SLinus Torvalds tsk->thread_info = ti; 175*1da177e4SLinus Torvalds ti->task = tsk; 176*1da177e4SLinus Torvalds 177*1da177e4SLinus Torvalds /* One for us, one for whoever does the "release_task()" (usually parent) */ 178*1da177e4SLinus Torvalds atomic_set(&tsk->usage,2); 179*1da177e4SLinus Torvalds return tsk; 180*1da177e4SLinus Torvalds } 181*1da177e4SLinus Torvalds 182*1da177e4SLinus Torvalds #ifdef CONFIG_MMU 183*1da177e4SLinus Torvalds static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) 184*1da177e4SLinus Torvalds { 185*1da177e4SLinus Torvalds struct vm_area_struct * mpnt, *tmp, **pprev; 186*1da177e4SLinus Torvalds struct rb_node **rb_link, *rb_parent; 187*1da177e4SLinus Torvalds int retval; 188*1da177e4SLinus Torvalds unsigned long charge; 189*1da177e4SLinus Torvalds struct mempolicy *pol; 190*1da177e4SLinus Torvalds 191*1da177e4SLinus Torvalds down_write(&oldmm->mmap_sem); 192*1da177e4SLinus Torvalds flush_cache_mm(current->mm); 193*1da177e4SLinus Torvalds mm->locked_vm = 0; 194*1da177e4SLinus Torvalds mm->mmap = NULL; 195*1da177e4SLinus Torvalds mm->mmap_cache = NULL; 196*1da177e4SLinus Torvalds mm->free_area_cache = oldmm->mmap_base; 197*1da177e4SLinus Torvalds mm->map_count = 0; 198*1da177e4SLinus Torvalds set_mm_counter(mm, rss, 0); 199*1da177e4SLinus Torvalds set_mm_counter(mm, anon_rss, 0); 200*1da177e4SLinus Torvalds cpus_clear(mm->cpu_vm_mask); 201*1da177e4SLinus Torvalds mm->mm_rb = RB_ROOT; 202*1da177e4SLinus Torvalds rb_link = &mm->mm_rb.rb_node; 203*1da177e4SLinus Torvalds rb_parent = NULL; 204*1da177e4SLinus Torvalds pprev = &mm->mmap; 205*1da177e4SLinus Torvalds 206*1da177e4SLinus Torvalds for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { 207*1da177e4SLinus Torvalds struct file *file; 208*1da177e4SLinus Torvalds 209*1da177e4SLinus Torvalds if (mpnt->vm_flags & VM_DONTCOPY) { 210*1da177e4SLinus Torvalds __vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, 211*1da177e4SLinus Torvalds -vma_pages(mpnt)); 212*1da177e4SLinus Torvalds continue; 213*1da177e4SLinus Torvalds } 214*1da177e4SLinus Torvalds charge = 0; 215*1da177e4SLinus Torvalds if (mpnt->vm_flags & VM_ACCOUNT) { 216*1da177e4SLinus Torvalds unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; 217*1da177e4SLinus Torvalds if (security_vm_enough_memory(len)) 218*1da177e4SLinus Torvalds goto fail_nomem; 219*1da177e4SLinus Torvalds charge = len; 220*1da177e4SLinus Torvalds } 221*1da177e4SLinus Torvalds tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); 222*1da177e4SLinus Torvalds if (!tmp) 223*1da177e4SLinus Torvalds goto fail_nomem; 224*1da177e4SLinus Torvalds *tmp = *mpnt; 225*1da177e4SLinus Torvalds pol = mpol_copy(vma_policy(mpnt)); 226*1da177e4SLinus Torvalds retval = PTR_ERR(pol); 227*1da177e4SLinus Torvalds if (IS_ERR(pol)) 228*1da177e4SLinus Torvalds goto fail_nomem_policy; 229*1da177e4SLinus Torvalds vma_set_policy(tmp, pol); 230*1da177e4SLinus Torvalds tmp->vm_flags &= ~VM_LOCKED; 231*1da177e4SLinus Torvalds tmp->vm_mm = mm; 232*1da177e4SLinus Torvalds tmp->vm_next = NULL; 233*1da177e4SLinus Torvalds anon_vma_link(tmp); 234*1da177e4SLinus Torvalds file = tmp->vm_file; 235*1da177e4SLinus Torvalds if (file) { 236*1da177e4SLinus Torvalds struct inode *inode = file->f_dentry->d_inode; 237*1da177e4SLinus Torvalds get_file(file); 238*1da177e4SLinus Torvalds if (tmp->vm_flags & VM_DENYWRITE) 239*1da177e4SLinus Torvalds atomic_dec(&inode->i_writecount); 240*1da177e4SLinus Torvalds 241*1da177e4SLinus Torvalds /* insert tmp into the share list, just after mpnt */ 242*1da177e4SLinus Torvalds spin_lock(&file->f_mapping->i_mmap_lock); 243*1da177e4SLinus Torvalds tmp->vm_truncate_count = mpnt->vm_truncate_count; 244*1da177e4SLinus Torvalds flush_dcache_mmap_lock(file->f_mapping); 245*1da177e4SLinus Torvalds vma_prio_tree_add(tmp, mpnt); 246*1da177e4SLinus Torvalds flush_dcache_mmap_unlock(file->f_mapping); 247*1da177e4SLinus Torvalds spin_unlock(&file->f_mapping->i_mmap_lock); 248*1da177e4SLinus Torvalds } 249*1da177e4SLinus Torvalds 250*1da177e4SLinus Torvalds /* 251*1da177e4SLinus Torvalds * Link in the new vma and copy the page table entries: 252*1da177e4SLinus Torvalds * link in first so that swapoff can see swap entries, 253*1da177e4SLinus Torvalds * and try_to_unmap_one's find_vma find the new vma. 254*1da177e4SLinus Torvalds */ 255*1da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 256*1da177e4SLinus Torvalds *pprev = tmp; 257*1da177e4SLinus Torvalds pprev = &tmp->vm_next; 258*1da177e4SLinus Torvalds 259*1da177e4SLinus Torvalds __vma_link_rb(mm, tmp, rb_link, rb_parent); 260*1da177e4SLinus Torvalds rb_link = &tmp->vm_rb.rb_right; 261*1da177e4SLinus Torvalds rb_parent = &tmp->vm_rb; 262*1da177e4SLinus Torvalds 263*1da177e4SLinus Torvalds mm->map_count++; 264*1da177e4SLinus Torvalds retval = copy_page_range(mm, current->mm, tmp); 265*1da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 266*1da177e4SLinus Torvalds 267*1da177e4SLinus Torvalds if (tmp->vm_ops && tmp->vm_ops->open) 268*1da177e4SLinus Torvalds tmp->vm_ops->open(tmp); 269*1da177e4SLinus Torvalds 270*1da177e4SLinus Torvalds if (retval) 271*1da177e4SLinus Torvalds goto out; 272*1da177e4SLinus Torvalds } 273*1da177e4SLinus Torvalds retval = 0; 274*1da177e4SLinus Torvalds 275*1da177e4SLinus Torvalds out: 276*1da177e4SLinus Torvalds flush_tlb_mm(current->mm); 277*1da177e4SLinus Torvalds up_write(&oldmm->mmap_sem); 278*1da177e4SLinus Torvalds return retval; 279*1da177e4SLinus Torvalds fail_nomem_policy: 280*1da177e4SLinus Torvalds kmem_cache_free(vm_area_cachep, tmp); 281*1da177e4SLinus Torvalds fail_nomem: 282*1da177e4SLinus Torvalds retval = -ENOMEM; 283*1da177e4SLinus Torvalds vm_unacct_memory(charge); 284*1da177e4SLinus Torvalds goto out; 285*1da177e4SLinus Torvalds } 286*1da177e4SLinus Torvalds 287*1da177e4SLinus Torvalds static inline int mm_alloc_pgd(struct mm_struct * mm) 288*1da177e4SLinus Torvalds { 289*1da177e4SLinus Torvalds mm->pgd = pgd_alloc(mm); 290*1da177e4SLinus Torvalds if (unlikely(!mm->pgd)) 291*1da177e4SLinus Torvalds return -ENOMEM; 292*1da177e4SLinus Torvalds return 0; 293*1da177e4SLinus Torvalds } 294*1da177e4SLinus Torvalds 295*1da177e4SLinus Torvalds static inline void mm_free_pgd(struct mm_struct * mm) 296*1da177e4SLinus Torvalds { 297*1da177e4SLinus Torvalds pgd_free(mm->pgd); 298*1da177e4SLinus Torvalds } 299*1da177e4SLinus Torvalds #else 300*1da177e4SLinus Torvalds #define dup_mmap(mm, oldmm) (0) 301*1da177e4SLinus Torvalds #define mm_alloc_pgd(mm) (0) 302*1da177e4SLinus Torvalds #define mm_free_pgd(mm) 303*1da177e4SLinus Torvalds #endif /* CONFIG_MMU */ 304*1da177e4SLinus Torvalds 305*1da177e4SLinus Torvalds __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); 306*1da177e4SLinus Torvalds 307*1da177e4SLinus Torvalds #define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) 308*1da177e4SLinus Torvalds #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) 309*1da177e4SLinus Torvalds 310*1da177e4SLinus Torvalds #include <linux/init_task.h> 311*1da177e4SLinus Torvalds 312*1da177e4SLinus Torvalds static struct mm_struct * mm_init(struct mm_struct * mm) 313*1da177e4SLinus Torvalds { 314*1da177e4SLinus Torvalds atomic_set(&mm->mm_users, 1); 315*1da177e4SLinus Torvalds atomic_set(&mm->mm_count, 1); 316*1da177e4SLinus Torvalds init_rwsem(&mm->mmap_sem); 317*1da177e4SLinus Torvalds INIT_LIST_HEAD(&mm->mmlist); 318*1da177e4SLinus Torvalds mm->core_waiters = 0; 319*1da177e4SLinus Torvalds mm->nr_ptes = 0; 320*1da177e4SLinus Torvalds spin_lock_init(&mm->page_table_lock); 321*1da177e4SLinus Torvalds rwlock_init(&mm->ioctx_list_lock); 322*1da177e4SLinus Torvalds mm->ioctx_list = NULL; 323*1da177e4SLinus Torvalds mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); 324*1da177e4SLinus Torvalds mm->free_area_cache = TASK_UNMAPPED_BASE; 325*1da177e4SLinus Torvalds 326*1da177e4SLinus Torvalds if (likely(!mm_alloc_pgd(mm))) { 327*1da177e4SLinus Torvalds mm->def_flags = 0; 328*1da177e4SLinus Torvalds return mm; 329*1da177e4SLinus Torvalds } 330*1da177e4SLinus Torvalds free_mm(mm); 331*1da177e4SLinus Torvalds return NULL; 332*1da177e4SLinus Torvalds } 333*1da177e4SLinus Torvalds 334*1da177e4SLinus Torvalds /* 335*1da177e4SLinus Torvalds * Allocate and initialize an mm_struct. 336*1da177e4SLinus Torvalds */ 337*1da177e4SLinus Torvalds struct mm_struct * mm_alloc(void) 338*1da177e4SLinus Torvalds { 339*1da177e4SLinus Torvalds struct mm_struct * mm; 340*1da177e4SLinus Torvalds 341*1da177e4SLinus Torvalds mm = allocate_mm(); 342*1da177e4SLinus Torvalds if (mm) { 343*1da177e4SLinus Torvalds memset(mm, 0, sizeof(*mm)); 344*1da177e4SLinus Torvalds mm = mm_init(mm); 345*1da177e4SLinus Torvalds } 346*1da177e4SLinus Torvalds return mm; 347*1da177e4SLinus Torvalds } 348*1da177e4SLinus Torvalds 349*1da177e4SLinus Torvalds /* 350*1da177e4SLinus Torvalds * Called when the last reference to the mm 351*1da177e4SLinus Torvalds * is dropped: either by a lazy thread or by 352*1da177e4SLinus Torvalds * mmput. Free the page directory and the mm. 353*1da177e4SLinus Torvalds */ 354*1da177e4SLinus Torvalds void fastcall __mmdrop(struct mm_struct *mm) 355*1da177e4SLinus Torvalds { 356*1da177e4SLinus Torvalds BUG_ON(mm == &init_mm); 357*1da177e4SLinus Torvalds mm_free_pgd(mm); 358*1da177e4SLinus Torvalds destroy_context(mm); 359*1da177e4SLinus Torvalds free_mm(mm); 360*1da177e4SLinus Torvalds } 361*1da177e4SLinus Torvalds 362*1da177e4SLinus Torvalds /* 363*1da177e4SLinus Torvalds * Decrement the use count and release all resources for an mm. 364*1da177e4SLinus Torvalds */ 365*1da177e4SLinus Torvalds void mmput(struct mm_struct *mm) 366*1da177e4SLinus Torvalds { 367*1da177e4SLinus Torvalds if (atomic_dec_and_test(&mm->mm_users)) { 368*1da177e4SLinus Torvalds exit_aio(mm); 369*1da177e4SLinus Torvalds exit_mmap(mm); 370*1da177e4SLinus Torvalds if (!list_empty(&mm->mmlist)) { 371*1da177e4SLinus Torvalds spin_lock(&mmlist_lock); 372*1da177e4SLinus Torvalds list_del(&mm->mmlist); 373*1da177e4SLinus Torvalds spin_unlock(&mmlist_lock); 374*1da177e4SLinus Torvalds } 375*1da177e4SLinus Torvalds put_swap_token(mm); 376*1da177e4SLinus Torvalds mmdrop(mm); 377*1da177e4SLinus Torvalds } 378*1da177e4SLinus Torvalds } 379*1da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(mmput); 380*1da177e4SLinus Torvalds 381*1da177e4SLinus Torvalds /** 382*1da177e4SLinus Torvalds * get_task_mm - acquire a reference to the task's mm 383*1da177e4SLinus Torvalds * 384*1da177e4SLinus Torvalds * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 385*1da177e4SLinus Torvalds * this kernel workthread has transiently adopted a user mm with use_mm, 386*1da177e4SLinus Torvalds * to do its AIO) is not set and if so returns a reference to it, after 387*1da177e4SLinus Torvalds * bumping up the use count. User must release the mm via mmput() 388*1da177e4SLinus Torvalds * after use. Typically used by /proc and ptrace. 389*1da177e4SLinus Torvalds */ 390*1da177e4SLinus Torvalds struct mm_struct *get_task_mm(struct task_struct *task) 391*1da177e4SLinus Torvalds { 392*1da177e4SLinus Torvalds struct mm_struct *mm; 393*1da177e4SLinus Torvalds 394*1da177e4SLinus Torvalds task_lock(task); 395*1da177e4SLinus Torvalds mm = task->mm; 396*1da177e4SLinus Torvalds if (mm) { 397*1da177e4SLinus Torvalds if (task->flags & PF_BORROWED_MM) 398*1da177e4SLinus Torvalds mm = NULL; 399*1da177e4SLinus Torvalds else 400*1da177e4SLinus Torvalds atomic_inc(&mm->mm_users); 401*1da177e4SLinus Torvalds } 402*1da177e4SLinus Torvalds task_unlock(task); 403*1da177e4SLinus Torvalds return mm; 404*1da177e4SLinus Torvalds } 405*1da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(get_task_mm); 406*1da177e4SLinus Torvalds 407*1da177e4SLinus Torvalds /* Please note the differences between mmput and mm_release. 408*1da177e4SLinus Torvalds * mmput is called whenever we stop holding onto a mm_struct, 409*1da177e4SLinus Torvalds * error success whatever. 410*1da177e4SLinus Torvalds * 411*1da177e4SLinus Torvalds * mm_release is called after a mm_struct has been removed 412*1da177e4SLinus Torvalds * from the current process. 413*1da177e4SLinus Torvalds * 414*1da177e4SLinus Torvalds * This difference is important for error handling, when we 415*1da177e4SLinus Torvalds * only half set up a mm_struct for a new process and need to restore 416*1da177e4SLinus Torvalds * the old one. Because we mmput the new mm_struct before 417*1da177e4SLinus Torvalds * restoring the old one. . . 418*1da177e4SLinus Torvalds * Eric Biederman 10 January 1998 419*1da177e4SLinus Torvalds */ 420*1da177e4SLinus Torvalds void mm_release(struct task_struct *tsk, struct mm_struct *mm) 421*1da177e4SLinus Torvalds { 422*1da177e4SLinus Torvalds struct completion *vfork_done = tsk->vfork_done; 423*1da177e4SLinus Torvalds 424*1da177e4SLinus Torvalds /* Get rid of any cached register state */ 425*1da177e4SLinus Torvalds deactivate_mm(tsk, mm); 426*1da177e4SLinus Torvalds 427*1da177e4SLinus Torvalds /* notify parent sleeping on vfork() */ 428*1da177e4SLinus Torvalds if (vfork_done) { 429*1da177e4SLinus Torvalds tsk->vfork_done = NULL; 430*1da177e4SLinus Torvalds complete(vfork_done); 431*1da177e4SLinus Torvalds } 432*1da177e4SLinus Torvalds if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) { 433*1da177e4SLinus Torvalds u32 __user * tidptr = tsk->clear_child_tid; 434*1da177e4SLinus Torvalds tsk->clear_child_tid = NULL; 435*1da177e4SLinus Torvalds 436*1da177e4SLinus Torvalds /* 437*1da177e4SLinus Torvalds * We don't check the error code - if userspace has 438*1da177e4SLinus Torvalds * not set up a proper pointer then tough luck. 439*1da177e4SLinus Torvalds */ 440*1da177e4SLinus Torvalds put_user(0, tidptr); 441*1da177e4SLinus Torvalds sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0); 442*1da177e4SLinus Torvalds } 443*1da177e4SLinus Torvalds } 444*1da177e4SLinus Torvalds 445*1da177e4SLinus Torvalds static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 446*1da177e4SLinus Torvalds { 447*1da177e4SLinus Torvalds struct mm_struct * mm, *oldmm; 448*1da177e4SLinus Torvalds int retval; 449*1da177e4SLinus Torvalds 450*1da177e4SLinus Torvalds tsk->min_flt = tsk->maj_flt = 0; 451*1da177e4SLinus Torvalds tsk->nvcsw = tsk->nivcsw = 0; 452*1da177e4SLinus Torvalds 453*1da177e4SLinus Torvalds tsk->mm = NULL; 454*1da177e4SLinus Torvalds tsk->active_mm = NULL; 455*1da177e4SLinus Torvalds 456*1da177e4SLinus Torvalds /* 457*1da177e4SLinus Torvalds * Are we cloning a kernel thread? 458*1da177e4SLinus Torvalds * 459*1da177e4SLinus Torvalds * We need to steal a active VM for that.. 460*1da177e4SLinus Torvalds */ 461*1da177e4SLinus Torvalds oldmm = current->mm; 462*1da177e4SLinus Torvalds if (!oldmm) 463*1da177e4SLinus Torvalds return 0; 464*1da177e4SLinus Torvalds 465*1da177e4SLinus Torvalds if (clone_flags & CLONE_VM) { 466*1da177e4SLinus Torvalds atomic_inc(&oldmm->mm_users); 467*1da177e4SLinus Torvalds mm = oldmm; 468*1da177e4SLinus Torvalds /* 469*1da177e4SLinus Torvalds * There are cases where the PTL is held to ensure no 470*1da177e4SLinus Torvalds * new threads start up in user mode using an mm, which 471*1da177e4SLinus Torvalds * allows optimizing out ipis; the tlb_gather_mmu code 472*1da177e4SLinus Torvalds * is an example. 473*1da177e4SLinus Torvalds */ 474*1da177e4SLinus Torvalds spin_unlock_wait(&oldmm->page_table_lock); 475*1da177e4SLinus Torvalds goto good_mm; 476*1da177e4SLinus Torvalds } 477*1da177e4SLinus Torvalds 478*1da177e4SLinus Torvalds retval = -ENOMEM; 479*1da177e4SLinus Torvalds mm = allocate_mm(); 480*1da177e4SLinus Torvalds if (!mm) 481*1da177e4SLinus Torvalds goto fail_nomem; 482*1da177e4SLinus Torvalds 483*1da177e4SLinus Torvalds /* Copy the current MM stuff.. */ 484*1da177e4SLinus Torvalds memcpy(mm, oldmm, sizeof(*mm)); 485*1da177e4SLinus Torvalds if (!mm_init(mm)) 486*1da177e4SLinus Torvalds goto fail_nomem; 487*1da177e4SLinus Torvalds 488*1da177e4SLinus Torvalds if (init_new_context(tsk,mm)) 489*1da177e4SLinus Torvalds goto fail_nocontext; 490*1da177e4SLinus Torvalds 491*1da177e4SLinus Torvalds retval = dup_mmap(mm, oldmm); 492*1da177e4SLinus Torvalds if (retval) 493*1da177e4SLinus Torvalds goto free_pt; 494*1da177e4SLinus Torvalds 495*1da177e4SLinus Torvalds mm->hiwater_rss = get_mm_counter(mm,rss); 496*1da177e4SLinus Torvalds mm->hiwater_vm = mm->total_vm; 497*1da177e4SLinus Torvalds 498*1da177e4SLinus Torvalds good_mm: 499*1da177e4SLinus Torvalds tsk->mm = mm; 500*1da177e4SLinus Torvalds tsk->active_mm = mm; 501*1da177e4SLinus Torvalds return 0; 502*1da177e4SLinus Torvalds 503*1da177e4SLinus Torvalds free_pt: 504*1da177e4SLinus Torvalds mmput(mm); 505*1da177e4SLinus Torvalds fail_nomem: 506*1da177e4SLinus Torvalds return retval; 507*1da177e4SLinus Torvalds 508*1da177e4SLinus Torvalds fail_nocontext: 509*1da177e4SLinus Torvalds /* 510*1da177e4SLinus Torvalds * If init_new_context() failed, we cannot use mmput() to free the mm 511*1da177e4SLinus Torvalds * because it calls destroy_context() 512*1da177e4SLinus Torvalds */ 513*1da177e4SLinus Torvalds mm_free_pgd(mm); 514*1da177e4SLinus Torvalds free_mm(mm); 515*1da177e4SLinus Torvalds return retval; 516*1da177e4SLinus Torvalds } 517*1da177e4SLinus Torvalds 518*1da177e4SLinus Torvalds static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 519*1da177e4SLinus Torvalds { 520*1da177e4SLinus Torvalds struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); 521*1da177e4SLinus Torvalds /* We don't need to lock fs - think why ;-) */ 522*1da177e4SLinus Torvalds if (fs) { 523*1da177e4SLinus Torvalds atomic_set(&fs->count, 1); 524*1da177e4SLinus Torvalds rwlock_init(&fs->lock); 525*1da177e4SLinus Torvalds fs->umask = old->umask; 526*1da177e4SLinus Torvalds read_lock(&old->lock); 527*1da177e4SLinus Torvalds fs->rootmnt = mntget(old->rootmnt); 528*1da177e4SLinus Torvalds fs->root = dget(old->root); 529*1da177e4SLinus Torvalds fs->pwdmnt = mntget(old->pwdmnt); 530*1da177e4SLinus Torvalds fs->pwd = dget(old->pwd); 531*1da177e4SLinus Torvalds if (old->altroot) { 532*1da177e4SLinus Torvalds fs->altrootmnt = mntget(old->altrootmnt); 533*1da177e4SLinus Torvalds fs->altroot = dget(old->altroot); 534*1da177e4SLinus Torvalds } else { 535*1da177e4SLinus Torvalds fs->altrootmnt = NULL; 536*1da177e4SLinus Torvalds fs->altroot = NULL; 537*1da177e4SLinus Torvalds } 538*1da177e4SLinus Torvalds read_unlock(&old->lock); 539*1da177e4SLinus Torvalds } 540*1da177e4SLinus Torvalds return fs; 541*1da177e4SLinus Torvalds } 542*1da177e4SLinus Torvalds 543*1da177e4SLinus Torvalds struct fs_struct *copy_fs_struct(struct fs_struct *old) 544*1da177e4SLinus Torvalds { 545*1da177e4SLinus Torvalds return __copy_fs_struct(old); 546*1da177e4SLinus Torvalds } 547*1da177e4SLinus Torvalds 548*1da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(copy_fs_struct); 549*1da177e4SLinus Torvalds 550*1da177e4SLinus Torvalds static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) 551*1da177e4SLinus Torvalds { 552*1da177e4SLinus Torvalds if (clone_flags & CLONE_FS) { 553*1da177e4SLinus Torvalds atomic_inc(¤t->fs->count); 554*1da177e4SLinus Torvalds return 0; 555*1da177e4SLinus Torvalds } 556*1da177e4SLinus Torvalds tsk->fs = __copy_fs_struct(current->fs); 557*1da177e4SLinus Torvalds if (!tsk->fs) 558*1da177e4SLinus Torvalds return -ENOMEM; 559*1da177e4SLinus Torvalds return 0; 560*1da177e4SLinus Torvalds } 561*1da177e4SLinus Torvalds 562*1da177e4SLinus Torvalds static int count_open_files(struct files_struct *files, int size) 563*1da177e4SLinus Torvalds { 564*1da177e4SLinus Torvalds int i; 565*1da177e4SLinus Torvalds 566*1da177e4SLinus Torvalds /* Find the last open fd */ 567*1da177e4SLinus Torvalds for (i = size/(8*sizeof(long)); i > 0; ) { 568*1da177e4SLinus Torvalds if (files->open_fds->fds_bits[--i]) 569*1da177e4SLinus Torvalds break; 570*1da177e4SLinus Torvalds } 571*1da177e4SLinus Torvalds i = (i+1) * 8 * sizeof(long); 572*1da177e4SLinus Torvalds return i; 573*1da177e4SLinus Torvalds } 574*1da177e4SLinus Torvalds 575*1da177e4SLinus Torvalds static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 576*1da177e4SLinus Torvalds { 577*1da177e4SLinus Torvalds struct files_struct *oldf, *newf; 578*1da177e4SLinus Torvalds struct file **old_fds, **new_fds; 579*1da177e4SLinus Torvalds int open_files, size, i, error = 0, expand; 580*1da177e4SLinus Torvalds 581*1da177e4SLinus Torvalds /* 582*1da177e4SLinus Torvalds * A background process may not have any files ... 583*1da177e4SLinus Torvalds */ 584*1da177e4SLinus Torvalds oldf = current->files; 585*1da177e4SLinus Torvalds if (!oldf) 586*1da177e4SLinus Torvalds goto out; 587*1da177e4SLinus Torvalds 588*1da177e4SLinus Torvalds if (clone_flags & CLONE_FILES) { 589*1da177e4SLinus Torvalds atomic_inc(&oldf->count); 590*1da177e4SLinus Torvalds goto out; 591*1da177e4SLinus Torvalds } 592*1da177e4SLinus Torvalds 593*1da177e4SLinus Torvalds /* 594*1da177e4SLinus Torvalds * Note: we may be using current for both targets (See exec.c) 595*1da177e4SLinus Torvalds * This works because we cache current->files (old) as oldf. Don't 596*1da177e4SLinus Torvalds * break this. 597*1da177e4SLinus Torvalds */ 598*1da177e4SLinus Torvalds tsk->files = NULL; 599*1da177e4SLinus Torvalds error = -ENOMEM; 600*1da177e4SLinus Torvalds newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); 601*1da177e4SLinus Torvalds if (!newf) 602*1da177e4SLinus Torvalds goto out; 603*1da177e4SLinus Torvalds 604*1da177e4SLinus Torvalds atomic_set(&newf->count, 1); 605*1da177e4SLinus Torvalds 606*1da177e4SLinus Torvalds spin_lock_init(&newf->file_lock); 607*1da177e4SLinus Torvalds newf->next_fd = 0; 608*1da177e4SLinus Torvalds newf->max_fds = NR_OPEN_DEFAULT; 609*1da177e4SLinus Torvalds newf->max_fdset = __FD_SETSIZE; 610*1da177e4SLinus Torvalds newf->close_on_exec = &newf->close_on_exec_init; 611*1da177e4SLinus Torvalds newf->open_fds = &newf->open_fds_init; 612*1da177e4SLinus Torvalds newf->fd = &newf->fd_array[0]; 613*1da177e4SLinus Torvalds 614*1da177e4SLinus Torvalds spin_lock(&oldf->file_lock); 615*1da177e4SLinus Torvalds 616*1da177e4SLinus Torvalds open_files = count_open_files(oldf, oldf->max_fdset); 617*1da177e4SLinus Torvalds expand = 0; 618*1da177e4SLinus Torvalds 619*1da177e4SLinus Torvalds /* 620*1da177e4SLinus Torvalds * Check whether we need to allocate a larger fd array or fd set. 621*1da177e4SLinus Torvalds * Note: we're not a clone task, so the open count won't change. 622*1da177e4SLinus Torvalds */ 623*1da177e4SLinus Torvalds if (open_files > newf->max_fdset) { 624*1da177e4SLinus Torvalds newf->max_fdset = 0; 625*1da177e4SLinus Torvalds expand = 1; 626*1da177e4SLinus Torvalds } 627*1da177e4SLinus Torvalds if (open_files > newf->max_fds) { 628*1da177e4SLinus Torvalds newf->max_fds = 0; 629*1da177e4SLinus Torvalds expand = 1; 630*1da177e4SLinus Torvalds } 631*1da177e4SLinus Torvalds 632*1da177e4SLinus Torvalds /* if the old fdset gets grown now, we'll only copy up to "size" fds */ 633*1da177e4SLinus Torvalds if (expand) { 634*1da177e4SLinus Torvalds spin_unlock(&oldf->file_lock); 635*1da177e4SLinus Torvalds spin_lock(&newf->file_lock); 636*1da177e4SLinus Torvalds error = expand_files(newf, open_files-1); 637*1da177e4SLinus Torvalds spin_unlock(&newf->file_lock); 638*1da177e4SLinus Torvalds if (error < 0) 639*1da177e4SLinus Torvalds goto out_release; 640*1da177e4SLinus Torvalds spin_lock(&oldf->file_lock); 641*1da177e4SLinus Torvalds } 642*1da177e4SLinus Torvalds 643*1da177e4SLinus Torvalds old_fds = oldf->fd; 644*1da177e4SLinus Torvalds new_fds = newf->fd; 645*1da177e4SLinus Torvalds 646*1da177e4SLinus Torvalds memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); 647*1da177e4SLinus Torvalds memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); 648*1da177e4SLinus Torvalds 649*1da177e4SLinus Torvalds for (i = open_files; i != 0; i--) { 650*1da177e4SLinus Torvalds struct file *f = *old_fds++; 651*1da177e4SLinus Torvalds if (f) { 652*1da177e4SLinus Torvalds get_file(f); 653*1da177e4SLinus Torvalds } else { 654*1da177e4SLinus Torvalds /* 655*1da177e4SLinus Torvalds * The fd may be claimed in the fd bitmap but not yet 656*1da177e4SLinus Torvalds * instantiated in the files array if a sibling thread 657*1da177e4SLinus Torvalds * is partway through open(). So make sure that this 658*1da177e4SLinus Torvalds * fd is available to the new process. 659*1da177e4SLinus Torvalds */ 660*1da177e4SLinus Torvalds FD_CLR(open_files - i, newf->open_fds); 661*1da177e4SLinus Torvalds } 662*1da177e4SLinus Torvalds *new_fds++ = f; 663*1da177e4SLinus Torvalds } 664*1da177e4SLinus Torvalds spin_unlock(&oldf->file_lock); 665*1da177e4SLinus Torvalds 666*1da177e4SLinus Torvalds /* compute the remainder to be cleared */ 667*1da177e4SLinus Torvalds size = (newf->max_fds - open_files) * sizeof(struct file *); 668*1da177e4SLinus Torvalds 669*1da177e4SLinus Torvalds /* This is long word aligned thus could use a optimized version */ 670*1da177e4SLinus Torvalds memset(new_fds, 0, size); 671*1da177e4SLinus Torvalds 672*1da177e4SLinus Torvalds if (newf->max_fdset > open_files) { 673*1da177e4SLinus Torvalds int left = (newf->max_fdset-open_files)/8; 674*1da177e4SLinus Torvalds int start = open_files / (8 * sizeof(unsigned long)); 675*1da177e4SLinus Torvalds 676*1da177e4SLinus Torvalds memset(&newf->open_fds->fds_bits[start], 0, left); 677*1da177e4SLinus Torvalds memset(&newf->close_on_exec->fds_bits[start], 0, left); 678*1da177e4SLinus Torvalds } 679*1da177e4SLinus Torvalds 680*1da177e4SLinus Torvalds tsk->files = newf; 681*1da177e4SLinus Torvalds error = 0; 682*1da177e4SLinus Torvalds out: 683*1da177e4SLinus Torvalds return error; 684*1da177e4SLinus Torvalds 685*1da177e4SLinus Torvalds out_release: 686*1da177e4SLinus Torvalds free_fdset (newf->close_on_exec, newf->max_fdset); 687*1da177e4SLinus Torvalds free_fdset (newf->open_fds, newf->max_fdset); 688*1da177e4SLinus Torvalds free_fd_array(newf->fd, newf->max_fds); 689*1da177e4SLinus Torvalds kmem_cache_free(files_cachep, newf); 690*1da177e4SLinus Torvalds goto out; 691*1da177e4SLinus Torvalds } 692*1da177e4SLinus Torvalds 693*1da177e4SLinus Torvalds /* 694*1da177e4SLinus Torvalds * Helper to unshare the files of the current task. 695*1da177e4SLinus Torvalds * We don't want to expose copy_files internals to 696*1da177e4SLinus Torvalds * the exec layer of the kernel. 697*1da177e4SLinus Torvalds */ 698*1da177e4SLinus Torvalds 699*1da177e4SLinus Torvalds int unshare_files(void) 700*1da177e4SLinus Torvalds { 701*1da177e4SLinus Torvalds struct files_struct *files = current->files; 702*1da177e4SLinus Torvalds int rc; 703*1da177e4SLinus Torvalds 704*1da177e4SLinus Torvalds if(!files) 705*1da177e4SLinus Torvalds BUG(); 706*1da177e4SLinus Torvalds 707*1da177e4SLinus Torvalds /* This can race but the race causes us to copy when we don't 708*1da177e4SLinus Torvalds need to and drop the copy */ 709*1da177e4SLinus Torvalds if(atomic_read(&files->count) == 1) 710*1da177e4SLinus Torvalds { 711*1da177e4SLinus Torvalds atomic_inc(&files->count); 712*1da177e4SLinus Torvalds return 0; 713*1da177e4SLinus Torvalds } 714*1da177e4SLinus Torvalds rc = copy_files(0, current); 715*1da177e4SLinus Torvalds if(rc) 716*1da177e4SLinus Torvalds current->files = files; 717*1da177e4SLinus Torvalds return rc; 718*1da177e4SLinus Torvalds } 719*1da177e4SLinus Torvalds 720*1da177e4SLinus Torvalds EXPORT_SYMBOL(unshare_files); 721*1da177e4SLinus Torvalds 722*1da177e4SLinus Torvalds static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) 723*1da177e4SLinus Torvalds { 724*1da177e4SLinus Torvalds struct sighand_struct *sig; 725*1da177e4SLinus Torvalds 726*1da177e4SLinus Torvalds if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { 727*1da177e4SLinus Torvalds atomic_inc(¤t->sighand->count); 728*1da177e4SLinus Torvalds return 0; 729*1da177e4SLinus Torvalds } 730*1da177e4SLinus Torvalds sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); 731*1da177e4SLinus Torvalds tsk->sighand = sig; 732*1da177e4SLinus Torvalds if (!sig) 733*1da177e4SLinus Torvalds return -ENOMEM; 734*1da177e4SLinus Torvalds spin_lock_init(&sig->siglock); 735*1da177e4SLinus Torvalds atomic_set(&sig->count, 1); 736*1da177e4SLinus Torvalds memcpy(sig->action, current->sighand->action, sizeof(sig->action)); 737*1da177e4SLinus Torvalds return 0; 738*1da177e4SLinus Torvalds } 739*1da177e4SLinus Torvalds 740*1da177e4SLinus Torvalds static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) 741*1da177e4SLinus Torvalds { 742*1da177e4SLinus Torvalds struct signal_struct *sig; 743*1da177e4SLinus Torvalds int ret; 744*1da177e4SLinus Torvalds 745*1da177e4SLinus Torvalds if (clone_flags & CLONE_THREAD) { 746*1da177e4SLinus Torvalds atomic_inc(¤t->signal->count); 747*1da177e4SLinus Torvalds atomic_inc(¤t->signal->live); 748*1da177e4SLinus Torvalds return 0; 749*1da177e4SLinus Torvalds } 750*1da177e4SLinus Torvalds sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); 751*1da177e4SLinus Torvalds tsk->signal = sig; 752*1da177e4SLinus Torvalds if (!sig) 753*1da177e4SLinus Torvalds return -ENOMEM; 754*1da177e4SLinus Torvalds 755*1da177e4SLinus Torvalds ret = copy_thread_group_keys(tsk); 756*1da177e4SLinus Torvalds if (ret < 0) { 757*1da177e4SLinus Torvalds kmem_cache_free(signal_cachep, sig); 758*1da177e4SLinus Torvalds return ret; 759*1da177e4SLinus Torvalds } 760*1da177e4SLinus Torvalds 761*1da177e4SLinus Torvalds atomic_set(&sig->count, 1); 762*1da177e4SLinus Torvalds atomic_set(&sig->live, 1); 763*1da177e4SLinus Torvalds init_waitqueue_head(&sig->wait_chldexit); 764*1da177e4SLinus Torvalds sig->flags = 0; 765*1da177e4SLinus Torvalds sig->group_exit_code = 0; 766*1da177e4SLinus Torvalds sig->group_exit_task = NULL; 767*1da177e4SLinus Torvalds sig->group_stop_count = 0; 768*1da177e4SLinus Torvalds sig->curr_target = NULL; 769*1da177e4SLinus Torvalds init_sigpending(&sig->shared_pending); 770*1da177e4SLinus Torvalds INIT_LIST_HEAD(&sig->posix_timers); 771*1da177e4SLinus Torvalds 772*1da177e4SLinus Torvalds sig->it_real_value = sig->it_real_incr = 0; 773*1da177e4SLinus Torvalds sig->real_timer.function = it_real_fn; 774*1da177e4SLinus Torvalds sig->real_timer.data = (unsigned long) tsk; 775*1da177e4SLinus Torvalds init_timer(&sig->real_timer); 776*1da177e4SLinus Torvalds 777*1da177e4SLinus Torvalds sig->it_virt_expires = cputime_zero; 778*1da177e4SLinus Torvalds sig->it_virt_incr = cputime_zero; 779*1da177e4SLinus Torvalds sig->it_prof_expires = cputime_zero; 780*1da177e4SLinus Torvalds sig->it_prof_incr = cputime_zero; 781*1da177e4SLinus Torvalds 782*1da177e4SLinus Torvalds sig->tty = current->signal->tty; 783*1da177e4SLinus Torvalds sig->pgrp = process_group(current); 784*1da177e4SLinus Torvalds sig->session = current->signal->session; 785*1da177e4SLinus Torvalds sig->leader = 0; /* session leadership doesn't inherit */ 786*1da177e4SLinus Torvalds sig->tty_old_pgrp = 0; 787*1da177e4SLinus Torvalds 788*1da177e4SLinus Torvalds sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; 789*1da177e4SLinus Torvalds sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 790*1da177e4SLinus Torvalds sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 791*1da177e4SLinus Torvalds sig->sched_time = 0; 792*1da177e4SLinus Torvalds INIT_LIST_HEAD(&sig->cpu_timers[0]); 793*1da177e4SLinus Torvalds INIT_LIST_HEAD(&sig->cpu_timers[1]); 794*1da177e4SLinus Torvalds INIT_LIST_HEAD(&sig->cpu_timers[2]); 795*1da177e4SLinus Torvalds 796*1da177e4SLinus Torvalds task_lock(current->group_leader); 797*1da177e4SLinus Torvalds memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 798*1da177e4SLinus Torvalds task_unlock(current->group_leader); 799*1da177e4SLinus Torvalds 800*1da177e4SLinus Torvalds if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 801*1da177e4SLinus Torvalds /* 802*1da177e4SLinus Torvalds * New sole thread in the process gets an expiry time 803*1da177e4SLinus Torvalds * of the whole CPU time limit. 804*1da177e4SLinus Torvalds */ 805*1da177e4SLinus Torvalds tsk->it_prof_expires = 806*1da177e4SLinus Torvalds secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); 807*1da177e4SLinus Torvalds } 808*1da177e4SLinus Torvalds 809*1da177e4SLinus Torvalds return 0; 810*1da177e4SLinus Torvalds } 811*1da177e4SLinus Torvalds 812*1da177e4SLinus Torvalds static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) 813*1da177e4SLinus Torvalds { 814*1da177e4SLinus Torvalds unsigned long new_flags = p->flags; 815*1da177e4SLinus Torvalds 816*1da177e4SLinus Torvalds new_flags &= ~PF_SUPERPRIV; 817*1da177e4SLinus Torvalds new_flags |= PF_FORKNOEXEC; 818*1da177e4SLinus Torvalds if (!(clone_flags & CLONE_PTRACE)) 819*1da177e4SLinus Torvalds p->ptrace = 0; 820*1da177e4SLinus Torvalds p->flags = new_flags; 821*1da177e4SLinus Torvalds } 822*1da177e4SLinus Torvalds 823*1da177e4SLinus Torvalds asmlinkage long sys_set_tid_address(int __user *tidptr) 824*1da177e4SLinus Torvalds { 825*1da177e4SLinus Torvalds current->clear_child_tid = tidptr; 826*1da177e4SLinus Torvalds 827*1da177e4SLinus Torvalds return current->pid; 828*1da177e4SLinus Torvalds } 829*1da177e4SLinus Torvalds 830*1da177e4SLinus Torvalds /* 831*1da177e4SLinus Torvalds * This creates a new process as a copy of the old one, 832*1da177e4SLinus Torvalds * but does not actually start it yet. 833*1da177e4SLinus Torvalds * 834*1da177e4SLinus Torvalds * It copies the registers, and all the appropriate 835*1da177e4SLinus Torvalds * parts of the process environment (as per the clone 836*1da177e4SLinus Torvalds * flags). The actual kick-off is left to the caller. 837*1da177e4SLinus Torvalds */ 838*1da177e4SLinus Torvalds static task_t *copy_process(unsigned long clone_flags, 839*1da177e4SLinus Torvalds unsigned long stack_start, 840*1da177e4SLinus Torvalds struct pt_regs *regs, 841*1da177e4SLinus Torvalds unsigned long stack_size, 842*1da177e4SLinus Torvalds int __user *parent_tidptr, 843*1da177e4SLinus Torvalds int __user *child_tidptr, 844*1da177e4SLinus Torvalds int pid) 845*1da177e4SLinus Torvalds { 846*1da177e4SLinus Torvalds int retval; 847*1da177e4SLinus Torvalds struct task_struct *p = NULL; 848*1da177e4SLinus Torvalds 849*1da177e4SLinus Torvalds if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 850*1da177e4SLinus Torvalds return ERR_PTR(-EINVAL); 851*1da177e4SLinus Torvalds 852*1da177e4SLinus Torvalds /* 853*1da177e4SLinus Torvalds * Thread groups must share signals as well, and detached threads 854*1da177e4SLinus Torvalds * can only be started up within the thread group. 855*1da177e4SLinus Torvalds */ 856*1da177e4SLinus Torvalds if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) 857*1da177e4SLinus Torvalds return ERR_PTR(-EINVAL); 858*1da177e4SLinus Torvalds 859*1da177e4SLinus Torvalds /* 860*1da177e4SLinus Torvalds * Shared signal handlers imply shared VM. By way of the above, 861*1da177e4SLinus Torvalds * thread groups also imply shared VM. Blocking this case allows 862*1da177e4SLinus Torvalds * for various simplifications in other code. 863*1da177e4SLinus Torvalds */ 864*1da177e4SLinus Torvalds if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 865*1da177e4SLinus Torvalds return ERR_PTR(-EINVAL); 866*1da177e4SLinus Torvalds 867*1da177e4SLinus Torvalds retval = security_task_create(clone_flags); 868*1da177e4SLinus Torvalds if (retval) 869*1da177e4SLinus Torvalds goto fork_out; 870*1da177e4SLinus Torvalds 871*1da177e4SLinus Torvalds retval = -ENOMEM; 872*1da177e4SLinus Torvalds p = dup_task_struct(current); 873*1da177e4SLinus Torvalds if (!p) 874*1da177e4SLinus Torvalds goto fork_out; 875*1da177e4SLinus Torvalds 876*1da177e4SLinus Torvalds retval = -EAGAIN; 877*1da177e4SLinus Torvalds if (atomic_read(&p->user->processes) >= 878*1da177e4SLinus Torvalds p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 879*1da177e4SLinus Torvalds if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && 880*1da177e4SLinus Torvalds p->user != &root_user) 881*1da177e4SLinus Torvalds goto bad_fork_free; 882*1da177e4SLinus Torvalds } 883*1da177e4SLinus Torvalds 884*1da177e4SLinus Torvalds atomic_inc(&p->user->__count); 885*1da177e4SLinus Torvalds atomic_inc(&p->user->processes); 886*1da177e4SLinus Torvalds get_group_info(p->group_info); 887*1da177e4SLinus Torvalds 888*1da177e4SLinus Torvalds /* 889*1da177e4SLinus Torvalds * If multiple threads are within copy_process(), then this check 890*1da177e4SLinus Torvalds * triggers too late. This doesn't hurt, the check is only there 891*1da177e4SLinus Torvalds * to stop root fork bombs. 892*1da177e4SLinus Torvalds */ 893*1da177e4SLinus Torvalds if (nr_threads >= max_threads) 894*1da177e4SLinus Torvalds goto bad_fork_cleanup_count; 895*1da177e4SLinus Torvalds 896*1da177e4SLinus Torvalds if (!try_module_get(p->thread_info->exec_domain->module)) 897*1da177e4SLinus Torvalds goto bad_fork_cleanup_count; 898*1da177e4SLinus Torvalds 899*1da177e4SLinus Torvalds if (p->binfmt && !try_module_get(p->binfmt->module)) 900*1da177e4SLinus Torvalds goto bad_fork_cleanup_put_domain; 901*1da177e4SLinus Torvalds 902*1da177e4SLinus Torvalds p->did_exec = 0; 903*1da177e4SLinus Torvalds copy_flags(clone_flags, p); 904*1da177e4SLinus Torvalds p->pid = pid; 905*1da177e4SLinus Torvalds retval = -EFAULT; 906*1da177e4SLinus Torvalds if (clone_flags & CLONE_PARENT_SETTID) 907*1da177e4SLinus Torvalds if (put_user(p->pid, parent_tidptr)) 908*1da177e4SLinus Torvalds goto bad_fork_cleanup; 909*1da177e4SLinus Torvalds 910*1da177e4SLinus Torvalds p->proc_dentry = NULL; 911*1da177e4SLinus Torvalds 912*1da177e4SLinus Torvalds INIT_LIST_HEAD(&p->children); 913*1da177e4SLinus Torvalds INIT_LIST_HEAD(&p->sibling); 914*1da177e4SLinus Torvalds p->vfork_done = NULL; 915*1da177e4SLinus Torvalds spin_lock_init(&p->alloc_lock); 916*1da177e4SLinus Torvalds spin_lock_init(&p->proc_lock); 917*1da177e4SLinus Torvalds 918*1da177e4SLinus Torvalds clear_tsk_thread_flag(p, TIF_SIGPENDING); 919*1da177e4SLinus Torvalds init_sigpending(&p->pending); 920*1da177e4SLinus Torvalds 921*1da177e4SLinus Torvalds p->utime = cputime_zero; 922*1da177e4SLinus Torvalds p->stime = cputime_zero; 923*1da177e4SLinus Torvalds p->sched_time = 0; 924*1da177e4SLinus Torvalds p->rchar = 0; /* I/O counter: bytes read */ 925*1da177e4SLinus Torvalds p->wchar = 0; /* I/O counter: bytes written */ 926*1da177e4SLinus Torvalds p->syscr = 0; /* I/O counter: read syscalls */ 927*1da177e4SLinus Torvalds p->syscw = 0; /* I/O counter: write syscalls */ 928*1da177e4SLinus Torvalds acct_clear_integrals(p); 929*1da177e4SLinus Torvalds 930*1da177e4SLinus Torvalds p->it_virt_expires = cputime_zero; 931*1da177e4SLinus Torvalds p->it_prof_expires = cputime_zero; 932*1da177e4SLinus Torvalds p->it_sched_expires = 0; 933*1da177e4SLinus Torvalds INIT_LIST_HEAD(&p->cpu_timers[0]); 934*1da177e4SLinus Torvalds INIT_LIST_HEAD(&p->cpu_timers[1]); 935*1da177e4SLinus Torvalds INIT_LIST_HEAD(&p->cpu_timers[2]); 936*1da177e4SLinus Torvalds 937*1da177e4SLinus Torvalds p->lock_depth = -1; /* -1 = no lock */ 938*1da177e4SLinus Torvalds do_posix_clock_monotonic_gettime(&p->start_time); 939*1da177e4SLinus Torvalds p->security = NULL; 940*1da177e4SLinus Torvalds p->io_context = NULL; 941*1da177e4SLinus Torvalds p->io_wait = NULL; 942*1da177e4SLinus Torvalds p->audit_context = NULL; 943*1da177e4SLinus Torvalds #ifdef CONFIG_NUMA 944*1da177e4SLinus Torvalds p->mempolicy = mpol_copy(p->mempolicy); 945*1da177e4SLinus Torvalds if (IS_ERR(p->mempolicy)) { 946*1da177e4SLinus Torvalds retval = PTR_ERR(p->mempolicy); 947*1da177e4SLinus Torvalds p->mempolicy = NULL; 948*1da177e4SLinus Torvalds goto bad_fork_cleanup; 949*1da177e4SLinus Torvalds } 950*1da177e4SLinus Torvalds #endif 951*1da177e4SLinus Torvalds 952*1da177e4SLinus Torvalds p->tgid = p->pid; 953*1da177e4SLinus Torvalds if (clone_flags & CLONE_THREAD) 954*1da177e4SLinus Torvalds p->tgid = current->tgid; 955*1da177e4SLinus Torvalds 956*1da177e4SLinus Torvalds if ((retval = security_task_alloc(p))) 957*1da177e4SLinus Torvalds goto bad_fork_cleanup_policy; 958*1da177e4SLinus Torvalds if ((retval = audit_alloc(p))) 959*1da177e4SLinus Torvalds goto bad_fork_cleanup_security; 960*1da177e4SLinus Torvalds /* copy all the process information */ 961*1da177e4SLinus Torvalds if ((retval = copy_semundo(clone_flags, p))) 962*1da177e4SLinus Torvalds goto bad_fork_cleanup_audit; 963*1da177e4SLinus Torvalds if ((retval = copy_files(clone_flags, p))) 964*1da177e4SLinus Torvalds goto bad_fork_cleanup_semundo; 965*1da177e4SLinus Torvalds if ((retval = copy_fs(clone_flags, p))) 966*1da177e4SLinus Torvalds goto bad_fork_cleanup_files; 967*1da177e4SLinus Torvalds if ((retval = copy_sighand(clone_flags, p))) 968*1da177e4SLinus Torvalds goto bad_fork_cleanup_fs; 969*1da177e4SLinus Torvalds if ((retval = copy_signal(clone_flags, p))) 970*1da177e4SLinus Torvalds goto bad_fork_cleanup_sighand; 971*1da177e4SLinus Torvalds if ((retval = copy_mm(clone_flags, p))) 972*1da177e4SLinus Torvalds goto bad_fork_cleanup_signal; 973*1da177e4SLinus Torvalds if ((retval = copy_keys(clone_flags, p))) 974*1da177e4SLinus Torvalds goto bad_fork_cleanup_mm; 975*1da177e4SLinus Torvalds if ((retval = copy_namespace(clone_flags, p))) 976*1da177e4SLinus Torvalds goto bad_fork_cleanup_keys; 977*1da177e4SLinus Torvalds retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); 978*1da177e4SLinus Torvalds if (retval) 979*1da177e4SLinus Torvalds goto bad_fork_cleanup_namespace; 980*1da177e4SLinus Torvalds 981*1da177e4SLinus Torvalds p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 982*1da177e4SLinus Torvalds /* 983*1da177e4SLinus Torvalds * Clear TID on mm_release()? 984*1da177e4SLinus Torvalds */ 985*1da177e4SLinus Torvalds p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 986*1da177e4SLinus Torvalds 987*1da177e4SLinus Torvalds /* 988*1da177e4SLinus Torvalds * Syscall tracing should be turned off in the child regardless 989*1da177e4SLinus Torvalds * of CLONE_PTRACE. 990*1da177e4SLinus Torvalds */ 991*1da177e4SLinus Torvalds clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 992*1da177e4SLinus Torvalds 993*1da177e4SLinus Torvalds /* Our parent execution domain becomes current domain 994*1da177e4SLinus Torvalds These must match for thread signalling to apply */ 995*1da177e4SLinus Torvalds 996*1da177e4SLinus Torvalds p->parent_exec_id = p->self_exec_id; 997*1da177e4SLinus Torvalds 998*1da177e4SLinus Torvalds /* ok, now we should be set up.. */ 999*1da177e4SLinus Torvalds p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); 1000*1da177e4SLinus Torvalds p->pdeath_signal = 0; 1001*1da177e4SLinus Torvalds p->exit_state = 0; 1002*1da177e4SLinus Torvalds 1003*1da177e4SLinus Torvalds /* Perform scheduler related setup */ 1004*1da177e4SLinus Torvalds sched_fork(p); 1005*1da177e4SLinus Torvalds 1006*1da177e4SLinus Torvalds /* 1007*1da177e4SLinus Torvalds * Ok, make it visible to the rest of the system. 1008*1da177e4SLinus Torvalds * We dont wake it up yet. 1009*1da177e4SLinus Torvalds */ 1010*1da177e4SLinus Torvalds p->group_leader = p; 1011*1da177e4SLinus Torvalds INIT_LIST_HEAD(&p->ptrace_children); 1012*1da177e4SLinus Torvalds INIT_LIST_HEAD(&p->ptrace_list); 1013*1da177e4SLinus Torvalds 1014*1da177e4SLinus Torvalds /* Need tasklist lock for parent etc handling! */ 1015*1da177e4SLinus Torvalds write_lock_irq(&tasklist_lock); 1016*1da177e4SLinus Torvalds 1017*1da177e4SLinus Torvalds /* 1018*1da177e4SLinus Torvalds * The task hasn't been attached yet, so cpus_allowed mask cannot 1019*1da177e4SLinus Torvalds * have changed. The cpus_allowed mask of the parent may have 1020*1da177e4SLinus Torvalds * changed after it was copied first time, and it may then move to 1021*1da177e4SLinus Torvalds * another CPU - so we re-copy it here and set the child's CPU to 1022*1da177e4SLinus Torvalds * the parent's CPU. This avoids alot of nasty races. 1023*1da177e4SLinus Torvalds */ 1024*1da177e4SLinus Torvalds p->cpus_allowed = current->cpus_allowed; 1025*1da177e4SLinus Torvalds set_task_cpu(p, smp_processor_id()); 1026*1da177e4SLinus Torvalds 1027*1da177e4SLinus Torvalds /* 1028*1da177e4SLinus Torvalds * Check for pending SIGKILL! The new thread should not be allowed 1029*1da177e4SLinus Torvalds * to slip out of an OOM kill. (or normal SIGKILL.) 1030*1da177e4SLinus Torvalds */ 1031*1da177e4SLinus Torvalds if (sigismember(¤t->pending.signal, SIGKILL)) { 1032*1da177e4SLinus Torvalds write_unlock_irq(&tasklist_lock); 1033*1da177e4SLinus Torvalds retval = -EINTR; 1034*1da177e4SLinus Torvalds goto bad_fork_cleanup_namespace; 1035*1da177e4SLinus Torvalds } 1036*1da177e4SLinus Torvalds 1037*1da177e4SLinus Torvalds /* CLONE_PARENT re-uses the old parent */ 1038*1da177e4SLinus Torvalds if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) 1039*1da177e4SLinus Torvalds p->real_parent = current->real_parent; 1040*1da177e4SLinus Torvalds else 1041*1da177e4SLinus Torvalds p->real_parent = current; 1042*1da177e4SLinus Torvalds p->parent = p->real_parent; 1043*1da177e4SLinus Torvalds 1044*1da177e4SLinus Torvalds if (clone_flags & CLONE_THREAD) { 1045*1da177e4SLinus Torvalds spin_lock(¤t->sighand->siglock); 1046*1da177e4SLinus Torvalds /* 1047*1da177e4SLinus Torvalds * Important: if an exit-all has been started then 1048*1da177e4SLinus Torvalds * do not create this new thread - the whole thread 1049*1da177e4SLinus Torvalds * group is supposed to exit anyway. 1050*1da177e4SLinus Torvalds */ 1051*1da177e4SLinus Torvalds if (current->signal->flags & SIGNAL_GROUP_EXIT) { 1052*1da177e4SLinus Torvalds spin_unlock(¤t->sighand->siglock); 1053*1da177e4SLinus Torvalds write_unlock_irq(&tasklist_lock); 1054*1da177e4SLinus Torvalds retval = -EAGAIN; 1055*1da177e4SLinus Torvalds goto bad_fork_cleanup_namespace; 1056*1da177e4SLinus Torvalds } 1057*1da177e4SLinus Torvalds p->group_leader = current->group_leader; 1058*1da177e4SLinus Torvalds 1059*1da177e4SLinus Torvalds if (current->signal->group_stop_count > 0) { 1060*1da177e4SLinus Torvalds /* 1061*1da177e4SLinus Torvalds * There is an all-stop in progress for the group. 1062*1da177e4SLinus Torvalds * We ourselves will stop as soon as we check signals. 1063*1da177e4SLinus Torvalds * Make the new thread part of that group stop too. 1064*1da177e4SLinus Torvalds */ 1065*1da177e4SLinus Torvalds current->signal->group_stop_count++; 1066*1da177e4SLinus Torvalds set_tsk_thread_flag(p, TIF_SIGPENDING); 1067*1da177e4SLinus Torvalds } 1068*1da177e4SLinus Torvalds 1069*1da177e4SLinus Torvalds if (!cputime_eq(current->signal->it_virt_expires, 1070*1da177e4SLinus Torvalds cputime_zero) || 1071*1da177e4SLinus Torvalds !cputime_eq(current->signal->it_prof_expires, 1072*1da177e4SLinus Torvalds cputime_zero) || 1073*1da177e4SLinus Torvalds current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || 1074*1da177e4SLinus Torvalds !list_empty(¤t->signal->cpu_timers[0]) || 1075*1da177e4SLinus Torvalds !list_empty(¤t->signal->cpu_timers[1]) || 1076*1da177e4SLinus Torvalds !list_empty(¤t->signal->cpu_timers[2])) { 1077*1da177e4SLinus Torvalds /* 1078*1da177e4SLinus Torvalds * Have child wake up on its first tick to check 1079*1da177e4SLinus Torvalds * for process CPU timers. 1080*1da177e4SLinus Torvalds */ 1081*1da177e4SLinus Torvalds p->it_prof_expires = jiffies_to_cputime(1); 1082*1da177e4SLinus Torvalds } 1083*1da177e4SLinus Torvalds 1084*1da177e4SLinus Torvalds spin_unlock(¤t->sighand->siglock); 1085*1da177e4SLinus Torvalds } 1086*1da177e4SLinus Torvalds 1087*1da177e4SLinus Torvalds SET_LINKS(p); 1088*1da177e4SLinus Torvalds if (unlikely(p->ptrace & PT_PTRACED)) 1089*1da177e4SLinus Torvalds __ptrace_link(p, current->parent); 1090*1da177e4SLinus Torvalds 1091*1da177e4SLinus Torvalds cpuset_fork(p); 1092*1da177e4SLinus Torvalds 1093*1da177e4SLinus Torvalds attach_pid(p, PIDTYPE_PID, p->pid); 1094*1da177e4SLinus Torvalds attach_pid(p, PIDTYPE_TGID, p->tgid); 1095*1da177e4SLinus Torvalds if (thread_group_leader(p)) { 1096*1da177e4SLinus Torvalds attach_pid(p, PIDTYPE_PGID, process_group(p)); 1097*1da177e4SLinus Torvalds attach_pid(p, PIDTYPE_SID, p->signal->session); 1098*1da177e4SLinus Torvalds if (p->pid) 1099*1da177e4SLinus Torvalds __get_cpu_var(process_counts)++; 1100*1da177e4SLinus Torvalds } 1101*1da177e4SLinus Torvalds 1102*1da177e4SLinus Torvalds nr_threads++; 1103*1da177e4SLinus Torvalds total_forks++; 1104*1da177e4SLinus Torvalds write_unlock_irq(&tasklist_lock); 1105*1da177e4SLinus Torvalds retval = 0; 1106*1da177e4SLinus Torvalds 1107*1da177e4SLinus Torvalds fork_out: 1108*1da177e4SLinus Torvalds if (retval) 1109*1da177e4SLinus Torvalds return ERR_PTR(retval); 1110*1da177e4SLinus Torvalds return p; 1111*1da177e4SLinus Torvalds 1112*1da177e4SLinus Torvalds bad_fork_cleanup_namespace: 1113*1da177e4SLinus Torvalds exit_namespace(p); 1114*1da177e4SLinus Torvalds bad_fork_cleanup_keys: 1115*1da177e4SLinus Torvalds exit_keys(p); 1116*1da177e4SLinus Torvalds bad_fork_cleanup_mm: 1117*1da177e4SLinus Torvalds if (p->mm) 1118*1da177e4SLinus Torvalds mmput(p->mm); 1119*1da177e4SLinus Torvalds bad_fork_cleanup_signal: 1120*1da177e4SLinus Torvalds exit_signal(p); 1121*1da177e4SLinus Torvalds bad_fork_cleanup_sighand: 1122*1da177e4SLinus Torvalds exit_sighand(p); 1123*1da177e4SLinus Torvalds bad_fork_cleanup_fs: 1124*1da177e4SLinus Torvalds exit_fs(p); /* blocking */ 1125*1da177e4SLinus Torvalds bad_fork_cleanup_files: 1126*1da177e4SLinus Torvalds exit_files(p); /* blocking */ 1127*1da177e4SLinus Torvalds bad_fork_cleanup_semundo: 1128*1da177e4SLinus Torvalds exit_sem(p); 1129*1da177e4SLinus Torvalds bad_fork_cleanup_audit: 1130*1da177e4SLinus Torvalds audit_free(p); 1131*1da177e4SLinus Torvalds bad_fork_cleanup_security: 1132*1da177e4SLinus Torvalds security_task_free(p); 1133*1da177e4SLinus Torvalds bad_fork_cleanup_policy: 1134*1da177e4SLinus Torvalds #ifdef CONFIG_NUMA 1135*1da177e4SLinus Torvalds mpol_free(p->mempolicy); 1136*1da177e4SLinus Torvalds #endif 1137*1da177e4SLinus Torvalds bad_fork_cleanup: 1138*1da177e4SLinus Torvalds if (p->binfmt) 1139*1da177e4SLinus Torvalds module_put(p->binfmt->module); 1140*1da177e4SLinus Torvalds bad_fork_cleanup_put_domain: 1141*1da177e4SLinus Torvalds module_put(p->thread_info->exec_domain->module); 1142*1da177e4SLinus Torvalds bad_fork_cleanup_count: 1143*1da177e4SLinus Torvalds put_group_info(p->group_info); 1144*1da177e4SLinus Torvalds atomic_dec(&p->user->processes); 1145*1da177e4SLinus Torvalds free_uid(p->user); 1146*1da177e4SLinus Torvalds bad_fork_free: 1147*1da177e4SLinus Torvalds free_task(p); 1148*1da177e4SLinus Torvalds goto fork_out; 1149*1da177e4SLinus Torvalds } 1150*1da177e4SLinus Torvalds 1151*1da177e4SLinus Torvalds struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) 1152*1da177e4SLinus Torvalds { 1153*1da177e4SLinus Torvalds memset(regs, 0, sizeof(struct pt_regs)); 1154*1da177e4SLinus Torvalds return regs; 1155*1da177e4SLinus Torvalds } 1156*1da177e4SLinus Torvalds 1157*1da177e4SLinus Torvalds task_t * __devinit fork_idle(int cpu) 1158*1da177e4SLinus Torvalds { 1159*1da177e4SLinus Torvalds task_t *task; 1160*1da177e4SLinus Torvalds struct pt_regs regs; 1161*1da177e4SLinus Torvalds 1162*1da177e4SLinus Torvalds task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0); 1163*1da177e4SLinus Torvalds if (!task) 1164*1da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 1165*1da177e4SLinus Torvalds init_idle(task, cpu); 1166*1da177e4SLinus Torvalds unhash_process(task); 1167*1da177e4SLinus Torvalds return task; 1168*1da177e4SLinus Torvalds } 1169*1da177e4SLinus Torvalds 1170*1da177e4SLinus Torvalds static inline int fork_traceflag (unsigned clone_flags) 1171*1da177e4SLinus Torvalds { 1172*1da177e4SLinus Torvalds if (clone_flags & CLONE_UNTRACED) 1173*1da177e4SLinus Torvalds return 0; 1174*1da177e4SLinus Torvalds else if (clone_flags & CLONE_VFORK) { 1175*1da177e4SLinus Torvalds if (current->ptrace & PT_TRACE_VFORK) 1176*1da177e4SLinus Torvalds return PTRACE_EVENT_VFORK; 1177*1da177e4SLinus Torvalds } else if ((clone_flags & CSIGNAL) != SIGCHLD) { 1178*1da177e4SLinus Torvalds if (current->ptrace & PT_TRACE_CLONE) 1179*1da177e4SLinus Torvalds return PTRACE_EVENT_CLONE; 1180*1da177e4SLinus Torvalds } else if (current->ptrace & PT_TRACE_FORK) 1181*1da177e4SLinus Torvalds return PTRACE_EVENT_FORK; 1182*1da177e4SLinus Torvalds 1183*1da177e4SLinus Torvalds return 0; 1184*1da177e4SLinus Torvalds } 1185*1da177e4SLinus Torvalds 1186*1da177e4SLinus Torvalds /* 1187*1da177e4SLinus Torvalds * Ok, this is the main fork-routine. 1188*1da177e4SLinus Torvalds * 1189*1da177e4SLinus Torvalds * It copies the process, and if successful kick-starts 1190*1da177e4SLinus Torvalds * it and waits for it to finish using the VM if required. 1191*1da177e4SLinus Torvalds */ 1192*1da177e4SLinus Torvalds long do_fork(unsigned long clone_flags, 1193*1da177e4SLinus Torvalds unsigned long stack_start, 1194*1da177e4SLinus Torvalds struct pt_regs *regs, 1195*1da177e4SLinus Torvalds unsigned long stack_size, 1196*1da177e4SLinus Torvalds int __user *parent_tidptr, 1197*1da177e4SLinus Torvalds int __user *child_tidptr) 1198*1da177e4SLinus Torvalds { 1199*1da177e4SLinus Torvalds struct task_struct *p; 1200*1da177e4SLinus Torvalds int trace = 0; 1201*1da177e4SLinus Torvalds long pid = alloc_pidmap(); 1202*1da177e4SLinus Torvalds 1203*1da177e4SLinus Torvalds if (pid < 0) 1204*1da177e4SLinus Torvalds return -EAGAIN; 1205*1da177e4SLinus Torvalds if (unlikely(current->ptrace)) { 1206*1da177e4SLinus Torvalds trace = fork_traceflag (clone_flags); 1207*1da177e4SLinus Torvalds if (trace) 1208*1da177e4SLinus Torvalds clone_flags |= CLONE_PTRACE; 1209*1da177e4SLinus Torvalds } 1210*1da177e4SLinus Torvalds 1211*1da177e4SLinus Torvalds p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 1212*1da177e4SLinus Torvalds /* 1213*1da177e4SLinus Torvalds * Do this prior waking up the new thread - the thread pointer 1214*1da177e4SLinus Torvalds * might get invalid after that point, if the thread exits quickly. 1215*1da177e4SLinus Torvalds */ 1216*1da177e4SLinus Torvalds if (!IS_ERR(p)) { 1217*1da177e4SLinus Torvalds struct completion vfork; 1218*1da177e4SLinus Torvalds 1219*1da177e4SLinus Torvalds if (clone_flags & CLONE_VFORK) { 1220*1da177e4SLinus Torvalds p->vfork_done = &vfork; 1221*1da177e4SLinus Torvalds init_completion(&vfork); 1222*1da177e4SLinus Torvalds } 1223*1da177e4SLinus Torvalds 1224*1da177e4SLinus Torvalds if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { 1225*1da177e4SLinus Torvalds /* 1226*1da177e4SLinus Torvalds * We'll start up with an immediate SIGSTOP. 1227*1da177e4SLinus Torvalds */ 1228*1da177e4SLinus Torvalds sigaddset(&p->pending.signal, SIGSTOP); 1229*1da177e4SLinus Torvalds set_tsk_thread_flag(p, TIF_SIGPENDING); 1230*1da177e4SLinus Torvalds } 1231*1da177e4SLinus Torvalds 1232*1da177e4SLinus Torvalds if (!(clone_flags & CLONE_STOPPED)) 1233*1da177e4SLinus Torvalds wake_up_new_task(p, clone_flags); 1234*1da177e4SLinus Torvalds else 1235*1da177e4SLinus Torvalds p->state = TASK_STOPPED; 1236*1da177e4SLinus Torvalds 1237*1da177e4SLinus Torvalds if (unlikely (trace)) { 1238*1da177e4SLinus Torvalds current->ptrace_message = pid; 1239*1da177e4SLinus Torvalds ptrace_notify ((trace << 8) | SIGTRAP); 1240*1da177e4SLinus Torvalds } 1241*1da177e4SLinus Torvalds 1242*1da177e4SLinus Torvalds if (clone_flags & CLONE_VFORK) { 1243*1da177e4SLinus Torvalds wait_for_completion(&vfork); 1244*1da177e4SLinus Torvalds if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) 1245*1da177e4SLinus Torvalds ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 1246*1da177e4SLinus Torvalds } 1247*1da177e4SLinus Torvalds } else { 1248*1da177e4SLinus Torvalds free_pidmap(pid); 1249*1da177e4SLinus Torvalds pid = PTR_ERR(p); 1250*1da177e4SLinus Torvalds } 1251*1da177e4SLinus Torvalds return pid; 1252*1da177e4SLinus Torvalds } 1253*1da177e4SLinus Torvalds 1254*1da177e4SLinus Torvalds void __init proc_caches_init(void) 1255*1da177e4SLinus Torvalds { 1256*1da177e4SLinus Torvalds sighand_cachep = kmem_cache_create("sighand_cache", 1257*1da177e4SLinus Torvalds sizeof(struct sighand_struct), 0, 1258*1da177e4SLinus Torvalds SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1259*1da177e4SLinus Torvalds signal_cachep = kmem_cache_create("signal_cache", 1260*1da177e4SLinus Torvalds sizeof(struct signal_struct), 0, 1261*1da177e4SLinus Torvalds SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1262*1da177e4SLinus Torvalds files_cachep = kmem_cache_create("files_cache", 1263*1da177e4SLinus Torvalds sizeof(struct files_struct), 0, 1264*1da177e4SLinus Torvalds SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1265*1da177e4SLinus Torvalds fs_cachep = kmem_cache_create("fs_cache", 1266*1da177e4SLinus Torvalds sizeof(struct fs_struct), 0, 1267*1da177e4SLinus Torvalds SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1268*1da177e4SLinus Torvalds vm_area_cachep = kmem_cache_create("vm_area_struct", 1269*1da177e4SLinus Torvalds sizeof(struct vm_area_struct), 0, 1270*1da177e4SLinus Torvalds SLAB_PANIC, NULL, NULL); 1271*1da177e4SLinus Torvalds mm_cachep = kmem_cache_create("mm_struct", 1272*1da177e4SLinus Torvalds sizeof(struct mm_struct), 0, 1273*1da177e4SLinus Torvalds SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1274*1da177e4SLinus Torvalds } 1275