/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 William Irwin, IBM
 * (C) 2004 William Irwin, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given IDs on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages whose bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. In the worst-case
 * allocation scenario, when all but one of the 1 million possible PIDs
 * are already allocated, we scan 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/hash.h>

#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
static struct hlist_head *pid_hash[PIDTYPE_MAX];
static int pidhash_shift;

int pid_max = PID_MAX_DEFAULT;
int last_pid;

#define RESERVED_PIDS 300

int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
#define BITS_PER_PAGE (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off))
#define find_next_offset(map, off) \
        find_next_zero_bit((map)->page, BITS_PER_PAGE, off)

/*
 * PID-map pages start out as NULL; they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales up to 4 million PIDs at runtime.
 */
typedef struct pidmap {
        atomic_t nr_free;
        void *page;
} pidmap_t;

static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
        { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
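/*
 * Worked example of the bitmap indexing above (a sketch, assuming 4 KB
 * pages and the 4-million-PID upper limit mentioned above, so
 * BITS_PER_PAGE == 32768 and PIDMAP_ENTRIES == 128): PID 40000 lives in
 * pidmap_array[40000 / 32768] == pidmap_array[1], at bit offset
 * 40000 & BITS_PER_PAGE_MASK == 7232.  Conversely,
 * mk_pid(&pidmap_array[1], 7232) == 1 * 32768 + 7232 == 40000.
 */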
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);

fastcall void free_pidmap(int pid)
{
        pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
        int offset = pid & BITS_PER_PAGE_MASK;

        clear_bit(offset, map->page);
        atomic_inc(&map->nr_free);
}

int alloc_pidmap(void)
{
        int i, offset, max_scan, pid, last = last_pid;
        pidmap_t *map;

        pid = last + 1;
        if (pid >= pid_max)
                pid = RESERVED_PIDS;
        offset = pid & BITS_PER_PAGE_MASK;
        map = &pidmap_array[pid/BITS_PER_PAGE];
        max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
        for (i = 0; i <= max_scan; ++i) {
                if (unlikely(!map->page)) {
                        unsigned long page = get_zeroed_page(GFP_KERNEL);
                        /*
                         * Free the page if someone raced with us
                         * installing it:
                         */
                        spin_lock(&pidmap_lock);
                        if (map->page)
                                free_page(page);
                        else
                                map->page = (void *)page;
                        spin_unlock(&pidmap_lock);
                        if (unlikely(!map->page))
                                break;
                }
                if (likely(atomic_read(&map->nr_free))) {
                        do {
                                if (!test_and_set_bit(offset, map->page)) {
                                        atomic_dec(&map->nr_free);
                                        last_pid = pid;
                                        return pid;
                                }
                                offset = find_next_offset(map, offset);
                                pid = mk_pid(map, offset);
                        /*
                         * find_next_offset() found a bit, the pid from it
                         * is in-bounds, and if we fell back to the last
                         * bitmap block and the final block was the same
                         * as the starting point, pid is before last_pid.
                         */
                        } while (offset < BITS_PER_PAGE && pid < pid_max &&
                                        (i != max_scan || pid < last ||
                                            !((last+1) & BITS_PER_PAGE_MASK)));
                }
                if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
                        ++map;
                        offset = 0;
                } else {
                        map = &pidmap_array[0];
                        offset = RESERVED_PIDS;
                        if (unlikely(last == offset))
                                break;
                }
                pid = mk_pid(map, offset);
        }
        return -1;
}

struct pid * fastcall find_pid(enum pid_type type, int nr)
{
        struct hlist_node *elem;
        struct pid *pid;

        hlist_for_each_entry(pid, elem,
                        &pid_hash[type][pid_hashfn(nr)], pid_chain) {
                if (pid->nr == nr)
                        return pid;
        }
        return NULL;
}

int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
{
        struct pid *pid, *task_pid;

        task_pid = &task->pids[type];
        pid = find_pid(type, nr);
        if (pid == NULL) {
                hlist_add_head(&task_pid->pid_chain,
                                &pid_hash[type][pid_hashfn(nr)]);
                INIT_LIST_HEAD(&task_pid->pid_list);
        } else {
                INIT_HLIST_NODE(&task_pid->pid_chain);
                list_add_tail(&task_pid->pid_list, &pid->pid_list);
        }
        task_pid->nr = nr;

        return 0;
}
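/*
 * A rough sketch (not from the original source) of what attach_pid()
 * builds for three tasks A, B and C sharing one ID of a given type:
 *
 *   pid_hash[type][pid_hashfn(nr)] ---> A->pids[type].pid_chain
 *                                       A->pids[type].pid_list <-> B <-> C
 *
 * Only the first attacher sits on the hash chain; B and C keep their
 * pid_chain unhashed (hence the INIT_HLIST_NODE() above) and are simply
 * queued on A's pid_list.  __detach_pid() below uses hlist_unhashed() to
 * tell the two cases apart and promotes the next list member into the
 * hash when the hashed entry goes away.
 */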
static fastcall int __detach_pid(task_t *task, enum pid_type type)
{
        struct pid *pid, *pid_next;
        int nr = 0;

        pid = &task->pids[type];
        if (!hlist_unhashed(&pid->pid_chain)) {
                hlist_del(&pid->pid_chain);

                if (list_empty(&pid->pid_list))
                        nr = pid->nr;
                else {
                        pid_next = list_entry(pid->pid_list.next,
                                        struct pid, pid_list);
                        /* insert next pid from pid_list to hash */
                        hlist_add_head(&pid_next->pid_chain,
                                &pid_hash[type][pid_hashfn(pid_next->nr)]);
                }
        }

        list_del(&pid->pid_list);
        pid->nr = 0;

        return nr;
}

void fastcall detach_pid(task_t *task, enum pid_type type)
{
        int tmp, nr;

        nr = __detach_pid(task, type);
        if (!nr)
                return;

        for (tmp = PIDTYPE_MAX; --tmp >= 0; )
                if (tmp != type && find_pid(tmp, nr))
                        return;

        free_pidmap(nr);
}

task_t *find_task_by_pid_type(int type, int nr)
{
        struct pid *pid;

        pid = find_pid(type, nr);
        if (!pid)
                return NULL;

        return pid_task(&pid->pid_list, type);
}

EXPORT_SYMBOL(find_task_by_pid_type);

/*
 * This function switches the PIDs if a non-leader thread calls
 * sys_execve() - this must be done without releasing the PID,
 * which a detach_pid() would eventually do.
 */
void switch_exec_pids(task_t *leader, task_t *thread)
{
        __detach_pid(leader, PIDTYPE_PID);
        __detach_pid(leader, PIDTYPE_TGID);
        __detach_pid(leader, PIDTYPE_PGID);
        __detach_pid(leader, PIDTYPE_SID);

        __detach_pid(thread, PIDTYPE_PID);
        __detach_pid(thread, PIDTYPE_TGID);

        leader->pid = leader->tgid = thread->pid;
        thread->pid = thread->tgid;

        attach_pid(thread, PIDTYPE_PID, thread->pid);
        attach_pid(thread, PIDTYPE_TGID, thread->tgid);
        attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
        attach_pid(thread, PIDTYPE_SID, thread->signal->session);
        list_add_tail(&thread->tasks, &init_task.tasks);

        attach_pid(leader, PIDTYPE_PID, leader->pid);
        attach_pid(leader, PIDTYPE_TGID, leader->tgid);
        attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
        attach_pid(leader, PIDTYPE_SID, leader->signal->session);
}

/*
 * The pid hash table is scaled according to the amount of memory in the
 * machine, from a minimum of 16 slots up to 4096 slots at one gigabyte
 * or more.
 */
void __init pidhash_init(void)
{
        int i, j, pidhash_size;
        unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);

        pidhash_shift = max(4, fls(megabytes * 4));
        pidhash_shift = min(12, pidhash_shift);
        pidhash_size = 1 << pidhash_shift;

        printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
                pidhash_size, pidhash_shift,
                PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));

        for (i = 0; i < PIDTYPE_MAX; i++) {
                pid_hash[i] = alloc_bootmem(pidhash_size *
                                        sizeof(*(pid_hash[i])));
                if (!pid_hash[i])
                        panic("Could not alloc pidhash!\n");
                for (j = 0; j < pidhash_size; j++)
                        INIT_HLIST_HEAD(&pid_hash[i][j]);
        }
}

void __init pidmap_init(void)
{
        int i;

        pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
        set_bit(0, pidmap_array->page);
        atomic_dec(&pidmap_array->nr_free);

        /*
         * Allocate PID 0, and hash it via all PID types:
         */

        for (i = 0; i < PIDTYPE_MAX; i++)
                attach_pid(current, i, 0);
}
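/*
 * Note on ordering (an assumption about kernels of this vintage, not
 * stated in this file): both initializers are called from start_kernel()
 * in init/main.c.  pidhash_init() runs early, before the page allocator
 * is usable, which is why it uses alloc_bootmem(); pidmap_init() runs
 * after mem_init(), so get_zeroed_page(GFP_KERNEL) is available for
 * pidmap_array[0] and attach_pid() can register the boot task (PID 0)
 * under every PID type.
 */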