kernel/pid.c

   1 /*
   2  * Generic pidhash and scalable, time-bounded PID allocator
   3  *
   4  * (C) 2002-2003 William Irwin, IBM
   5  * (C) 2004 William Irwin, Oracle
   6  * (C) 2002-2004 Ingo Molnar, Red Hat
   7  *
   8  * pid-structures are backing objects for tasks sharing a given ID to chain
   9  * against. There is very little to them aside from hashing them and
  10  * parking tasks using given ID's on a list.
  11  *
  12  * The hash is always changed with the tasklist_lock write-acquired,
  13  * and the hash is only accessed with the tasklist_lock at least
  14  * read-acquired, so there's no additional SMP locking needed here.
  15  *
  16  * We have a list of bitmap pages, which bitmaps represent the PID space.
  17  * Allocating and freeing PIDs is completely lockless. The worst-case
  18  * allocation scenario when all but one out of 1 million PIDs possible are
  19  * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  20  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
  21  */
  22
  23 #include <linux/mm.h>
  24 #include <linux/module.h>
  25 #include <linux/slab.h>
  26 #include <linux/init.h>
  27 #include <linux/bootmem.h>
  28 #include <linux/hash.h>
  29
  30 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
  31 static struct hlist_head *pid_hash[PIDTYPE_MAX];
  32 static int pidhash_shift;
  33
  34 int pid_max = PID_MAX_DEFAULT;
  35 int last_pid;
  36
  37 #define RESERVED_PIDS           300
  38
  39 int pid_max_min = RESERVED_PIDS + 1;
  40 int pid_max_max = PID_MAX_LIMIT;
  41
  42 #define PIDMAP_ENTRIES          ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
  43 #define BITS_PER_PAGE           (PAGE_SIZE*8)
  44 #define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
  45 #define mk_pid(map, off)        (((map) - pidmap_array)*BITS_PER_PAGE + (off))
  46 #define find_next_offset(map, off)                                      \
  47                 find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
  48
  49 /*
  50  * PID-map pages start out as NULL, they get allocated upon
  51  * first use and are never deallocated. This way a low pid_max
  52  * value does not cause lots of bitmaps to be allocated, but
  53  * the scheme scales to up to 4 million PIDs, runtime.
  54  */
  55 typedef struct pidmap {
  56         atomic_t nr_free;
  57         void *page;
  58 } pidmap_t;
  59
  60 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
  61          { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
  62
  63 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
  64
  65 fastcall void free_pidmap(int pid)
  66 {
  67         pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
  68         int offset = pid & BITS_PER_PAGE_MASK;
  69
  70         clear_bit(offset, map->page);
  71         atomic_inc(&map->nr_free);
  72 }
  73
  74 int alloc_pidmap(void)
  75 {
  76         int i, offset, max_scan, pid, last = last_pid;
  77         pidmap_t *map;
  78
  79         pid = last + 1;
  80         if (pid >= pid_max)
  81                 pid = RESERVED_PIDS;
  82         offset = pid & BITS_PER_PAGE_MASK;
  83         map = &pidmap_array[pid/BITS_PER_PAGE];
  84         max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
  85         for (i = 0; i <= max_scan; ++i) {
  86                 if (unlikely(!map->page)) {
  87                         unsigned long page = get_zeroed_page(GFP_KERNEL);
  88                         /*
  89                          * Free the page if someone raced with us
  90                          * installing it:
  91                          */
  92                         spin_lock(&pidmap_lock);
  93                         if (map->page)
  94                                 free_page(page);
  95                         else
  96                                 map->page = (void *)page;
  97                         spin_unlock(&pidmap_lock);
  98                         if (unlikely(!map->page))
  99                                 break;
 100                 }
 101                 if (likely(atomic_read(&map->nr_free))) {
 102                         do {
 103                                 if (!test_and_set_bit(offset, map->page)) {
 104                                         atomic_dec(&map->nr_free);
 105                                         last_pid = pid;
 106                                         return pid;
 107                                 }
 108                                 offset = find_next_offset(map, offset);
 109                                 pid = mk_pid(map, offset);
 110                         /*
 111                          * find_next_offset() found a bit, the pid from it
 112                          * is in-bounds, and if we fell back to the last
 113                          * bitmap block and the final block was the same
 114                          * as the starting point, pid is before last_pid.
 115                          */
 116                         } while (offset < BITS_PER_PAGE && pid < pid_max &&
 117                                         (i != max_scan || pid < last ||
 118                                             !((last+1) & BITS_PER_PAGE_MASK)));
 119                 }
 120                 if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
 121                         ++map;
 122                         offset = 0;
 123                 } else {
 124                         map = &pidmap_array[0];
 125                         offset = RESERVED_PIDS;
 126                         if (unlikely(last == offset))
 127                                 break;
 128                 }
 129                 pid = mk_pid(map, offset);
 130         }
 131         return -1;
 132 }
 133
 134 struct pid * fastcall find_pid(enum pid_type type, int nr)
 135 {
 136         struct hlist_node *elem;
 137         struct pid *pid;
 138
 139         hlist_for_each_entry_rcu(pid, elem,
 140                         &pid_hash[type][pid_hashfn(nr)], pid_chain) {
 141                 if (pid->nr == nr)
 142                         return pid;
 143         }
 144         return NULL;
 145 }
 146
 147 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 148 {
 149         struct pid *pid, *task_pid;
 150
 151         task_pid = &task->pids[type];
 152         pid = find_pid(type, nr);
 153         task_pid->nr = nr;
 154         if (pid == NULL) {
 155                 INIT_LIST_HEAD(&task_pid->pid_list);
 156                 hlist_add_head_rcu(&task_pid->pid_chain,
 157                                    &pid_hash[type][pid_hashfn(nr)]);
 158         } else {
 159                 INIT_HLIST_NODE(&task_pid->pid_chain);
 160                 list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
 161         }
 162
 163         return 0;
 164 }
 165
 166 static fastcall int __detach_pid(task_t *task, enum pid_type type)
 167 {
 168         struct pid *pid, *pid_next;
 169         int nr = 0;
 170
 171         pid = &task->pids[type];
 172         if (!hlist_unhashed(&pid->pid_chain)) {
 173
 174                 if (list_empty(&pid->pid_list)) {
 175                         nr = pid->nr;
 176                         hlist_del_rcu(&pid->pid_chain);
 177                 } else {
 178                         pid_next = list_entry(pid->pid_list.next,
 179                                                 struct pid, pid_list);
 180                         /* insert next pid from pid_list to hash */
 181                         hlist_replace_rcu(&pid->pid_chain,
 182                                           &pid_next->pid_chain);
 183                 }
 184         }
 185
 186         list_del_rcu(&pid->pid_list);
 187         pid->nr = 0;
 188
 189         return nr;
 190 }
 191
 192 void fastcall detach_pid(task_t *task, enum pid_type type)
 193 {
 194         int tmp, nr;
 195
 196         nr = __detach_pid(task, type);
 197         if (!nr)
 198                 return;
 199
 200         for (tmp = PIDTYPE_MAX; --tmp >= 0; )
 201                 if (tmp != type && find_pid(tmp, nr))
 202                         return;
 203
 204         free_pidmap(nr);
 205 }
 206
 207 task_t *find_task_by_pid_type(int type, int nr)
 208 {
 209         struct pid *pid;
 210
 211         pid = find_pid(type, nr);
 212         if (!pid)
 213                 return NULL;
 214
 215         return pid_task(&pid->pid_list, type);
 216 }
 217
 218 EXPORT_SYMBOL(find_task_by_pid_type);
 219
 220 /*
 221  * This function switches the PIDs if a non-leader thread calls
 222  * sys_execve() - this must be done without releasing the PID.
 223  * (which a detach_pid() would eventually do.)
 224  */
 225 void switch_exec_pids(task_t *leader, task_t *thread)
 226 {
 227         __detach_pid(leader, PIDTYPE_PID);
 228         __detach_pid(leader, PIDTYPE_TGID);
 229         __detach_pid(leader, PIDTYPE_PGID);
 230         __detach_pid(leader, PIDTYPE_SID);
 231
 232         __detach_pid(thread, PIDTYPE_PID);
 233         __detach_pid(thread, PIDTYPE_TGID);
 234
 235         leader->pid = leader->tgid = thread->pid;
 236         thread->pid = thread->tgid;
 237
 238         attach_pid(thread, PIDTYPE_PID, thread->pid);
 239         attach_pid(thread, PIDTYPE_TGID, thread->tgid);
 240         attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
 241         attach_pid(thread, PIDTYPE_SID, thread->signal->session);
 242         list_add_tail(&thread->tasks, &init_task.tasks);
 243
 244         attach_pid(leader, PIDTYPE_PID, leader->pid);
 245         attach_pid(leader, PIDTYPE_TGID, leader->tgid);
 246         attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
 247         attach_pid(leader, PIDTYPE_SID, leader->signal->session);
 248 }
 249
 250 /*
 251  * The pid hash table is scaled according to the amount of memory in the
 252  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
 253  * more.
 254  */
 255 void __init pidhash_init(void)
 256 {
 257         int i, j, pidhash_size;
 258         unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
 259
 260         pidhash_shift = max(4, fls(megabytes * 4));
 261         pidhash_shift = min(12, pidhash_shift);
 262         pidhash_size = 1 << pidhash_shift;
 263
 264         printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
 265                 pidhash_size, pidhash_shift,
 266                 PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
 267
 268         for (i = 0; i < PIDTYPE_MAX; i++) {
 269                 pid_hash[i] = alloc_bootmem(pidhash_size *
 270                                         sizeof(*(pid_hash[i])));
 271                 if (!pid_hash[i])
 272                         panic("Could not alloc pidhash!\n");
 273                 for (j = 0; j < pidhash_size; j++)
 274                         INIT_HLIST_HEAD(&pid_hash[i][j]);
 275         }
 276 }
 277
 278 void __init pidmap_init(void)
 279 {
 280         int i;
 281
 282         pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
 283         set_bit(0, pidmap_array->page);
 284         atomic_dec(&pidmap_array->nr_free);
 285
 286         /*
 287          * Allocate PID 0, and hash it via all PID types:
 288          */
 289
 290         for (i = 0; i < PIDTYPE_MAX; i++)
 291                 attach_pid(current, i, 0);
 292 }