/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 William Irwin, IBM
 * (C) 2004 William Irwin, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given IDs on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages, which bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. The worst-case
 * allocation scenario when all but one out of 1 million PIDs possible are
 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
30 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
31 static struct hlist_head
*pid_hash
[PIDTYPE_MAX
];
32 static int pidhash_shift
;
34 int pid_max
= PID_MAX_DEFAULT
;
37 #define RESERVED_PIDS 300
39 int pid_max_min
= RESERVED_PIDS
+ 1;
40 int pid_max_max
= PID_MAX_LIMIT
;
/* Number of bitmap pages needed to cover PID_MAX_LIMIT PIDs, rounded up. */
#define PIDMAP_ENTRIES		((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
/* One PID per bit, so a page tracks PAGE_SIZE*8 PIDs. */
#define BITS_PER_PAGE		(PAGE_SIZE*8)
/* Extracts a PID's bit offset within its bitmap page. */
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
/* Rebuild a PID from a pidmap_array element pointer and a bit offset. */
#define mk_pid(map, off)	(((map) - pidmap_array)*BITS_PER_PAGE + (off))
/* Search map's page for a clear bit, starting at bit 'off'. */
#define find_next_offset(map, off)					\
		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs at runtime.
 */
55 typedef struct pidmap
{
60 static pidmap_t pidmap_array
[PIDMAP_ENTRIES
] =
61 { [ 0 ... PIDMAP_ENTRIES
-1 ] = { ATOMIC_INIT(BITS_PER_PAGE
), NULL
} };
63 static __cacheline_aligned_in_smp
DEFINE_SPINLOCK(pidmap_lock
);
65 fastcall
void free_pidmap(int pid
)
67 pidmap_t
*map
= pidmap_array
+ pid
/ BITS_PER_PAGE
;
68 int offset
= pid
& BITS_PER_PAGE_MASK
;
70 clear_bit(offset
, map
->page
);
71 atomic_inc(&map
->nr_free
);
74 int alloc_pidmap(void)
76 int i
, offset
, max_scan
, pid
, last
= last_pid
;
82 offset
= pid
& BITS_PER_PAGE_MASK
;
83 map
= &pidmap_array
[pid
/BITS_PER_PAGE
];
84 max_scan
= (pid_max
+ BITS_PER_PAGE
- 1)/BITS_PER_PAGE
- !offset
;
85 for (i
= 0; i
<= max_scan
; ++i
) {
86 if (unlikely(!map
->page
)) {
87 unsigned long page
= get_zeroed_page(GFP_KERNEL
);
89 * Free the page if someone raced with us
92 spin_lock(&pidmap_lock
);
96 map
->page
= (void *)page
;
97 spin_unlock(&pidmap_lock
);
98 if (unlikely(!map
->page
))
101 if (likely(atomic_read(&map
->nr_free
))) {
103 if (!test_and_set_bit(offset
, map
->page
)) {
104 atomic_dec(&map
->nr_free
);
108 offset
= find_next_offset(map
, offset
);
109 pid
= mk_pid(map
, offset
);
111 * find_next_offset() found a bit, the pid from it
112 * is in-bounds, and if we fell back to the last
113 * bitmap block and the final block was the same
114 * as the starting point, pid is before last_pid.
116 } while (offset
< BITS_PER_PAGE
&& pid
< pid_max
&&
117 (i
!= max_scan
|| pid
< last
||
118 !((last
+1) & BITS_PER_PAGE_MASK
)));
120 if (map
< &pidmap_array
[(pid_max
-1)/BITS_PER_PAGE
]) {
124 map
= &pidmap_array
[0];
125 offset
= RESERVED_PIDS
;
126 if (unlikely(last
== offset
))
129 pid
= mk_pid(map
, offset
);
134 struct pid
* fastcall
find_pid(enum pid_type type
, int nr
)
136 struct hlist_node
*elem
;
139 hlist_for_each_entry_rcu(pid
, elem
,
140 &pid_hash
[type
][pid_hashfn(nr
)], pid_chain
) {
147 int fastcall
attach_pid(task_t
*task
, enum pid_type type
, int nr
)
149 struct pid
*pid
, *task_pid
;
151 task_pid
= &task
->pids
[type
];
152 pid
= find_pid(type
, nr
);
155 INIT_LIST_HEAD(&task_pid
->pid_list
);
156 hlist_add_head_rcu(&task_pid
->pid_chain
,
157 &pid_hash
[type
][pid_hashfn(nr
)]);
159 INIT_HLIST_NODE(&task_pid
->pid_chain
);
160 list_add_tail_rcu(&task_pid
->pid_list
, &pid
->pid_list
);
166 static fastcall
int __detach_pid(task_t
*task
, enum pid_type type
)
168 struct pid
*pid
, *pid_next
;
171 pid
= &task
->pids
[type
];
172 if (!hlist_unhashed(&pid
->pid_chain
)) {
174 if (list_empty(&pid
->pid_list
)) {
176 hlist_del_rcu(&pid
->pid_chain
);
178 pid_next
= list_entry(pid
->pid_list
.next
,
179 struct pid
, pid_list
);
180 /* insert next pid from pid_list to hash */
181 hlist_replace_rcu(&pid
->pid_chain
,
182 &pid_next
->pid_chain
);
186 list_del_rcu(&pid
->pid_list
);
192 void fastcall
detach_pid(task_t
*task
, enum pid_type type
)
196 nr
= __detach_pid(task
, type
);
200 for (tmp
= PIDTYPE_MAX
; --tmp
>= 0; )
201 if (tmp
!= type
&& find_pid(tmp
, nr
))
207 task_t
*find_task_by_pid_type(int type
, int nr
)
211 pid
= find_pid(type
, nr
);
215 return pid_task(&pid
->pid_list
, type
);
218 EXPORT_SYMBOL(find_task_by_pid_type
);
221 * This function switches the PIDs if a non-leader thread calls
222 * sys_execve() - this must be done without releasing the PID.
223 * (which a detach_pid() would eventually do.)
225 void switch_exec_pids(task_t
*leader
, task_t
*thread
)
227 __detach_pid(leader
, PIDTYPE_PID
);
228 __detach_pid(leader
, PIDTYPE_TGID
);
229 __detach_pid(leader
, PIDTYPE_PGID
);
230 __detach_pid(leader
, PIDTYPE_SID
);
232 __detach_pid(thread
, PIDTYPE_PID
);
233 __detach_pid(thread
, PIDTYPE_TGID
);
235 leader
->pid
= leader
->tgid
= thread
->pid
;
236 thread
->pid
= thread
->tgid
;
238 attach_pid(thread
, PIDTYPE_PID
, thread
->pid
);
239 attach_pid(thread
, PIDTYPE_TGID
, thread
->tgid
);
240 attach_pid(thread
, PIDTYPE_PGID
, thread
->signal
->pgrp
);
241 attach_pid(thread
, PIDTYPE_SID
, thread
->signal
->session
);
242 list_add_tail(&thread
->tasks
, &init_task
.tasks
);
244 attach_pid(leader
, PIDTYPE_PID
, leader
->pid
);
245 attach_pid(leader
, PIDTYPE_TGID
, leader
->tgid
);
246 attach_pid(leader
, PIDTYPE_PGID
, leader
->signal
->pgrp
);
247 attach_pid(leader
, PIDTYPE_SID
, leader
->signal
->session
);
251 * The pid hash table is scaled according to the amount of memory in the
252 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
255 void __init
pidhash_init(void)
257 int i
, j
, pidhash_size
;
258 unsigned long megabytes
= nr_kernel_pages
>> (20 - PAGE_SHIFT
);
260 pidhash_shift
= max(4, fls(megabytes
* 4));
261 pidhash_shift
= min(12, pidhash_shift
);
262 pidhash_size
= 1 << pidhash_shift
;
264 printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
265 pidhash_size
, pidhash_shift
,
266 PIDTYPE_MAX
* pidhash_size
* sizeof(struct hlist_head
));
268 for (i
= 0; i
< PIDTYPE_MAX
; i
++) {
269 pid_hash
[i
] = alloc_bootmem(pidhash_size
*
270 sizeof(*(pid_hash
[i
])));
272 panic("Could not alloc pidhash!\n");
273 for (j
= 0; j
< pidhash_size
; j
++)
274 INIT_HLIST_HEAD(&pid_hash
[i
][j
]);
278 void __init
pidmap_init(void)
282 pidmap_array
->page
= (void *)get_zeroed_page(GFP_KERNEL
);
283 set_bit(0, pidmap_array
->page
);
284 atomic_dec(&pidmap_array
->nr_free
);
287 * Allocate PID 0, and hash it via all PID types:
290 for (i
= 0; i
< PIDTYPE_MAX
; i
++)
291 attach_pid(current
, i
, 0);