/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/ipc.h>
#include <linux/shm.h>
#include <linux/stat.h>
#include <linux/malloc.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
extern unsigned long get_swap_page (void);
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, int size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static pte_t shm_swap_in(struct vm_area_struct *, unsigned long, unsigned long);

static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
static int max_shmid = 0; /* every used id is <= max_shmid */
static struct wait_queue *shm_lock = NULL; /* calling findkey() may need to wait */
static struct shmid_ds *shm_segs[SHMMNI];

static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */

/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;
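/*
 * Summary of the shm_segs[] slot states as used below (added for clarity):
 *	IPC_UNUSED - slot is free and may be claimed by newseg()
 *	IPC_NOID   - slot is being set up by newseg(); findkey() sleeps on
 *		     shm_lock until newseg() either publishes the descriptor
 *		     or resets the slot to IPC_UNUSED and wakes the sleepers
 *	otherwise  - pointer to a live struct shmid_ds
 */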
void __init shm_init (void)
{
	int id;

	for (id = 0; id < SHMMNI; id++)
		shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
	shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
	shm_lock = NULL;
	return;
}
static int findkey (key_t key)
{
	int id;
	struct shmid_ds *shp;

	for (id = 0; id <= max_shmid; id++) {
		while ((shp = shm_segs[id]) == IPC_NOID)
			sleep_on (&shm_lock);
		if (shp == IPC_UNUSED)
			continue;
		if (key == shp->shm_perm.key)
			return id;
	}
	return -1;
}
/*
 * allocate new shmid_ds and pgtable. protected by shm_segs[id] = NOID.
 */
static int newseg (key_t key, int shmflg, int size)
{
	struct shmid_ds *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id, i;

	if (size < SHMMIN)
		return -EINVAL;
	if (shm_tot + numpages >= SHMALL)
		return -ENOSPC;
	for (id = 0; id < SHMMNI; id++)
		if (shm_segs[id] == IPC_UNUSED) {
			shm_segs[id] = (struct shmid_ds *) IPC_NOID;
			goto found;
		}
	return -ENOSPC;

found:
	shp = (struct shmid_ds *) kmalloc (sizeof (*shp), GFP_KERNEL);
	if (!shp) {
		shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
		wake_up (&shm_lock);
		return -ENOMEM;
	}

	shp->shm_pages = (ulong *) kmalloc (numpages*sizeof(ulong),GFP_KERNEL);
	if (!shp->shm_pages) {
		shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
		wake_up (&shm_lock);
		kfree(shp);
		return -ENOMEM;
	}

	for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
	shm_tot += numpages;
	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->shm_perm.cuid = shp->shm_perm.uid = current->euid;
	shp->shm_perm.cgid = shp->shm_perm.gid = current->egid;
	shp->shm_perm.seq = shm_seq;
	shp->shm_segsz = size;
	shp->shm_cpid = current->pid;
	shp->attaches = NULL;
	shp->shm_lpid = shp->shm_nattch = 0;
	shp->shm_atime = shp->shm_dtime = 0;
	shp->shm_ctime = CURRENT_TIME;
	shp->shm_npages = numpages;

	if (id > max_shmid)
		max_shmid = id;
	shm_segs[id] = shp;
	used_segs++;
	wake_up (&shm_lock);
	return (unsigned int) shp->shm_perm.seq * SHMMNI + id;
}
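/*
 * The user-visible shmid packs the slot index and the per-slot sequence
 * number: shmid = seq * SHMMNI + id.  As a worked example (assuming the
 * usual SHMMNI of 128 from <linux/shm.h>): seq = 3, id = 5 gives
 * shmid = 3*128 + 5 = 389, which the other calls below decode as
 * id = shmid % SHMMNI = 5 and check seq == shmid / SHMMNI = 3, so a stale
 * handle to a recycled slot is rejected with -EIDRM.
 */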
asmlinkage int sys_shmget (key_t key, int size, int shmflg)
{
	struct shmid_ds *shp;
	int err, id = 0;

	down(&current->mm->mmap_sem);
	lock_kernel();
	if (size < 0 || size > SHMMAX) {
		err = -EINVAL;
	} else if (key == IPC_PRIVATE) {
		err = newseg(key, shmflg, size);
	} else if ((id = findkey (key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_segs[id];
		if (shp->shm_perm.mode & SHM_DEST)
			err = -EIDRM;
		else if (size > shp->shm_segsz)
			err = -EINVAL;
		else if (ipcperms (&shp->shm_perm, shmflg))
			err = -EACCES;
		else
			err = (int) shp->shm_perm.seq * SHMMNI + id;
	}
	unlock_kernel();
	up(&current->mm->mmap_sem);
	return err;
}
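/*
 * For reference, a minimal user-space sketch of the calls served by this
 * file (libc wrappers, error handling omitted): shmget() ends up in
 * sys_shmget() above, shmat()/shmdt() in sys_shmat()/sys_shmdt() below,
 * shmctl() in sys_shmctl().
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <string.h>
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = (char *) shmat(id, NULL, 0);
 *	strcpy(p, "hello");
 *	shmdt(p);
 *	shmctl(id, IPC_RMID, NULL);
 */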
/*
 * Only called after testing nattch and SHM_DEST.
 * Here pages, pgtable and shmid_ds are freed.
 */
static void killseg (int id)
{
	struct shmid_ds *shp;
	int i, numpages;

	shp = shm_segs[id];
	if (shp == IPC_NOID || shp == IPC_UNUSED) {
		printk ("shm nono: killseg called on unused seg id=%d\n", id);
		return;
	}
	shp->shm_perm.seq++;     /* for shmat */
	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
	shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
	used_segs--;
	if (id == max_shmid)
		while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
	if (!shp->shm_pages) {
		printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
		return;
	}
	numpages = shp->shm_npages;
	for (i = 0; i < numpages ; i++) {
		pte_t pte;
		pte = __pte(shp->shm_pages[i]);
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			free_page (pte_page(pte));
			shm_rss--;
		} else {
			swap_free(pte_val(pte));
			shm_swp--;
		}
	}
	kfree(shp->shm_pages);
	shm_tot -= numpages;
	kfree(shp);
	return;
}
asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shmid_ds tbuf;
	struct shmid_ds *shp;
	struct ipc_perm *ipcp;
	int id, err = -EINVAL;

	lock_kernel();
	if (cmd < 0 || shmid < 0)
		goto out;
	if (cmd == IPC_SET) {
		err = -EFAULT;
		if (!buf)
			goto out;
		err = verify_area (VERIFY_READ, buf, sizeof (*buf));
		if (err)
			goto out;
		copy_from_user (&tbuf, buf, sizeof (*buf));
	}
	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo shminfo;
		err = -EFAULT;
		if (!buf)
			goto out;
		shminfo.shmmni = SHMMNI;
		shminfo.shmmax = SHMMAX;
		shminfo.shmmin = SHMMIN;
		shminfo.shmall = SHMALL;
		shminfo.shmseg = SHMSEG;
		err = verify_area (VERIFY_WRITE, buf, sizeof (struct shminfo));
		if (err)
			goto out;
		copy_to_user (buf, &shminfo, sizeof(struct shminfo));
		err = max_shmid;
		goto out;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;
		err = -EFAULT;
		if (!buf)
			goto out;
		err = verify_area (VERIFY_WRITE, buf, sizeof (shm_info));
		if (err)
			goto out;
		shm_info.used_ids = used_segs;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		copy_to_user (buf, &shm_info, sizeof(shm_info));
		err = max_shmid;
		goto out;
	}
	case SHM_STAT:
		err = -EFAULT;
		if (!buf)
			goto out;
		err = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
		if (err)
			goto out;
		err = -EINVAL;
		if (shmid > max_shmid)
			goto out;
		shp = shm_segs[shmid];
		if (shp == IPC_UNUSED || shp == IPC_NOID)
			goto out;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out;
		id = (unsigned int) shp->shm_perm.seq * SHMMNI + shmid;
		tbuf.shm_perm   = shp->shm_perm;
		tbuf.shm_segsz  = shp->shm_segsz;
		tbuf.shm_atime  = shp->shm_atime;
		tbuf.shm_dtime  = shp->shm_dtime;
		tbuf.shm_ctime  = shp->shm_ctime;
		tbuf.shm_cpid   = shp->shm_cpid;
		tbuf.shm_lpid   = shp->shm_lpid;
		tbuf.shm_nattch = shp->shm_nattch;
		copy_to_user (buf, &tbuf, sizeof(*buf));
		err = id;
		goto out;
	}
	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	err = -EINVAL;
	if (shp == IPC_UNUSED || shp == IPC_NOID)
		goto out;
	err = -EIDRM;
	if (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)
		goto out;
	ipcp = &shp->shm_perm;

	switch (cmd) {
	case SHM_UNLOCK:
		err = -EPERM;
		if (!capable(CAP_IPC_LOCK))
			goto out;
		err = -EINVAL;
		if (!(ipcp->mode & SHM_LOCKED))
			goto out;
		ipcp->mode &= ~SHM_LOCKED;
		break;
	case SHM_LOCK:
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		err = -EPERM;
		if (!capable(CAP_IPC_LOCK))
			goto out;
		err = -EINVAL;
		if (ipcp->mode & SHM_LOCKED)
			goto out;
		ipcp->mode |= SHM_LOCKED;
		break;
	case IPC_STAT:
		err = -EACCES;
		if (ipcperms (ipcp, S_IRUGO))
			goto out;
		err = -EFAULT;
		if (!buf)
			goto out;
		err = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
		if (err)
			goto out;
		tbuf.shm_perm   = shp->shm_perm;
		tbuf.shm_segsz  = shp->shm_segsz;
		tbuf.shm_atime  = shp->shm_atime;
		tbuf.shm_dtime  = shp->shm_dtime;
		tbuf.shm_ctime  = shp->shm_ctime;
		tbuf.shm_cpid   = shp->shm_cpid;
		tbuf.shm_lpid   = shp->shm_lpid;
		tbuf.shm_nattch = shp->shm_nattch;
		copy_to_user (buf, &tbuf, sizeof(*buf));
		break;
	case IPC_SET:
		if (current->euid == shp->shm_perm.uid ||
		    current->euid == shp->shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			ipcp->uid = tbuf.shm_perm.uid;
			ipcp->gid = tbuf.shm_perm.gid;
			ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (tbuf.shm_perm.mode & S_IRWXUGO);
			shp->shm_ctime = CURRENT_TIME;
			break;
		}
		err = -EPERM;
		goto out;
	case IPC_RMID:
		if (current->euid == shp->shm_perm.uid ||
		    current->euid == shp->shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			shp->shm_perm.mode |= SHM_DEST;
			if (shp->shm_nattch <= 0)
				killseg (id);
			break;
		}
		err = -EPERM;
		goto out;
	default:
		err = -EINVAL;
		goto out;
	}
	err = 0;
out:
	unlock_kernel();
	return err;
}
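/*
 * Note on IPC_RMID: removal is deferred.  The segment is only flagged
 * SHM_DEST above; while processes remain attached (shm_nattch > 0) the
 * pages stay around and killseg() runs later, from shm_close(), when the
 * last attach disappears.  Until then shmget() on the same key returns
 * -EIDRM (see the SHM_DEST test in sys_shmget()).
 */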
/*
 * The per process internal structure for managing segments is
 * `struct vm_area_struct'.
 * A shmat will add to and shmdt will remove from the list.
 * shmd->vm_mm		the attacher
 * shmd->vm_start	virt addr of attach, multiple of SHMLBA
 * shmd->vm_end		multiple of SHMLBA
 * shmd->vm_next	next attach for task
 * shmd->vm_next_share	next attach for segment
 * shmd->vm_offset	offset into segment
 * shmd->vm_pte		signature for this attach
 */

static struct vm_operations_struct shm_vm_ops = {
	shm_open,	/* open - callback for a new vm-area open */
	shm_close,	/* close - callback for when the vm-area is released */
	NULL,		/* no need to sync pages at unmap */
	NULL,		/* protect */
	NULL,		/* sync */
	NULL,		/* advise */
	NULL,		/* nopage (done with swapin) */
	NULL,		/* wppage */
	NULL,		/* swapout (hardcoded right now) */
	shm_swap_in	/* swapin */
};
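/*
 * On the "signature" used throughout this file (a summary of the code; the
 * exact bit layout lives in <linux/shm.h> and the per-arch swap macros):
 * a not-present pte belonging to a shm mapping holds a fake swap entry of
 * type SHM_SWP_TYPE whose offset field packs both the segment id (low bits,
 * SHM_ID_MASK) and the page index within the segment (shifted by
 * SHM_IDX_SHIFT, masked with SHM_IDX_MASK).  sys_shmat() stores
 * SWP_ENTRY(SHM_SWP_TYPE, id) in shmd->vm_pte, shm_map() adds the per-page
 * index, and shm_swap_in() decodes both halves again at fault time.
 */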
/* Insert shmd into the list shp->attaches */
static inline void insert_attach (struct shmid_ds * shp, struct vm_area_struct * shmd)
{
	if((shmd->vm_next_share = shp->attaches) != NULL)
		shp->attaches->vm_pprev_share = &shmd->vm_next_share;
	shp->attaches = shmd;
	shmd->vm_pprev_share = &shp->attaches;
}

/* Remove shmd from list shp->attaches */
static inline void remove_attach (struct shmid_ds * shp, struct vm_area_struct * shmd)
{
	if(shmd->vm_next_share)
		shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
	*shmd->vm_pprev_share = shmd->vm_next_share;
}
/*
 * ensure page tables exist
 * mark page table entries with shm_sgn.
 */
static int shm_map (struct vm_area_struct *shmd)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table;
	unsigned long tmp, shm_sgn;
	int error;

	/* clear old mappings */
	do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

	/* add new mapping */
	tmp = shmd->vm_end - shmd->vm_start;
	if((current->mm->total_vm << PAGE_SHIFT) + tmp
	   > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;
	current->mm->total_vm += tmp >> PAGE_SHIFT;
	insert_vm_struct(current->mm, shmd);
	merge_segments(current->mm, shmd->vm_start, shmd->vm_end);

	/* map page range */
	error = 0;
	shm_sgn = shmd->vm_pte +
	  SWP_ENTRY(0, (shmd->vm_offset >> PAGE_SHIFT) << SHM_IDX_SHIFT);
	flush_cache_range(shmd->vm_mm, shmd->vm_start, shmd->vm_end);
	for (tmp = shmd->vm_start;
	     tmp < shmd->vm_end;
	     tmp += PAGE_SIZE, shm_sgn += SWP_ENTRY(0, 1 << SHM_IDX_SHIFT))
	{
		page_dir = pgd_offset(shmd->vm_mm,tmp);
		page_middle = pmd_alloc(page_dir,tmp);
		if (!page_middle) {
			error = -ENOMEM;
			break;
		}
		page_table = pte_alloc(page_middle,tmp);
		if (!page_table) {
			error = -ENOMEM;
			break;
		}
		set_pte(page_table, __pte(shm_sgn));
	}
	flush_tlb_range(shmd->vm_mm, shmd->vm_start, shmd->vm_end);
	return error;
}
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_ds *shp;
	struct vm_area_struct *shmd;
	int err = -EINVAL;
	unsigned int id;
	unsigned long addr;
	unsigned long len;

	down(&current->mm->mmap_sem);
	lock_kernel();
	if (shmid < 0) {
		/* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
		goto out;
	}

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		/* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
		goto out;
	}
	if (!(addr = (ulong) shmaddr)) {
		if (shmflg & SHM_REMAP)
			goto out;
		err = -ENOMEM;
		addr = 0;
	again:
		if (!(addr = get_unmapped_area(addr, shp->shm_segsz)))
			goto out;
		if(addr & (SHMLBA - 1)) {
			addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
			goto again;
		}
	} else if (addr & (SHMLBA-1)) {
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1);	/* round down */
		else
			goto out;
	}
	/*
	 * Check if addr exceeds TASK_SIZE (from do_mmap)
	 */
	len = PAGE_SIZE*shp->shm_npages;
	err = -EINVAL;
	if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
		goto out;
	/*
	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	 */
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
	{
		/* printk("shmat() -> EINVAL because segment intersects stack\n"); */
		goto out;
	}
	if (!(shmflg & SHM_REMAP))
		if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->shm_segsz))) {
			/* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
				addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
			goto out;
		}
	err = -EACCES;
	if (ipcperms(&shp->shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
		goto out;
	err = -EIDRM;
	if (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)
		goto out;

	err = -ENOMEM;
	shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!shmd)
		goto out;
	if ((shp != shm_segs[id]) || (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
		kmem_cache_free(vm_area_cachep, shmd);
		err = -EIDRM;
		goto out;
	}

	shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_file = NULL;
	shmd->vm_offset = 0;
	shmd->vm_ops = &shm_vm_ops;

	shp->shm_nattch++;	/* prevent destruction */
	if ((err = shm_map (shmd))) {
		if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
			killseg(id);
		kmem_cache_free(vm_area_cachep, shmd);
		goto out;
	}

	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */

	shp->shm_lpid = current->pid;
	shp->shm_atime = CURRENT_TIME;

	*raddr = addr;
	err = 0;
out:
	unlock_kernel();
	up(&current->mm->mmap_sem);
	return err;
}
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	unsigned int id;
	struct shmid_ds *shp;

	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	shp = shm_segs[id];
	if (shp == IPC_UNUSED) {
		printk("shm_open: unused id=%d PANIC\n", id);
		return;
	}
	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
	shp->shm_nattch++;
	shp->shm_atime = CURRENT_TIME;
	shp->shm_lpid = current->pid;
}
/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	struct shmid_ds *shp;
	int id;

	/* remove from the list of attaches of the shm segment */
	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	shp = shm_segs[id];
	remove_attach(shp,shmd);  /* remove from shp->attaches */
	shp->shm_lpid = current->pid;
	shp->shm_dtime = CURRENT_TIME;
	if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
		killseg (id);
}
/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage int sys_shmdt (char *shmaddr)
{
	struct vm_area_struct *shmd, *shmdnext;

	down(&current->mm->mmap_sem);
	lock_kernel();
	for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - shmd->vm_offset == (ulong) shmaddr)
			do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	unlock_kernel();
	up(&current->mm->mmap_sem);
	return 0;
}
/*
 * page not present ... go through shm_pages
 */
static pte_t shm_swap_in(struct vm_area_struct * shmd, unsigned long offset, unsigned long code)
{
	pte_t pte;
	struct shmid_ds *shp;
	unsigned int id, idx;

	id = SWP_OFFSET(code) & SHM_ID_MASK;
	if (id != (SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK)) {
		printk ("shm_swap_in: code id = %d and shmd id = %ld differ\n",
			id, SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
		return BAD_PAGE;
	}
	if (id > max_shmid) {
		printk ("shm_swap_in: id=%d too big. proc mem corrupted\n", id);
		return BAD_PAGE;
	}
	shp = shm_segs[id];
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		printk ("shm_swap_in: id=%d invalid. Race.\n", id);
		return BAD_PAGE;
	}
	idx = (SWP_OFFSET(code) >> SHM_IDX_SHIFT) & SHM_IDX_MASK;
	if (idx != (offset >> PAGE_SHIFT)) {
		printk ("shm_swap_in: code idx = %u and shmd idx = %lu differ\n",
			idx, offset >> PAGE_SHIFT);
		return BAD_PAGE;
	}
	if (idx >= shp->shm_npages) {
		printk ("shm_swap_in : too large page index. id=%d\n", id);
		return BAD_PAGE;
	}
	pte = __pte(shp->shm_pages[idx]);
	if (!pte_present(pte)) {
		unsigned long page = get_free_page(GFP_KERNEL);
		if (!page) {
			oom(current);
			return BAD_PAGE;
		}
		pte = __pte(shp->shm_pages[idx]);
		if (pte_present(pte)) {
			free_page (page); /* doesn't sleep */
			goto done;
		}
		if (!pte_none(pte)) {
			rw_swap_page_nocache(READ, pte_val(pte), (char *)page);
			pte = __pte(shp->shm_pages[idx]);
			if (pte_present(pte)) {
				free_page (page); /* doesn't sleep */
				goto done;
			}
			swap_free(pte_val(pte));
			shm_swp--;
		}
		shm_rss++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		shp->shm_pages[idx] = pte_val(pte);
	} else
		--current->maj_flt;  /* was incremented in do_no_page */

done:	/* pte_val(pte) == shp->shm_pages[idx] */
	current->min_flt++;
	atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
	return pte_modify(pte, shmd->vm_page_prot);
}
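/*
 * Note on the repeated reads of shp->shm_pages[idx] above: get_free_page()
 * and rw_swap_page_nocache() may sleep, and another process faulting the
 * same segment page can install it meanwhile, so the entry is re-examined
 * after every potentially blocking call and the freshly allocated page is
 * dropped if somebody else won the race.
 */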
/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */
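/*
 * shm_swap() below is the shared-memory side of the swap-out path: swap_id
 * and swap_idx form a persistent cursor (segment slot, page index) so the
 * scan resumes where the previous call stopped, and at most
 * counter = shm_rss >> prio present pages are examined per call.  For
 * example, with shm_rss = 1024 resident shm pages and prio = 6 the budget
 * is 1024 >> 6 = 16 pages before the call gives up and returns 0.
 */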
int shm_swap (int prio, int gfp_mask)
{
	pte_t page;
	struct shmid_ds *shp;
	struct vm_area_struct *shmd;
	unsigned long swap_nr;
	unsigned long id, idx;
	int loop = 0;
	int counter;

	counter = shm_rss >> prio;
	if (!counter || !(swap_nr = get_swap_page()))
		return 0;

check_id:
	shp = shm_segs[swap_id];
	if (shp == IPC_UNUSED || shp == IPC_NOID || shp->shm_perm.mode & SHM_LOCKED ) {
next_id:
		swap_idx = 0;
		if (++swap_id > max_shmid) {
			if (loop)
				goto failed;
			loop = 1;
			swap_id = 0;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	page = __pte(shp->shm_pages[idx]);
	if (!pte_present(page))
		goto check_table;
	if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))]))
		goto check_table;
	swap_attempts++;

	if (--counter < 0) { /* failed */
failed:
		swap_free (swap_nr);
		return 0;
	}
	if (shp->attaches)
	  for (shmd = shp->attaches; ; ) {
	    do {
		pgd_t *page_dir;
		pmd_t *page_middle;
		pte_t *page_table, pte;
		unsigned long tmp;

		if ((SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK) != id) {
			printk ("shm_swap: id=%ld does not match shmd->vm_pte.id=%ld\n",
				id, SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
			continue;
		}
		tmp = shmd->vm_start + (idx << PAGE_SHIFT) - shmd->vm_offset;
		if (!(tmp >= shmd->vm_start && tmp < shmd->vm_end))
			continue;
		page_dir = pgd_offset(shmd->vm_mm,tmp);
		if (pgd_none(*page_dir) || pgd_bad(*page_dir)) {
			printk("shm_swap: bad pgtbl! id=%ld start=%lx idx=%ld\n",
				id, shmd->vm_start, idx);
			pgd_clear(page_dir);
			continue;
		}
		page_middle = pmd_offset(page_dir,tmp);
		if (pmd_none(*page_middle) || pmd_bad(*page_middle)) {
			printk("shm_swap: bad pgmid! id=%ld start=%lx idx=%ld\n",
				id, shmd->vm_start, idx);
			pmd_clear(page_middle);
			continue;
		}
		page_table = pte_offset(page_middle,tmp);
		pte = *page_table;
		if (!pte_present(pte))
			continue;
		if (pte_young(pte)) {
			set_pte(page_table, pte_mkold(pte));
			continue;
		}
		if (pte_page(pte) != pte_page(page))
			printk("shm_swap_out: page and pte mismatch %lx %lx\n",
			       pte_page(pte),pte_page(page));
		flush_cache_page(shmd, tmp);
		set_pte(page_table,
			__pte(shmd->vm_pte + SWP_ENTRY(0, idx << SHM_IDX_SHIFT)));
		atomic_dec(&mem_map[MAP_NR(pte_page(pte))].count);
		if (shmd->vm_mm->rss > 0)
			shmd->vm_mm->rss--;
		flush_tlb_page(shmd, tmp);
		/* continue looping through the linked list */
	    } while (0);
	    shmd = shmd->vm_next_share;
	    if (!shmd)
		break;
	  }

	if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1)
		goto check_table;
	shp->shm_pages[idx] = swap_nr;
	rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page));
	free_page(pte_page(page));
	swap_successes++;
	shm_swp++;
	shm_rss--;
	return 1;
}
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_ds *shp, unsigned long idx,
			   unsigned long page, unsigned long entry)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	shp->shm_pages[idx] = pte_val(pte);
	atomic_inc(&mem_map[MAP_NR(page)].count);
	shm_rss++;

	swap_free(entry);
	shm_swp--;
}
/*
 * shm_unuse() searches for a possibly swapped-out shm page matching the
 * given swap entry and, if found, replaces it with the in-memory page.
 */
void shm_unuse(unsigned long entry, unsigned long page)
{
	int i, n;

	for (i = 0; i < SHMMNI; i++)
		if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID)
			for (n = 0; n < shm_segs[i]->shm_npages; n++)
				if (shm_segs[i]->shm_pages[n] == entry)
				{
					shm_unuse_page(shm_segs[i], n,
						       page, entry);
					return;
				}
}