/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 */
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
extern unsigned long get_swap_page (void);
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, int size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static unsigned long shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct vm_area_struct *, struct page *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif
static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
static int max_shmid = 0; /* every used id is <= max_shmid */
static DECLARE_WAIT_QUEUE_HEAD(shm_lock); /* calling findkey() may need to wait */
static struct shmid_kernel *shm_segs[SHMMNI];

static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */

/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;
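
/*
 * Bookkeeping overview (summary of how the structures below are used):
 * each slot of shm_segs[] holds either IPC_UNUSED (free), IPC_NOID (a
 * segment is being set up; findkey() sleeps on shm_lock until the slot
 * settles) or a pointer to a shmid_kernel.  The user-visible shmid is
 * encoded as shm_perm.seq * SHMMNI + slot, so an id that refers to a
 * slot that has since been destroyed and reused fails the seq check
 * and is rejected with -EIDRM.
 */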

void __init shm_init (void)
{
	int id;
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *ent;
#endif

	for (id = 0; id < SHMMNI; id++)
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
	shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
	init_waitqueue_head(&shm_lock);
#ifdef CONFIG_PROC_FS
	ent = create_proc_entry("sysvipc/shm", 0, 0);
	ent->read_proc = sysvipc_shm_read_proc;
#endif
	return;
}

static int findkey (key_t key)
{
	int id;
	struct shmid_kernel *shp;

	for (id = 0; id <= max_shmid; id++) {
		while ((shp = shm_segs[id]) == IPC_NOID)
			sleep_on (&shm_lock);
		if (shp == IPC_UNUSED)
			continue;
		if (key == shp->u.shm_perm.key)
			return id;
	}
	return -1;
}

/*
 * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
 */
static int newseg (key_t key, int shmflg, int size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id, i;

	if (size < SHMMIN)
		return -EINVAL;
	if (shm_tot + numpages >= SHMALL)
		return -ENOSPC;
	for (id = 0; id < SHMMNI; id++)
		if (shm_segs[id] == IPC_UNUSED) {
			shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
			goto found;
		}
	return -ENOSPC;

found:
	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
	if (!shp) {
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
		wake_up (&shm_lock);
		return -ENOMEM;
	}

	shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong));
	if (!shp->shm_pages) {
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
		wake_up (&shm_lock);
		kfree(shp);
		return -ENOMEM;
	}

	for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
	shm_tot += numpages;
	shp->u.shm_perm.key = key;
	shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
	shp->u.shm_perm.cgid = shp->u.shm_perm.gid = current->egid;
	shp->u.shm_perm.seq = shm_seq;
	shp->u.shm_segsz = size;
	shp->u.shm_cpid = current->pid;
	shp->attaches = NULL;
	shp->u.shm_lpid = shp->u.shm_nattch = 0;
	shp->u.shm_atime = shp->u.shm_dtime = 0;
	shp->u.shm_ctime = CURRENT_TIME;
	shp->shm_npages = numpages;

	if (id > max_shmid)
		max_shmid = id;
	shm_segs[id] = shp;
	used_segs++;
	wake_up (&shm_lock);
	return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
}

int shmmax = SHMMAX;
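
/*
 * shmget() policy, as implemented below: IPC_PRIVATE always creates a
 * fresh segment; otherwise the key is looked up and the segment is
 * either created (IPC_CREAT), rejected (IPC_CREAT|IPC_EXCL -> -EEXIST),
 * or reused after the SHM_DEST, size and permission checks.
 */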

asmlinkage int sys_shmget (key_t key, int size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;

	down(&current->mm->mmap_sem);
	lock_kernel();
	if (size < 0 || size > shmmax) {
		err = -EINVAL;
	} else if (key == IPC_PRIVATE) {
		err = newseg(key, shmflg, size);
	} else if ((id = findkey (key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_segs[id];
		if (shp->u.shm_perm.mode & SHM_DEST)
			err = -EIDRM;
		else if (size > shp->u.shm_segsz)
			err = -EINVAL;
		else if (ipcperms (&shp->u.shm_perm, shmflg))
			err = -EACCES;
		else
			err = (int) shp->u.shm_perm.seq * SHMMNI + id;
	}
	unlock_kernel();
	up(&current->mm->mmap_sem);
	return err;
}

/*
 * Only called after testing nattch and SHM_DEST.
 * Here pages, pgtable and shmid_kernel are freed.
 */
static void killseg (int id)
{
	struct shmid_kernel *shp;
	int i, numpages;

	shp = shm_segs[id];
	if (shp == IPC_NOID || shp == IPC_UNUSED) {
		printk ("shm nono: killseg called on unused seg id=%d\n", id);
		return;
	}
	shp->u.shm_perm.seq++;     /* for shmat */
	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
	shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
	used_segs--;
	if (id == max_shmid)
		while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
	if (!shp->shm_pages) {
		printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
		return;
	}
	numpages = shp->shm_npages;
	for (i = 0; i < numpages ; i++) {
		pte_t pte;
		pte = __pte(shp->shm_pages[i]);
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			free_page (pte_page(pte));
			shm_rss--;
		} else {
			swap_free(pte_val(pte));
			shm_swp--;
		}
	}
	vfree(shp->shm_pages);
	shm_tot -= numpages;
	kfree(shp);
	return;
}

asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shmid_ds tbuf;
	struct shmid_kernel *shp;
	struct ipc_perm *ipcp;
	int id, err = -EINVAL;

	lock_kernel();
	if (cmd < 0 || shmid < 0)
		goto out;
	if (cmd == IPC_SET) {
		err = -EFAULT;
		if(copy_from_user (&tbuf, buf, sizeof (*buf)))
			goto out;
	}

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo shminfo;
		err = -EFAULT;
		if (!buf)
			goto out;
		shminfo.shmmni = SHMMNI;
		shminfo.shmmax = shmmax;
		shminfo.shmmin = SHMMIN;
		shminfo.shmall = SHMALL;
		shminfo.shmseg = SHMSEG;
		if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
			goto out;
		err = max_shmid;
		goto out;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;
		err = -EFAULT;
		shm_info.used_ids = used_segs;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
			goto out;
		err = max_shmid;
		goto out;
	}
	case SHM_STAT:
		err = -EINVAL;
		if (shmid > max_shmid)
			goto out;
		shp = shm_segs[shmid];
		if (shp == IPC_UNUSED || shp == IPC_NOID)
			goto out;
		if (ipcperms (&shp->u.shm_perm, S_IRUGO))
			goto out;
		id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
		err = -EFAULT;
		if(copy_to_user (buf, &shp->u, sizeof(*buf)))
			goto out;
		err = id;
		goto out;
	}

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	err = -EINVAL;
	if (shp == IPC_UNUSED || shp == IPC_NOID)
		goto out;
	err = -EIDRM;
	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
		goto out;
	ipcp = &shp->u.shm_perm;

	switch (cmd) {
	case SHM_UNLOCK:
		err = -EPERM;
		if (!capable(CAP_IPC_LOCK))
			goto out;
		err = -EINVAL;
		if (!(ipcp->mode & SHM_LOCKED))
			goto out;
		ipcp->mode &= ~SHM_LOCKED;
		break;
	case SHM_LOCK:
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		err = -EPERM;
		if (!capable(CAP_IPC_LOCK))
			goto out;
		err = -EINVAL;
		if (ipcp->mode & SHM_LOCKED)
			goto out;
		ipcp->mode |= SHM_LOCKED;
		break;
	case IPC_STAT:
		err = -EACCES;
		if (ipcperms (ipcp, S_IRUGO))
			goto out;
		err = -EFAULT;
		if(copy_to_user (buf, &shp->u, sizeof(shp->u)))
			goto out;
		break;
	case IPC_SET:
		if (current->euid == shp->u.shm_perm.uid ||
		    current->euid == shp->u.shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			ipcp->uid = tbuf.shm_perm.uid;
			ipcp->gid = tbuf.shm_perm.gid;
			ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (tbuf.shm_perm.mode & S_IRWXUGO);
			shp->u.shm_ctime = CURRENT_TIME;
			break;
		}
		err = -EPERM;
		goto out;
	case IPC_RMID:
		if (current->euid == shp->u.shm_perm.uid ||
		    current->euid == shp->u.shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			shp->u.shm_perm.mode |= SHM_DEST;
			if (shp->u.shm_nattch <= 0)
				killseg (id);
			break;
		}
		err = -EPERM;
		goto out;
	default:
		err = -EINVAL;
		goto out;
	}
	err = 0;
out:
	unlock_kernel();
	return err;
}

/*
 * The per process internal structure for managing segments is
 * `struct vm_area_struct'.
 * A shmat will add to and shmdt will remove from the list.
 * shmd->vm_mm		the attacher
 * shmd->vm_start	virt addr of attach, multiple of SHMLBA
 * shmd->vm_end		multiple of SHMLBA
 * shmd->vm_next	next attach for task
 * shmd->vm_next_share	next attach for segment
 * shmd->vm_offset	offset into segment
 * shmd->vm_pte		signature for this attach
 */
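
/*
 * Note on vm_pte: the attach "signature" is built in sys_shmat() as
 * SWP_ENTRY(SHM_SWP_TYPE, id) and decoded in shm_open(), shm_close()
 * and shm_nopage() with SWP_OFFSET() & SHM_ID_MASK; it carries the
 * segment id, not a real pte.  The attaches list itself is singly
 * linked through vm_next_share with a vm_pprev_share back pointer, so
 * remove_attach() can unlink a descriptor in O(1).
 */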

static struct vm_operations_struct shm_vm_ops = {
	shm_open,		/* open - callback for a new vm-area open */
	shm_close,		/* close - callback for when the vm-area is released */
	NULL,			/* no need to sync pages at unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	shm_nopage,		/* nopage */
	NULL,			/* wppage */
	shm_swapout		/* swapout */
};

/* Insert shmd into the list shp->attaches */
static inline void insert_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
	if((shmd->vm_next_share = shp->attaches) != NULL)
		shp->attaches->vm_pprev_share = &shmd->vm_next_share;
	shp->attaches = shmd;
	shmd->vm_pprev_share = &shp->attaches;
}

/* Remove shmd from list shp->attaches */
static inline void remove_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
	if(shmd->vm_next_share)
		shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
	*shmd->vm_pprev_share = shmd->vm_next_share;
}

/*
 * ensure page tables exist
 * mark page table entries with shm_sgn.
 */
static int shm_map (struct vm_area_struct *shmd)
{
	unsigned long tmp;

	/* clear old mappings */
	do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

	/* add new mapping */
	tmp = shmd->vm_end - shmd->vm_start;
	if((current->mm->total_vm << PAGE_SHIFT) + tmp
	   > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;
	current->mm->total_vm += tmp >> PAGE_SHIFT;
	insert_vm_struct(current->mm, shmd);
	merge_segments(current->mm, shmd->vm_start, shmd->vm_end);

	return 0;
}

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	struct vm_area_struct *shmd;
	int err = -EINVAL;
	unsigned int id;
	unsigned long addr;
	unsigned long len;

	down(&current->mm->mmap_sem);
	lock_kernel();
	if (shmid < 0) {
		/* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
		goto out;
	}

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		/* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
		goto out;
	}

	if (!(addr = (ulong) shmaddr)) {
		if (shmflg & SHM_REMAP)
			goto out;
		err = -ENOMEM;
		addr = 0;
	again:
		if (!(addr = get_unmapped_area(addr, shp->u.shm_segsz)))
			goto out;
		if(addr & (SHMLBA - 1)) {
			addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
			goto again;
		}
	} else if (addr & (SHMLBA-1)) {
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1);	/* round down */
		else
			goto out;
	}
	/*
	 * Check if addr exceeds TASK_SIZE (from do_mmap)
	 */
	len = PAGE_SIZE*shp->shm_npages;
	err = -EINVAL;
	if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
		goto out;
	/*
	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	 */
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
	{
		/* printk("shmat() -> EINVAL because segment intersects stack\n"); */
		goto out;
	}
	if (!(shmflg & SHM_REMAP))
		if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))) {
			/* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
				addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
			goto out;
		}

	err = -EACCES;
	if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
		goto out;
	err = -EIDRM;
	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
		goto out;

	err = -ENOMEM;
	shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!shmd)
		goto out;
	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
		kmem_cache_free(vm_area_cachep, shmd);
		err = -EIDRM;
		goto out;
	}

	shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_file = NULL;
	shmd->vm_offset = 0;
	shmd->vm_ops = &shm_vm_ops;

	shp->u.shm_nattch++;	/* prevent destruction */
	if ((err = shm_map (shmd))) {
		if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
			killseg(id);
		kmem_cache_free(vm_area_cachep, shmd);
		goto out;
	}

	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */

	shp->u.shm_lpid = current->pid;
	shp->u.shm_atime = CURRENT_TIME;

	*raddr = addr;
	err = 0;
out:
	unlock_kernel();
	up(&current->mm->mmap_sem);
	return err;
}

/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	unsigned int id;
	struct shmid_kernel *shp;

	lock_kernel();
	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	shp = shm_segs[id];
	if (shp == IPC_UNUSED) {
		printk("shm_open: unused id=%d PANIC\n", id);
		return;
	}
	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
	shp->u.shm_nattch++;
	shp->u.shm_atime = CURRENT_TIME;
	shp->u.shm_lpid = current->pid;
	unlock_kernel();
}

/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;
	int id;

	lock_kernel();
	/* remove from the list of attaches of the shm segment */
	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	shp = shm_segs[id];
	remove_attach(shp,shmd);  /* remove from shp->attaches */
	shp->u.shm_lpid = current->pid;
	shp->u.shm_dtime = CURRENT_TIME;
	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
		killseg (id);
	unlock_kernel();
}

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage int sys_shmdt (char *shmaddr)
{
	struct vm_area_struct *shmd, *shmdnext;

	down(&current->mm->mmap_sem);
	lock_kernel();
	for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - shmd->vm_offset == (ulong) shmaddr)
			do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	unlock_kernel();
	up(&current->mm->mmap_sem);
	return 0;
}
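
/*
 * Illustrative userspace view (not part of the kernel): a minimal
 * sketch, assuming the standard libc wrappers, of how a segment moves
 * through the syscalls implemented above -- shmget(), shmat(), shmdt()
 * and shmctl() end up in sys_shmget(), sys_shmat(), sys_shmdt() and
 * sys_shmctl() respectively.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <string.h>
 *
 *	int shm_example(void)
 *	{
 *		char *p;
 *		int id;
 *
 *		id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *		if (id < 0)
 *			return -1;
 *		p = (char *) shmat(id, NULL, 0);
 *		if (p == (char *) -1)
 *			return -1;
 *		strcpy(p, "hello");
 *		shmdt(p);
 *		shmctl(id, IPC_RMID, NULL);
 *		return 0;
 *	}
 */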

/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them..
 */
static int shm_swapout(struct vm_area_struct * vma, struct page * page)
{
	return 0;
}
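
/*
 * shm_pages[idx] holds one of three things for each page of a segment:
 * 0 (never touched), a present pte (page resident), or a swap entry.
 * shm_nopage() below resolves a fault accordingly: allocate a fresh
 * page, reuse the resident one, or bring the page back from swap, and
 * return it with an extra reference taken via get_page().
 */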

/*
 * page not present ... go through shm_pages
 */
static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	pte_t pte;
	struct shmid_kernel *shp;
	unsigned int id, idx;
	unsigned long page;
	struct page * page_map;

	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;

#ifdef DEBUG_SHM
	if (id > max_shmid) {
		printk ("shm_nopage: id=%d too big. proc mem corrupted\n", id);
		return 0;
	}
#endif
	shp = shm_segs[id];

#ifdef DEBUG_SHM
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		printk ("shm_nopage: id=%d invalid. Race.\n", id);
		return 0;
	}
	if (idx >= shp->shm_npages) {
		printk ("shm_nopage : too large page index. id=%d\n", id);
		return 0;
	}
#endif

	lock_kernel();
again:
	pte = __pte(shp->shm_pages[idx]);
	if (!pte_present(pte)) {
		if (pte_none(pte)) {
			page = get_free_page(GFP_USER);
			if (!page)
				goto oom;
			if (pte_val(pte) != shp->shm_pages[idx])
				goto changed;
		} else {
			unsigned long entry = pte_val(pte);

			page_map = lookup_swap_cache(entry);
			if (!page_map) {
				swapin_readahead(entry);
				page_map = read_swap_cache(entry);
			}
			pte = __pte(shp->shm_pages[idx]);
			page = page_address(page_map);
			if (pte_present(pte))
				goto present;
			if (!page_map)
				goto oom;
			delete_from_swap_cache(page_map);
			swap_free(entry);
			shm_swp--;
		}
		shm_rss++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		shp->shm_pages[idx] = pte_val(pte);
	} else
		--current->maj_flt;  /* was incremented in do_no_page */

done:	/* pte_val(pte) == shp->shm_pages[idx] */
	unlock_kernel();
	current->min_flt++;
	get_page(mem_map + MAP_NR(pte_page(pte)));
	return pte_page(pte);

changed:
	free_page(page);
	goto again;
present:
	if (page_map)
		free_page_and_swap_cache(page);
	goto done;
oom:
	unlock_kernel();
	return -1;
}
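
/*
 * The swap-out scan below keeps a persistent cursor (swap_id, swap_idx)
 * so successive calls continue where the previous one stopped; locked
 * segments (SHM_LOCKED) and non-present pages are skipped, and the scan
 * gives up after `counter' candidates or one full loop over the ids.
 */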

/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

int shm_swap (int prio, int gfp_mask)
{
	pte_t page;
	struct shmid_kernel *shp;
	unsigned long swap_nr;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	counter = shm_rss >> prio;
	if (!counter || !(swap_nr = get_swap_page()))
		return 0;

check_id:
	shp = shm_segs[swap_id];
	if (shp == IPC_UNUSED || shp == IPC_NOID || shp->u.shm_perm.mode & SHM_LOCKED ) {
	next_id:
		swap_idx = 0;
		if (++swap_id > max_shmid) {
			swap_id = 0;
			if (loop)
				goto failed;
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	page = __pte(shp->shm_pages[idx]);
	if (!pte_present(page))
		goto check_table;
	page_map = &mem_map[MAP_NR(pte_page(page))];
	if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))
		goto check_table;
	swap_attempts++;

	if (--counter < 0) { /* failed */
	failed:
		swap_free (swap_nr);
		return 0;
	}
	if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
		goto check_table;
	shp->shm_pages[idx] = swap_nr;
	swap_duplicate(swap_nr);
	add_to_swap_cache(page_map, swap_nr);
	rw_swap_page(WRITE, page_map, 0);

	__free_page(page_map);
	swap_successes++;
	shm_swp++;
	shm_rss--;
	return 1;
}

/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   unsigned long page, unsigned long entry)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	shp->shm_pages[idx] = pte_val(pte);
	get_page(mem_map + MAP_NR(page));
	shm_rss++;

	swap_free(entry);
	shm_swp--;
}

/*
 * shm_unuse() searches for a shm page that may have been swapped out to
 * the given swap entry and, if one is found, maps it back in.
 */
void shm_unuse(unsigned long entry, unsigned long page)
{
	int i, n;

	for (i = 0; i < SHMMNI; i++)
		if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID)
			for (n = 0; n < shm_segs[i]->shm_npages; n++)
				if (shm_segs[i]->shm_pages[n] == entry)
				{
					shm_unuse_page(shm_segs[i], n,
						       page, entry);
					return;
				}
}
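
/*
 * Note: shm_unuse() is the counterpart of shm_swap() for swap teardown
 * -- presumably invoked from the swapoff path (cf. the shm_unuse() fix
 * noted in the header) -- turning a swap entry found in some
 * shm_pages[] back into a present, dirty pte via shm_unuse_page().
 */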

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	len += sprintf(buffer, "       key      shmid perms size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");

	for(i = 0; i < SHMMNI; i++)
		if(shm_segs[i] != IPC_UNUSED) {
			len += sprintf(buffer + len, "%10d %10d %4o %10d %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n",
				shm_segs[i]->u.shm_perm.key,
				shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
				shm_segs[i]->u.shm_perm.mode,
				shm_segs[i]->u.shm_segsz,
				shm_segs[i]->u.shm_cpid,
				shm_segs[i]->u.shm_lpid,
				shm_segs[i]->u.shm_nattch,
				shm_segs[i]->u.shm_perm.uid,
				shm_segs[i]->u.shm_perm.gid,
				shm_segs[i]->u.shm_perm.cuid,
				shm_segs[i]->u.shm_perm.cgid,
				shm_segs[i]->u.shm_atime,
				shm_segs[i]->u.shm_dtime,
				shm_segs[i]->u.shm_ctime);

			pos += len;
			if(pos < offset) {
				len = 0;
				begin = pos;
			}
			if(pos > offset + length)
				goto done;
		}
	*eof = 1;
done:
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	return len;
}
#endif