/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 * Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 */

#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/proc_fs.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
extern unsigned long get_swap_page (void);
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, int size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static unsigned long shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct vm_area_struct *, struct page *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

static int shm_tot = 0;         /* total number of shared memory pages */
static int shm_rss = 0;         /* number of shared memory pages that are in memory */
static int shm_swp = 0;         /* number of shared memory pages that are in swap */
static int max_shmid = 0;       /* every used id is <= max_shmid */
static DECLARE_WAIT_QUEUE_HEAD(shm_lock);       /* calling findkey() may need to wait */
static struct shmid_kernel *shm_segs[SHMMNI];

static unsigned short shm_seq = 0;      /* incremented, for recognizing stale ids */

/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;

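/*
 * Note on id encoding, used throughout this file: the shmid handed to
 * user space is  seq * SHMMNI + index,  where `index' is the slot in
 * shm_segs[] and `seq' is the sequence number kept in the segment's
 * ipc_perm.  Callers below recover the slot with  shmid % SHMMNI  and
 * detect stale ids by comparing  shmid / SHMMNI  against the current seq.
 */
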
void __init shm_init (void)
{
        int id;
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *ent;
#endif

        for (id = 0; id < SHMMNI; id++)
                shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
        shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
        init_waitqueue_head(&shm_lock);
#ifdef CONFIG_PROC_FS
        ent = create_proc_entry("sysvipc/shm", 0, 0);
        ent->read_proc = sysvipc_shm_read_proc;
#endif
        return;
}

static int findkey (key_t key)
{
        int id;
        struct shmid_kernel *shp;

        for (id = 0; id <= max_shmid; id++) {
                while ((shp = shm_segs[id]) == IPC_NOID)
                        sleep_on (&shm_lock);
                if (shp == IPC_UNUSED)
                        continue;
                if (key == shp->u.shm_perm.key)
                        return id;
        }
        return -1;
}

/*
 * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
 */
static int newseg (key_t key, int shmflg, int size)
{
        struct shmid_kernel *shp;
        int numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        int id, i;

        if (size < SHMMIN)
                return -EINVAL;
        if (shm_tot + numpages >= SHMALL)
                return -ENOSPC;
        for (id = 0; id < SHMMNI; id++)
                if (shm_segs[id] == IPC_UNUSED) {
                        shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
                        goto found;
                }
        return -ENOSPC;

found:
        shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
        if (!shp) {
                shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
                wake_up (&shm_lock);
                return -ENOMEM;
        }

        shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong));
        if (!shp->shm_pages) {
                shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
                wake_up (&shm_lock);
                kfree(shp);
                return -ENOMEM;
        }

        for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
        shm_tot += numpages;
        shp->u.shm_perm.key = key;
        shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
        shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
        shp->u.shm_perm.cgid = shp->u.shm_perm.gid = current->egid;
        shp->u.shm_perm.seq = shm_seq;
        shp->u.shm_segsz = size;
        shp->u.shm_cpid = current->pid;
        shp->attaches = NULL;
        shp->u.shm_lpid = shp->u.shm_nattch = 0;
        shp->u.shm_atime = shp->u.shm_dtime = 0;
        shp->u.shm_ctime = CURRENT_TIME;
        shp->shm_npages = numpages;

        if (id > max_shmid)
                max_shmid = id;
        shm_segs[id] = shp;
        used_segs++;
        wake_up (&shm_lock);
        return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
}

int shmmax = SHMMAX;

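/*
 * For illustration only: a typical user-space creator does
 *
 *      int id = shmget(key, size, IPC_CREAT | 0600);
 *
 * which ends up here.  IPC_PRIVATE always allocates a fresh segment via
 * newseg(); any other key is first looked up with findkey().
 */
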
asmlinkage int sys_shmget (key_t key, int size, int shmflg)
{
        struct shmid_kernel *shp;
        int err, id = 0;

        down(&current->mm->mmap_sem);
        lock_kernel();
        if (size < 0 || size > shmmax) {
                err = -EINVAL;
        } else if (key == IPC_PRIVATE) {
                err = newseg(key, shmflg, size);
        } else if ((id = findkey (key)) == -1) {
                if (!(shmflg & IPC_CREAT))
                        err = -ENOENT;
                else
                        err = newseg(key, shmflg, size);
        } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
                err = -EEXIST;
        } else {
                shp = shm_segs[id];
                if (shp->u.shm_perm.mode & SHM_DEST)
                        err = -EIDRM;
                else if (size > shp->u.shm_segsz)
                        err = -EINVAL;
                else if (ipcperms (&shp->u.shm_perm, shmflg))
                        err = -EACCES;
                else
                        err = (int) shp->u.shm_perm.seq * SHMMNI + id;
        }
        unlock_kernel();
        up(&current->mm->mmap_sem);
        return err;
}

/*
 * Only called after testing nattch and SHM_DEST.
 * Here pages, pgtable and shmid_kernel are freed.
 */
static void killseg (int id)
{
        struct shmid_kernel *shp;
        int i, numpages;

        shp = shm_segs[id];
        if (shp == IPC_NOID || shp == IPC_UNUSED) {
                printk ("shm nono: killseg called on unused seg id=%d\n", id);
                return;
        }
        shp->u.shm_perm.seq++;          /* for shmat */
        shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI);     /* increment, but avoid overflow */
        shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
        used_segs--;
        if (id == max_shmid)
                while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
        if (!shp->shm_pages) {
                printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
                return;
        }
        numpages = shp->shm_npages;
        for (i = 0; i < numpages; i++) {
                pte_t pte;
                pte = __pte(shp->shm_pages[i]);
                if (pte_none(pte))
                        continue;
                if (pte_present(pte)) {
                        free_page (pte_page(pte));
                        shm_rss--;
                } else {
                        swap_free(pte_val(pte));
                        shm_swp--;
                }
        }
        vfree(shp->shm_pages);
        shm_tot -= numpages;
        kfree(shp);
        return;
}

asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
        struct shmid_ds tbuf;
        struct shmid_kernel *shp;
        struct ipc_perm *ipcp;
        int id, err = -EINVAL;

        lock_kernel();
        if (cmd < 0 || shmid < 0)
                goto out;
        if (cmd == IPC_SET) {
                err = -EFAULT;
                if(copy_from_user (&tbuf, buf, sizeof (*buf)))
                        goto out;
        }

        switch (cmd) { /* replace with proc interface ? */
        case IPC_INFO:
        {
                struct shminfo shminfo;
                err = -EFAULT;
                if (!buf)
                        goto out;
                shminfo.shmmni = SHMMNI;
                shminfo.shmmax = shmmax;
                shminfo.shmmin = SHMMIN;
                shminfo.shmall = SHMALL;
                shminfo.shmseg = SHMSEG;
                if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
                        goto out;
                err = max_shmid;
                goto out;
        }
        case SHM_INFO:
        {
                struct shm_info shm_info;
                err = -EFAULT;
                shm_info.used_ids = used_segs;
                shm_info.shm_rss = shm_rss;
                shm_info.shm_tot = shm_tot;
                shm_info.shm_swp = shm_swp;
                shm_info.swap_attempts = swap_attempts;
                shm_info.swap_successes = swap_successes;
                if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
                        goto out;
                err = max_shmid;
                goto out;
        }
        case SHM_STAT:
                err = -EINVAL;
                if (shmid > max_shmid)
                        goto out;
                shp = shm_segs[shmid];
                if (shp == IPC_UNUSED || shp == IPC_NOID)
                        goto out;
                if (ipcperms (&shp->u.shm_perm, S_IRUGO))
                        goto out;
                id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
                err = -EFAULT;
                if(copy_to_user (buf, &shp->u, sizeof(*buf)))
                        goto out;
                err = id;
                goto out;
        }

        shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
        err = -EINVAL;
        if (shp == IPC_UNUSED || shp == IPC_NOID)
                goto out;
        err = -EIDRM;
        if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
                goto out;
        ipcp = &shp->u.shm_perm;

        switch (cmd) {
        case SHM_UNLOCK:
                err = -EPERM;
                if (!capable(CAP_IPC_LOCK))
                        goto out;
                err = -EINVAL;
                if (!(ipcp->mode & SHM_LOCKED))
                        goto out;
                ipcp->mode &= ~SHM_LOCKED;
                break;
        case SHM_LOCK:
                /* Allow superuser to lock segment in memory */
                /* Should the pages be faulted in here or leave it to user? */
                /* need to determine interaction with current->swappable */
                err = -EPERM;
                if (!capable(CAP_IPC_LOCK))
                        goto out;
                err = -EINVAL;
                if (ipcp->mode & SHM_LOCKED)
                        goto out;
                ipcp->mode |= SHM_LOCKED;
                break;
        case IPC_STAT:
                err = -EACCES;
                if (ipcperms (ipcp, S_IRUGO))
                        goto out;
                err = -EFAULT;
                if(copy_to_user (buf, &shp->u, sizeof(shp->u)))
                        goto out;
                break;
        case IPC_SET:
                if (current->euid == shp->u.shm_perm.uid ||
                    current->euid == shp->u.shm_perm.cuid ||
                    capable(CAP_SYS_ADMIN)) {
                        ipcp->uid = tbuf.shm_perm.uid;
                        ipcp->gid = tbuf.shm_perm.gid;
                        ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
                                | (tbuf.shm_perm.mode & S_IRWXUGO);
                        shp->u.shm_ctime = CURRENT_TIME;
                        break;
                }
                err = -EPERM;
                goto out;
        case IPC_RMID:
                if (current->euid == shp->u.shm_perm.uid ||
                    current->euid == shp->u.shm_perm.cuid ||
                    capable(CAP_SYS_ADMIN)) {
                        shp->u.shm_perm.mode |= SHM_DEST;
                        if (shp->u.shm_nattch <= 0)
                                killseg (id);
                        break;
                }
                err = -EPERM;
                goto out;
        default:
                err = -EINVAL;
                goto out;
        }
        err = 0;
out:
        unlock_kernel();
        return err;
}

/*
 * The per process internal structure for managing segments is
 * `struct vm_area_struct'.
 * A shmat will add to and shmdt will remove from the list.
 * shmd->vm_mm          the attacher
 * shmd->vm_start       virt addr of attach, multiple of SHMLBA
 * shmd->vm_end         multiple of SHMLBA
 * shmd->vm_next        next attach for task
 * shmd->vm_next_share  next attach for segment
 * shmd->vm_offset      offset into segment
 * shmd->vm_pte         signature for this attach
 */

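/*
 * shmd->vm_pte is set by sys_shmat() to SWP_ENTRY(SHM_SWP_TYPE, id);
 * shm_open(), shm_close() and shm_nopage() recover the segment id from
 * it with SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK.
 */
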
static struct vm_operations_struct shm_vm_ops = {
        shm_open,       /* open - callback for a new vm-area open */
        shm_close,      /* close - callback for when the vm-area is released */
        NULL,           /* no need to sync pages at unmap */
        NULL,           /* protect */
        NULL,           /* sync */
        NULL,           /* advise */
        shm_nopage,     /* nopage */
        NULL,           /* wppage */
        shm_swapout,    /* swapout */
        NULL            /* swapin */
};

/* Insert shmd into the list shp->attaches */
static inline void insert_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
        if((shmd->vm_next_share = shp->attaches) != NULL)
                shp->attaches->vm_pprev_share = &shmd->vm_next_share;
        shp->attaches = shmd;
        shmd->vm_pprev_share = &shp->attaches;
}

/* Remove shmd from list shp->attaches */
static inline void remove_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
        if(shmd->vm_next_share)
                shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
        *shmd->vm_pprev_share = shmd->vm_next_share;
}

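/*
 * The attach list is singly linked through vm_next_share, with
 * vm_pprev_share pointing back at the previous vm_next_share field (or
 * at shp->attaches for the list head), so remove_attach() needs no
 * special case for removing the first element.
 */
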
/*
 * ensure page tables exist
 * mark page table entries with shm_sgn.
 */
static int shm_map (struct vm_area_struct *shmd)
{
        unsigned long tmp;

        /* clear old mappings */
        do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

        /* add new mapping */
        tmp = shmd->vm_end - shmd->vm_start;
        if((current->mm->total_vm << PAGE_SHIFT) + tmp
           > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
                return -ENOMEM;
        current->mm->total_vm += tmp >> PAGE_SHIFT;
        insert_vm_struct(current->mm, shmd);
        merge_segments(current->mm, shmd->vm_start, shmd->vm_end);

        return 0;
}

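/*
 * Note: shm_map() only installs the vma; the pages themselves are
 * faulted in lazily through shm_nopage() below.
 */
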
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
        struct shmid_kernel *shp;
        struct vm_area_struct *shmd;
        int err = -EINVAL;
        unsigned int id;
        unsigned long addr;
        unsigned long len;

        down(&current->mm->mmap_sem);
        lock_kernel();
        if (shmid < 0) {
                /* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
                goto out;
        }

        shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
        if (shp == IPC_UNUSED || shp == IPC_NOID) {
                /* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
                goto out;
        }

        if (!(addr = (ulong) shmaddr)) {
                if (shmflg & SHM_REMAP)
                        goto out;
                err = -ENOMEM;
                addr = 0;
        again:
                if (!(addr = get_unmapped_area(addr, shp->u.shm_segsz)))
                        goto out;
                if(addr & (SHMLBA - 1)) {
                        addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
                        goto again;
                }
        } else if (addr & (SHMLBA-1)) {
                if (shmflg & SHM_RND)
                        addr &= ~(SHMLBA-1);    /* round down */
                else
                        goto out;
        }
        /*
         * Check if addr exceeds TASK_SIZE (from do_mmap)
         */
        len = PAGE_SIZE*shp->shm_npages;
        err = -EINVAL;
        if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
                goto out;
        /*
         * If shm segment goes below stack, make sure there is some
         * space left for the stack to grow (presently 4 pages).
         */
        if (addr < current->mm->start_stack &&
            addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
        {
                /* printk("shmat() -> EINVAL because segment intersects stack\n"); */
                goto out;
        }
        if (!(shmflg & SHM_REMAP))
                if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))) {
                        /* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
                                addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
                        goto out;
                }

        err = -EACCES;
        if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
                goto out;
        err = -EIDRM;
        if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
                goto out;

        err = -ENOMEM;
        shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!shmd)
                goto out;
        if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
                kmem_cache_free(vm_area_cachep, shmd);
                err = -EIDRM;
                goto out;
        }

        shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);
        shmd->vm_start = addr;
        shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
        shmd->vm_mm = current->mm;
        shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
        shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
                         | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
                         | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
        shmd->vm_file = NULL;
        shmd->vm_offset = 0;
        shmd->vm_ops = &shm_vm_ops;

        shp->u.shm_nattch++;            /* prevent destruction */
        if ((err = shm_map (shmd))) {
                if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
                        killseg(id);
                kmem_cache_free(vm_area_cachep, shmd);
                goto out;
        }

        insert_attach(shp,shmd);        /* insert shmd into shp->attaches */

        shp->u.shm_lpid = current->pid;
        shp->u.shm_atime = CURRENT_TIME;

        *raddr = addr;
        err = 0;
out:
        unlock_kernel();
        up(&current->mm->mmap_sem);
        return err;
}

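/*
 * Attach lifetime: sys_shmat() and shm_open() increment shm_nattch,
 * shm_close() decrements it, and a segment marked SHM_DEST is torn down
 * by killseg() once the count drops to zero.
 */
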
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
        unsigned int id;
        struct shmid_kernel *shp;

        id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
        shp = shm_segs[id];
        if (shp == IPC_UNUSED) {
                printk("shm_open: unused id=%d PANIC\n", id);
                return;
        }
        insert_attach(shp,shmd);        /* insert shmd into shp->attaches */
        shp->u.shm_nattch++;
        shp->u.shm_atime = CURRENT_TIME;
        shp->u.shm_lpid = current->pid;
}

/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
        struct shmid_kernel *shp;
        int id;

        /* remove from the list of attaches of the shm segment */
        id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
        shp = shm_segs[id];
        remove_attach(shp,shmd);        /* remove from shp->attaches */
        shp->u.shm_lpid = current->pid;
        shp->u.shm_dtime = CURRENT_TIME;
        if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
                killseg (id);
}

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage int sys_shmdt (char *shmaddr)
{
        struct vm_area_struct *shmd, *shmdnext;

        down(&current->mm->mmap_sem);
        lock_kernel();
        for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
                shmdnext = shmd->vm_next;
                if (shmd->vm_ops == &shm_vm_ops
                    && shmd->vm_start - shmd->vm_offset == (ulong) shmaddr)
                        do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
        }
        unlock_kernel();
        up(&current->mm->mmap_sem);
        return 0;
}

/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them..
 */
static int shm_swapout(struct vm_area_struct * vma, struct page * page)
{
        return 0;
}

/*
 * page not present ... go through shm_pages
 */
static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
        pte_t pte;
        struct shmid_kernel *shp;
        unsigned int id, idx;

        id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
        idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;

#ifdef DEBUG_SHM
        if (id > max_shmid) {
                printk ("shm_nopage: id=%d too big. proc mem corrupted\n", id);
                return 0;
        }
#endif
        shp = shm_segs[id];

#ifdef DEBUG_SHM
        if (shp == IPC_UNUSED || shp == IPC_NOID) {
                printk ("shm_nopage: id=%d invalid. Race.\n", id);
                return 0;
        }
        if (idx >= shp->shm_npages) {
                printk ("shm_nopage : too large page index. id=%d\n", id);
                return 0;
        }
#endif

        pte = __pte(shp->shm_pages[idx]);
        if (!pte_present(pte)) {
                unsigned long page = get_free_page(GFP_USER);
                if (!page) {
                        oom(current);
                        return 0;
                }
                pte = __pte(shp->shm_pages[idx]);
                if (pte_present(pte)) {
                        free_page (page); /* doesn't sleep */
                        goto done;
                }
                if (!pte_none(pte)) {
                        rw_swap_page_nocache(READ, pte_val(pte), (char *)page);
                        pte = __pte(shp->shm_pages[idx]);
                        if (pte_present(pte)) {
                                free_page (page); /* doesn't sleep */
                                goto done;
                        }
                        swap_free(pte_val(pte));
                        shm_swp--;
                }
                shm_rss++;
                pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
                shp->shm_pages[idx] = pte_val(pte);
        } else
                --current->maj_flt;     /* was incremented in do_no_page */

done:   /* pte_val(pte) == shp->shm_pages[idx] */
        current->min_flt++;
        atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
        return pte_page(pte);
}

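/*
 * Note that shm_nopage() re-reads shm_pages[idx] after every call that
 * may sleep (get_free_page(), rw_swap_page_nocache()) and backs out if
 * another fault raced in and installed the page first.
 */
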
/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0;       /* currently being swapped */
static unsigned long swap_idx = 0;      /* next to swap */

int shm_swap (int prio, int gfp_mask)
{
        pte_t page;
        struct shmid_kernel *shp;
        unsigned long swap_nr;
        unsigned long id, idx;
        int loop = 0;
        int counter;

        counter = shm_rss >> prio;
        if (!counter || !(swap_nr = get_swap_page()))
                return 0;

check_id:
        shp = shm_segs[swap_id];
        if (shp == IPC_UNUSED || shp == IPC_NOID || shp->u.shm_perm.mode & SHM_LOCKED ) {
next_id:
                swap_idx = 0;
                if (++swap_id > max_shmid) {
                        swap_id = 0;
                        if (loop)
                                goto failed;
                        loop = 1;
                }
                goto check_id;
        }
        id = swap_id;

check_table:
        idx = swap_idx++;
        if (idx >= shp->shm_npages)
                goto next_id;

        page = __pte(shp->shm_pages[idx]);
        if (!pte_present(page))
                goto check_table;
        if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))]))
                goto check_table;
        swap_attempts++;

        if (--counter < 0) { /* failed */
failed:
                swap_free (swap_nr);
                return 0;
        }
        if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1)
                goto check_table;
        shp->shm_pages[idx] = swap_nr;
        rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page));
        free_page(pte_page(page));
        swap_successes++;
        shm_swp++;
        shm_rss--;
        return 1;
}

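/*
 * The scan position (swap_id/swap_idx) lives in the statics above, so
 * successive shm_swap() calls resume where the previous one stopped;
 * the `loop' flag limits a single call to one full pass over the
 * segments.
 */
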
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
                           unsigned long page, unsigned long entry)
{
        pte_t pte;

        pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
        shp->shm_pages[idx] = pte_val(pte);
        atomic_inc(&mem_map[MAP_NR(page)].count);
        shm_rss++;

        swap_free(entry);
        shm_swp--;
}

/*
 * shm_unuse() - search for a possibly swapped out shm page.
 */
void shm_unuse(unsigned long entry, unsigned long page)
{
        int i, n;

        for (i = 0; i < SHMMNI; i++)
                if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID)
                        for (n = 0; n < shm_segs[i]->shm_npages; n++)
                                if (shm_segs[i]->shm_pages[n] == entry)
                                {
                                        shm_unuse_page(shm_segs[i], n,
                                                       page, entry);
                                        return;
                                }
}

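/*
 * Presumably this is only reached from the swapoff path, once for each
 * swap entry that is still in use, so the page can be pulled back in
 * before the swap area goes away.
 */
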
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
        off_t pos = 0;
        off_t begin = 0;
        int i, len = 0;

        len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");

        for(i = 0; i < SHMMNI; i++)
                if(shm_segs[i] != IPC_UNUSED) {
                        len += sprintf(buffer + len, "%10d %10d %4o %10d %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n",
                                shm_segs[i]->u.shm_perm.key,
                                shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
                                shm_segs[i]->u.shm_perm.mode,
                                shm_segs[i]->u.shm_segsz,
                                shm_segs[i]->u.shm_cpid,
                                shm_segs[i]->u.shm_lpid,
                                shm_segs[i]->u.shm_nattch,
                                shm_segs[i]->u.shm_perm.uid,
                                shm_segs[i]->u.shm_perm.gid,
                                shm_segs[i]->u.shm_perm.cuid,
                                shm_segs[i]->u.shm_perm.cgid,
                                shm_segs[i]->u.shm_atime,
                                shm_segs[i]->u.shm_dtime,
                                shm_segs[i]->u.shm_ctime);

                        pos += len;
                        if(pos < offset) {
                                len = 0;
                                begin = pos;
                        }
                        if(pos > offset + length)
                                goto done;
                }
        *eof = 1;
done:
        *start = buffer + (offset - begin);
        len -= (offset - begin);
        if(len > length)
                len = length;
        if(len < 0)
                len = 0;
        return len;
}
#endif