/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 */

#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, size_t size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

static int shm_tot = 0;		/* total number of shared memory pages */
static int shm_rss = 0;		/* number of shared memory pages that are in memory */
static int shm_swp = 0;		/* number of shared memory pages that are in swap */
static int max_shmid = 0;	/* every used id is <= max_shmid */
static DECLARE_WAIT_QUEUE_HEAD(shm_wait);	/* calling findkey() may need to wait */
static struct shmid_kernel *shm_segs[SHMMNI];

static unsigned short shm_seq = 0;	/* incremented, for recognizing stale ids */

spinlock_t shm_lock = SPIN_LOCK_UNLOCKED;

/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;

void __init shm_init (void)
{
	int id;

	for (id = 0; id < SHMMNI; id++)
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
	shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
	init_waitqueue_head(&shm_wait);
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
	return;
}

static int findkey (key_t key)
{
	int id;
	struct shmid_kernel *shp;

	for (id = 0; id <= max_shmid; id++) {
		if ((shp = shm_segs[id]) == IPC_NOID) {
			DECLARE_WAITQUEUE(wait, current);

			add_wait_queue(&shm_wait, &wait);
			for(;;) {
				set_current_state(TASK_UNINTERRUPTIBLE);
				if ((shp = shm_segs[id]) != IPC_NOID)
					break;
				spin_unlock(&shm_lock);
				schedule();
				spin_lock(&shm_lock);
			}
			__set_current_state(TASK_RUNNING);
			remove_wait_queue(&shm_wait, &wait);
		}
		if (shp == IPC_UNUSED)
			continue;
		if (key == shp->u.shm_perm.key)
			return id;
	}
	return -1;
}

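/*
 * Note: while newseg() builds a segment it parks IPC_NOID in shm_segs[id]
 * and drops shm_lock, so findkey() must not trust such a slot.  It sleeps
 * on shm_wait, releasing shm_lock around schedule(), and re-reads the slot
 * once newseg() has either installed the shmid_kernel or reset the slot to
 * IPC_UNUSED and issued a wake_up(&shm_wait).
 */
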
/*
 * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
 */
static int newseg (key_t key, int shmflg, size_t size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id;

	if (size < SHMMIN)
		return -EINVAL;
	if (shm_tot + numpages >= SHMALL)
		return -ENOSPC;
	for (id = 0; id < SHMMNI; id++)
		if (shm_segs[id] == IPC_UNUSED) {
			shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
			goto found;
		}
	return -ENOSPC;

found:
	spin_unlock(&shm_lock);
	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
	if (!shp) {
		spin_lock(&shm_lock);
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
		wake_up (&shm_wait);
		return -ENOMEM;
	}
	lock_kernel();
	shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
	unlock_kernel();
	if (!shp->shm_pages) {
		kfree(shp);
		spin_lock(&shm_lock);
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
		wake_up (&shm_wait);
		return -ENOMEM;
	}

	memset(shp->shm_pages, 0, numpages*sizeof(pte_t));

	shp->u.shm_perm.key = key;
	shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
	shp->u.shm_perm.cgid = shp->u.shm_perm.gid = current->egid;
	shp->u.shm_segsz = size;
	shp->u.shm_cpid = current->pid;
	shp->attaches = NULL;
	shp->u.shm_lpid = shp->u.shm_nattch = 0;
	shp->u.shm_atime = shp->u.shm_dtime = 0;
	shp->u.shm_ctime = CURRENT_TIME;
	shp->shm_npages = numpages;

	spin_lock(&shm_lock);

	shm_tot += numpages;
	shp->u.shm_perm.seq = shm_seq;

	if (id > max_shmid)
		max_shmid = id;
	shm_segs[id] = shp;
	used_segs++;
	wake_up (&shm_wait);
	return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
}

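/*
 * Note on the return value of newseg(): the user-visible shmid encodes both
 * the slot index and a generation count, shmid = seq * SHMMNI + id.  Callers
 * such as shmctl() and shmat() decode it as
 *
 *	id  = (unsigned int) shmid % SHMMNI;	(index into shm_segs[])
 *	seq = (unsigned int) shmid / SHMMNI;	(compared against shm_perm.seq)
 *
 * Because killseg() increments shm_perm.seq, a stale shmid that names a
 * reused slot fails the seq check and is rejected with -EIDRM.
 */
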
size_t shmmax = SHMMAX;

asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;

	down(&current->mm->mmap_sem);
	spin_lock(&shm_lock);
	if (size > shmmax) {
		err = -EINVAL;
	} else if (key == IPC_PRIVATE) {
		err = newseg(key, shmflg, size);
	} else if ((id = findkey (key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_segs[id];
		if (shp->u.shm_perm.mode & SHM_DEST)
			err = -EIDRM;
		else if (size > shp->u.shm_segsz)
			err = -EINVAL;
		else if (ipcperms (&shp->u.shm_perm, shmflg))
			err = -EACCES;
		else
			err = (int) shp->u.shm_perm.seq * SHMMNI + id;
	}
	spin_unlock(&shm_lock);
	up(&current->mm->mmap_sem);
	return err;
}

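/*
 * Usage sketch (userspace, illustrative only -- not part of this file): the
 * minimal life cycle of a segment as seen through the syscalls implemented
 * here.  Error handling is omitted and the size is an arbitrary example.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	   attach at a kernel-chosen address
 *	p[0] = 42;			   first touch is handled by shm_nopage()
 *	shmdt(p);			   detach; bookkeeping done in shm_close()
 *	shmctl(id, IPC_RMID, NULL);	   mark for destruction (SHM_DEST)
 */
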
/*
 * Only called after testing nattch and SHM_DEST.
 * Here pages, pgtable and shmid_kernel are freed.
 */
static void killseg (int id)
{
	struct shmid_kernel *shp;
	int i, numpages;
	int rss, swp;

	shp = shm_segs[id];
	if (shp == IPC_NOID || shp == IPC_UNUSED)
		BUG();
	shp->u.shm_perm.seq++;     /* for shmat */
	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
	shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
	used_segs--;
	if (id == max_shmid)
		while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
	if (!shp->shm_pages)
		BUG();
	spin_unlock(&shm_lock);
	numpages = shp->shm_npages;
	for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
		pte_t pte;
		pte = shp->shm_pages[i];
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			__free_page (pte_page(pte));
			rss++;
		} else {
			lock_kernel();
			swap_free(pte_to_swp_entry(pte));
			unlock_kernel();
			swp++;
		}
	}
	lock_kernel();
	vfree(shp->shm_pages);
	unlock_kernel();
	kfree(shp);
	spin_lock(&shm_lock);
	shm_rss -= rss;
	shm_swp -= swp;
	shm_tot -= numpages;
	return;
}

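/*
 * Note: by the time the loop above runs, the slot has been reset to
 * IPC_UNUSED and shm_perm.seq bumped under shm_lock, so no new attach can
 * find the segment; the pages and swap entries can therefore be released
 * with shm_lock dropped, and the global rss/swp/tot counters are corrected
 * once the lock is retaken.
 */
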
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shmid_ds tbuf;
	struct shmid_kernel *shp;
	struct ipc_perm *ipcp;
	int id, err = -EINVAL;

	if (cmd < 0 || shmid < 0)
		goto out_unlocked;
	if (cmd == IPC_SET) {
		err = -EFAULT;
		if(copy_from_user (&tbuf, buf, sizeof (*buf)))
			goto out_unlocked;
	}
	spin_lock(&shm_lock);

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo shminfo;
		err = -EFAULT;
		if (!buf)
			goto out;
		shminfo.shmmni = SHMMNI;
		shminfo.shmmax = shmmax;
		shminfo.shmmin = SHMMIN;
		shminfo.shmall = SHMALL;
		shminfo.shmseg = SHMSEG;
		spin_unlock(&shm_lock);
		if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
			goto out_unlocked;
		spin_lock(&shm_lock);
		err = max_shmid;
		goto out;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;
		err = -EFAULT;
		shm_info.used_ids = used_segs;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		spin_unlock(&shm_lock);
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
			goto out_unlocked;
		spin_lock(&shm_lock);
		err = max_shmid;
		goto out;
	}
	case SHM_STAT:
		err = -EINVAL;
		if (shmid > max_shmid)
			goto out;
		shp = shm_segs[shmid];
		if (shp == IPC_UNUSED || shp == IPC_NOID)
			goto out;
		if (ipcperms (&shp->u.shm_perm, S_IRUGO))
			goto out;
		id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
		err = -EFAULT;
		spin_unlock(&shm_lock);
		if(copy_to_user (buf, &shp->u, sizeof(*buf)))
			goto out_unlocked;
		spin_lock(&shm_lock);
		err = id;
		goto out;
	}

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	err = -EINVAL;
	if (shp == IPC_UNUSED || shp == IPC_NOID)
		goto out;
	err = -EIDRM;
	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
		goto out;
	ipcp = &shp->u.shm_perm;

	switch (cmd) {
	case SHM_UNLOCK:
		err = -EPERM;
		if (!capable(CAP_IPC_LOCK))
			goto out;
		err = -EINVAL;
		if (!(ipcp->mode & SHM_LOCKED))
			goto out;
		ipcp->mode &= ~SHM_LOCKED;
		break;
	case SHM_LOCK:
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		err = -EPERM;
		if (!capable(CAP_IPC_LOCK))
			goto out;
		err = -EINVAL;
		if (ipcp->mode & SHM_LOCKED)
			goto out;
		ipcp->mode |= SHM_LOCKED;
		break;
	case IPC_STAT:
		err = -EACCES;
		if (ipcperms (ipcp, S_IRUGO))
			goto out;
		err = -EFAULT;
		spin_unlock(&shm_lock);
		if(copy_to_user (buf, &shp->u, sizeof(shp->u)))
			goto out_unlocked;
		spin_lock(&shm_lock);
		break;
	case IPC_SET:
		if (current->euid == shp->u.shm_perm.uid ||
		    current->euid == shp->u.shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			ipcp->uid = tbuf.shm_perm.uid;
			ipcp->gid = tbuf.shm_perm.gid;
			ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (tbuf.shm_perm.mode & S_IRWXUGO);
			shp->u.shm_ctime = CURRENT_TIME;
			break;
		}
		err = -EPERM;
		goto out;
	case IPC_RMID:
		if (current->euid == shp->u.shm_perm.uid ||
		    current->euid == shp->u.shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			shp->u.shm_perm.mode |= SHM_DEST;
			if (shp->u.shm_nattch <= 0)
				killseg (id);
			break;
		}
		err = -EPERM;
		goto out;
	default:
		err = -EINVAL;
		goto out;
	}
	err = 0;
out:
	spin_unlock(&shm_lock);
out_unlocked:
	return err;
}

/*
 * The per process internal structure for managing segments is
 * `struct vm_area_struct'.
 * A shmat will add to and shmdt will remove from the list.
 * shmd->vm_mm		the attacher
 * shmd->vm_start	virt addr of attach, multiple of SHMLBA
 * shmd->vm_end		multiple of SHMLBA
 * shmd->vm_next	next attach for task
 * shmd->vm_next_share	next attach for segment
 * shmd->vm_pgoff	offset into segment (in pages)
 * shmd->vm_private_data	signature for this attach
 */

static struct vm_operations_struct shm_vm_ops = {
	shm_open,		/* open - callback for a new vm-area open */
	shm_close,		/* close - callback for when the vm-area is released */
	NULL,			/* no need to sync pages at unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	shm_nopage,		/* nopage */
	NULL,			/* wppage */
	shm_swapout		/* swapout */
};

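/*
 * Note: vm_private_data holds a pointer *into* shm_segs[] (i.e. &shm_segs[id])
 * rather than the shmid_kernel itself.  shm_open(), shm_close() and
 * shm_nopage() dereference it to find the current segment, and shm_close()
 * recovers the slot number by pointer subtraction:
 *
 *	id = (struct shmid_kernel **) shmd->vm_private_data - shm_segs;
 */
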
/* Insert shmd into the list shp->attaches */
static inline void insert_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
	if((shmd->vm_next_share = shp->attaches) != NULL)
		shp->attaches->vm_pprev_share = &shmd->vm_next_share;
	shp->attaches = shmd;
	shmd->vm_pprev_share = &shp->attaches;
}

/* Remove shmd from list shp->attaches */
static inline void remove_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
	if(shmd->vm_next_share)
		shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
	*shmd->vm_pprev_share = shmd->vm_next_share;
}

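/*
 * Note: shp->attaches is a singly linked list threaded through vm_next_share,
 * with vm_pprev_share pointing back at whichever field currently holds the
 * element, so remove_attach() needs no list walk.  An illustrative walk over
 * all attaches of a segment (not used by this file) would be:
 *
 *	struct vm_area_struct *p;
 *	for (p = shp->attaches; p; p = p->vm_next_share)
 *		...;
 */
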
/*
 * ensure page tables exist
 * mark page table entries with shm_sgn.
 */
static int shm_map (struct vm_area_struct *shmd)
{
	unsigned long tmp;

	/* clear old mappings */
	do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

	/* add new mapping */
	tmp = shmd->vm_end - shmd->vm_start;
	if((current->mm->total_vm << PAGE_SHIFT) + tmp
	   > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;
	current->mm->total_vm += tmp >> PAGE_SHIFT;
	vmlist_modify_lock(current->mm);
	insert_vm_struct(current->mm, shmd);
	merge_segments(current->mm, shmd->vm_start, shmd->vm_end);
	vmlist_modify_unlock(current->mm);

	return 0;
}

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	struct vm_area_struct *shmd;
	int err = -EINVAL;
	unsigned int id;
	unsigned long addr;
	unsigned long len;

	down(&current->mm->mmap_sem);
	spin_lock(&shm_lock);
	if (shmid < 0)
		goto out;

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID)
		goto out;

	if (!(addr = (ulong) shmaddr)) {
		if (shmflg & SHM_REMAP)
			goto out;
		err = -ENOMEM;
		addr = 0;
	again:
		if (!(addr = get_unmapped_area(addr, (unsigned long)shp->u.shm_segsz)))
			goto out;
		if(addr & (SHMLBA - 1)) {
			addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
			goto again;
		}
	} else if (addr & (SHMLBA-1)) {
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1);	/* round down */
		else
			goto out;
	}
	/*
	 * Check if addr exceeds TASK_SIZE (from do_mmap)
	 */
	len = PAGE_SIZE*shp->shm_npages;
	err = -EINVAL;
	if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
		goto out;
	/*
	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	 */
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
		goto out;
	if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + (unsigned long)shp->u.shm_segsz))
		goto out;

	err = -EACCES;
	if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
		goto out;
	err = -EIDRM;
	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
		goto out;

	spin_unlock(&shm_lock);
	err = -ENOMEM;
	shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	spin_lock(&shm_lock);
	if (!shmd)
		goto out;
	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
		kmem_cache_free(vm_area_cachep, shmd);
		err = -EIDRM;
		goto out;
	}

	shmd->vm_private_data = shm_segs + id;
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_file = NULL;
	shmd->vm_pgoff = 0;
	shmd->vm_ops = &shm_vm_ops;

	shp->u.shm_nattch++;	/* prevent destruction */
	spin_unlock(&shm_lock);
	err = shm_map (shmd);
	spin_lock(&shm_lock);
	if (err)
		goto failed_shm_map;

	insert_attach(shp,shmd);	/* insert shmd into shp->attaches */

	shp->u.shm_lpid = current->pid;
	shp->u.shm_atime = CURRENT_TIME;

	*raddr = addr;
	err = 0;
out:
	spin_unlock(&shm_lock);
	up(&current->mm->mmap_sem);
	return err;

failed_shm_map:
	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
		killseg(id);
	spin_unlock(&shm_lock);
	up(&current->mm->mmap_sem);
	kmem_cache_free(vm_area_cachep, shmd);
	return err;
}

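/*
 * Note on the ordering above: shm_lock is dropped around kmem_cache_alloc()
 * and around shm_map(), so the segment is revalidated (same slot, same seq)
 * after the allocation, and shm_nattch is raised *before* the lock is
 * released for shm_map() so that a concurrent IPC_RMID cannot destroy the
 * segment while it is being mapped.  The failed_shm_map path drops that
 * reference again and, if the segment was marked SHM_DEST meanwhile,
 * destroys it.
 */
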
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	spin_lock(&shm_lock);
	shp = *(struct shmid_kernel **) shmd->vm_private_data;
	insert_attach(shp,shmd);	/* insert shmd into shp->attaches */
	shp->u.shm_nattch++;
	shp->u.shm_atime = CURRENT_TIME;
	shp->u.shm_lpid = current->pid;
	spin_unlock(&shm_lock);
}

/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	spin_lock(&shm_lock);
	/* remove from the list of attaches of the shm segment */
	shp = *(struct shmid_kernel **) shmd->vm_private_data;
	remove_attach(shp,shmd);	/* remove from shp->attaches */
	shp->u.shm_lpid = current->pid;
	shp->u.shm_dtime = CURRENT_TIME;
	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) {
		unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs;
		killseg (id);
	}
	spin_unlock(&shm_lock);
}

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage long sys_shmdt (char *shmaddr)
{
	struct vm_area_struct *shmd, *shmdnext;

	down(&current->mm->mmap_sem);
	for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
			do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up(&current->mm->mmap_sem);
	return 0;
}

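/*
 * Note: sys_shmdt() only unmaps the matching vma; do_munmap() invokes the
 * vma's close operation (shm_close() above), which drops shm_nattch and
 * destroys SHM_DEST segments, so no segment bookkeeping is needed here.
 */
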
/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them.
 */
static int shm_swapout(struct page * page, struct file *file)
{
	return 0;
}

/*
 * page not present ... go through shm_pages
 */
static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	pte_t pte;
	struct shmid_kernel *shp;
	unsigned int idx;
	struct page * page;

	shp = *(struct shmid_kernel **) shmd->vm_private_data;
	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	spin_lock(&shm_lock);
again:
	pte = shp->shm_pages[idx];
	if (!pte_present(pte)) {
		if (pte_none(pte)) {
			spin_unlock(&shm_lock);
			page = get_free_highpage(GFP_HIGHUSER);
			if (!page)
				goto oom;
			clear_highpage(page);
			spin_lock(&shm_lock);
			if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
				goto changed;
		} else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			spin_unlock(&shm_lock);
			page = lookup_swap_cache(entry);
			if (!page) {
				lock_kernel();
				swapin_readahead(entry);
				page = read_swap_cache(entry);
				unlock_kernel();
				if (!page)
					goto oom;
			}
			delete_from_swap_cache(page);
			page = replace_with_highmem(page);
			lock_kernel();
			swap_free(entry);
			unlock_kernel();
			spin_lock(&shm_lock);
			shm_swp--;
			pte = shp->shm_pages[idx];
			if (pte_present(pte))
				goto present;
		}
		shm_rss++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		shp->shm_pages[idx] = pte;
	} else
		--current->maj_flt;	/* was incremented in do_no_page */

done:
	/* pte_val(pte) == shp->shm_pages[idx] */
	get_page(pte_page(pte));
	spin_unlock(&shm_lock);
	current->min_flt++;
	return pte_page(pte);

changed:
	__free_page(page);
	goto again;
present:
	if (page)
		free_page_and_swap_cache(page);
	goto done;
oom:
	return (struct page *)(-1);
}

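/*
 * Summary of the fault path above: an empty pte means the page never existed,
 * so a fresh (possibly highmem) page is allocated and cleared; a non-present,
 * non-empty pte is a swap entry, so the page is looked up in (or read into)
 * the swap cache and pulled back.  In both cases shm_lock was dropped for the
 * blocking work, so the pte is re-checked afterwards and the "changed" and
 * "present" labels handle the case where another faulter won the race.  A
 * present pte simply gets an extra reference and is returned.
 */
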
/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

int shm_swap (int prio, int gfp_mask)
{
	pte_t page;
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	counter = shm_rss >> prio;
	if (!counter)
		return 0;
	lock_kernel();
	swap_entry = get_swap_page();
	if (!swap_entry.val) {
		unlock_kernel();
		return 0;
	}
	unlock_kernel();

	spin_lock(&shm_lock);
check_id:
	shp = shm_segs[swap_id];
	if (shp == IPC_UNUSED || shp == IPC_NOID || shp->u.shm_perm.mode & SHM_LOCKED ) {
next_id:
		swap_idx = 0;
		if (++swap_id > max_shmid) {
			swap_id = 0;
			if (loop)
				goto failed;
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	page = shp->shm_pages[idx];
	if (!pte_present(page))
		goto check_table;
	page_map = pte_page(page);
	if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))
		goto check_table;
	if (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page_map))
		goto check_table;
	swap_attempts++;

	if (--counter < 0) { /* failed */
failed:
		spin_unlock(&shm_lock);
		lock_kernel();
		swap_free(swap_entry);
		unlock_kernel();
		return 0;
	}
	if (page_count(page_map))
		goto check_table;
	if (!(page_map = prepare_highmem_swapout(page_map)))
		goto check_table;
	shp->shm_pages[idx] = swp_entry_to_pte(swap_entry);
	swap_successes++;
	shm_swp++;
	shm_rss--;
	spin_unlock(&shm_lock);

	lock_kernel();
	swap_duplicate(swap_entry);
	add_to_swap_cache(page_map, swap_entry);
	rw_swap_page(WRITE, page_map, 0);
	unlock_kernel();

	__free_page(page_map);
	return 1;
}

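/*
 * Note: shm_swap() pre-allocates one swap entry, then scans segments
 * round-robin (swap_id/swap_idx persist across calls) for a present page in
 * an unlocked segment that the caller's gfp_mask can accept.  On success the
 * pte is replaced by the swap entry and the page is written out through the
 * swap cache; if the scan budget (shm_rss >> prio) runs out, the unused swap
 * entry is released and 0 is returned.  swap_attempts and swap_successes
 * feed the SHM_INFO statistics above.
 */
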
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   swp_entry_t entry, struct page *page)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	shp->shm_pages[idx] = pte;
	get_page(page);
	shm_rss++;

	shm_swp--;
	spin_unlock(&shm_lock);

	lock_kernel();
	swap_free(entry);
	unlock_kernel();
}

/*
 * shm_unuse() searches for a possibly swapped-out shm page.
 */
void shm_unuse(swp_entry_t entry, struct page *page)
{
	int i, n;

	spin_lock(&shm_lock);
	for (i = 0; i < SHMMNI; i++) {
		struct shmid_kernel *seg = shm_segs[i];
		if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
			continue;
		for (n = 0; n < seg->shm_npages; n++) {
			if (pte_none(seg->shm_pages[n]))
				continue;
			if (pte_present(seg->shm_pages[n]))
				continue;
			if (pte_to_swp_entry(seg->shm_pages[n]).val == entry.val) {
				shm_unuse_page(seg, n, entry, page);
				return;
			}
		}
	}
	spin_unlock(&shm_lock);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");

	spin_lock(&shm_lock);
	for(i = 0; i < SHMMNI; i++)
		if(shm_segs[i] != IPC_UNUSED) {
#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
#define BIG_STRING   "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
			char *format;

			if (sizeof(size_t) <= sizeof(int))
				format = SMALL_STRING;
			else
				format = BIG_STRING;
			len += sprintf(buffer + len, format,
				shm_segs[i]->u.shm_perm.key,
				shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
				shm_segs[i]->u.shm_perm.mode,
				shm_segs[i]->u.shm_segsz,
				shm_segs[i]->u.shm_cpid,
				shm_segs[i]->u.shm_lpid,
				shm_segs[i]->u.shm_nattch,
				shm_segs[i]->u.shm_perm.uid,
				shm_segs[i]->u.shm_perm.gid,
				shm_segs[i]->u.shm_perm.cuid,
				shm_segs[i]->u.shm_perm.cgid,
				shm_segs[i]->u.shm_atime,
				shm_segs[i]->u.shm_dtime,
				shm_segs[i]->u.shm_ctime);

			pos += len;
			if(pos < offset) {
				len = 0;
				begin = pos;
			}
			if(pos > offset + length)
				goto done;
		}
	*eof = 1;
done:
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	spin_unlock(&shm_lock);
	return len;
}
#endif