/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
 *			Christoph Rohland <hans-christoph.rohland@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 */
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#include "util.h"
struct shmid_kernel /* private to the kernel */
{
	struct kern_ipc_perm	shm_perm;
	size_t			shm_segsz;
	time_t			shm_atime;
	time_t			shm_dtime;
	time_t			shm_ctime;
	pid_t			shm_cpid;
	pid_t			shm_lpid;
	unsigned long		shm_nattch;
	unsigned long		shm_npages;	/* size of segment (pages) */
	pte_t			**shm_dir;	/* ptr to array of ptrs to frames -> SHMMAX */
	struct vm_area_struct	*attaches;	/* descriptors for attaches */
	int			id;		/* backreference to id for shm_close */
	struct semaphore	sem;
};
static struct ipc_ids shm_ids;

#define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
#define shm_unlock(id)	ipc_unlock(&shm_ids,id)
#define shm_lockall()	ipc_lockall(&shm_ids)
#define shm_unlockall()	ipc_unlockall(&shm_ids)
#define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
#define shm_rmid(id)	((struct shmid_kernel*)ipc_rmid(&shm_ids,id))
#define shm_checkid(s, id) \
	ipc_checkid(&shm_ids,&s->shm_perm,id)
#define shm_buildid(id, seq) \
	ipc_buildid(&shm_ids, id, seq)
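
/*
 * The ids handed out to userspace are built from the slot index plus the
 * slot's reuse sequence counter (see ipc_buildid() in ipc/util.h), so an id
 * kept across an IPC_RMID/shmget cycle no longer matches its slot;
 * shm_checkid() detects that, and the callers fail such requests with
 * -EIDRM.
 */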

static int newseg (key_t key, int shmflg, size_t size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int shmid);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif
static void zshm_swap (int prio, int gfp_mask, zone_t *zone);
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);
static int zero_id;
static struct shmid_kernel zshmid_kernel;

size_t shm_ctlmax = SHMMAX;
int shm_ctlall = SHMALL;
int shm_ctlmni = SHMMNI;

static int shm_tot = 0;	/* total number of shared memory pages */
static int shm_rss = 0;	/* number of shared memory pages that are in memory */
static int shm_swp = 0;	/* number of shared memory pages that are in swap */

/* locks order:
	pagecache_lock
	shm_lock()/shm_lockall()
	kernel lock
	shp->sem
	sem_ids.sem
	mmap_sem

  SMP assumptions:
  - swap_free() never sleeps
  - add_to_swap_cache() never sleeps
  - add_to_swap_cache() doesn't acquire the big kernel lock.
  - shm_unuse() is called with the kernel lock acquired.
 */

/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;

void __init shm_init (void)
{
	ipc_init_ids(&shm_ids, shm_ctlmni);
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
	zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, shm_ctlmni);
	shm_unlock(zero_id);
	return;
}

#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
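
/*
 * The segment's page table lives outside the process page tables: shm_dir
 * is a kmalloc'ed array of pointers, each pointing to one page (or, for
 * the tail, a smaller kmalloc'ed chunk) of pte_t slots.  With
 * PTRS_PER_PTE == 1024 (as on i386, for instance), page index 2500 of a
 * segment is found at shm_dir[2][452].  Each slot is either pte_none
 * (never touched), a present pte holding the page frame, or a swap entry.
 */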

static pte_t **shm_alloc(unsigned long pages)
{
	unsigned short dir = pages / PTRS_PER_PTE;
	unsigned short last = pages % PTRS_PER_PTE;
	pte_t **ret, **ptr;

	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
	if (!ret)
		goto out;

	for (ptr = ret; ptr < ret+dir ; ptr++)
	{
		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
		if (!*ptr)
			goto free;
		memset (*ptr, 0, PAGE_SIZE);
	}

	/* The last one is probably not of PAGE_SIZE: we use kmalloc */
	if (last) {
		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
		if (!*ptr)
			goto free;
		memset (*ptr, 0, last*sizeof(pte_t));
	}
out:
	return ret;

free:
	/* The last failed: we decrement first */
	while (--ptr >= ret)
		free_page ((unsigned long)*ptr);

	kfree (ret);
	return NULL;
}

static void shm_free(pte_t** dir, unsigned long pages)
{
	pte_t **ptr = dir+pages/PTRS_PER_PTE;

	/* first the last page */
	if (pages%PTRS_PER_PTE)
		kfree (*ptr);
	/* now the whole pages */
	while (--ptr >= dir)
		free_page ((unsigned long)*ptr);

	/* Now the indirect block */
	kfree (dir);
}

static int shm_revalidate(struct shmid_kernel* shp, int shmid, int pagecount, int flg)
{
	struct shmid_kernel* new;
	new = shm_lock(shmid);
	if(new==NULL) {
		return -EIDRM;
	}
	if(new!=shp || shm_checkid(shp, shmid) || shp->shm_npages != pagecount) {
		shm_unlock(shmid);
		return -EIDRM;
	}
	if (ipcperms(&shp->shm_perm, flg)) {
		shm_unlock(shmid);
		return -EACCES;
	}
	return 0;
}
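
/*
 * shm_revalidate() exists for sys_shmat(): the spinlock has to be dropped
 * around sleeping allocations, and by the time it is retaken the segment
 * may have been removed or its slot reused, so identity, page count and
 * permissions are all checked again before the mapping is wired up.
 */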

static inline struct shmid_kernel *newseg_alloc(int numpages)
{
	struct shmid_kernel *shp;

	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
	if (!shp)
		return 0;

	shp->shm_dir = shm_alloc (numpages);
	if (!shp->shm_dir) {
		kfree(shp);
		return 0;
	}
	shp->shm_npages = numpages;
	shp->attaches = NULL;
	shp->shm_nattch = 0;
	init_MUTEX(&shp->sem);
	return(shp);
}

static int newseg (key_t key, int shmflg, size_t size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id;

	if (size < SHMMIN)
		return -EINVAL;

	if (size > shm_ctlmax)
		return -EINVAL;
	if (shm_tot + numpages >= shm_ctlall)
		return -ENOSPC;

	if (!(shp = newseg_alloc(numpages)))
		return -ENOMEM;
	id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
	if(id == -1) {
		shm_free(shp->shm_dir,numpages);
		kfree(shp);
		return -ENOSPC;
	}
	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->shm_segsz = size;
	shp->shm_cpid = current->pid;
	shp->shm_lpid = 0;
	shp->shm_atime = shp->shm_dtime = 0;
	shp->shm_ctime = CURRENT_TIME;
	shp->id = shm_buildid(id,shp->shm_perm.seq);

	shm_tot += numpages;
	shm_unlock(id);

	return shm_buildid(id,shp->shm_perm.seq);
}

asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;

	down(&shm_ids.sem);
	if (key == IPC_PRIVATE) {
		err = newseg(key, shmflg, size);
	} else if ((id = ipc_findkey(&shm_ids,key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_lock(id);
		if(shp==NULL)
			BUG();
		if (ipcperms(&shp->shm_perm, shmflg))
			err = -EACCES;
		else
			err = shm_buildid(id, shp->shm_perm.seq);
		shm_unlock(id);
	}
	up(&shm_ids.sem);
	return err;
}
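
/*
 * For reference, the userspace sequence behind these system calls looks
 * like this (illustrative sketch only, not kernel code):
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	 (faults handled by shm_nopage())
 *	p[0] = 42;
 *	shmdt(p);			 (shm_close() drops shm_nattch)
 *	shmctl(id, IPC_RMID, NULL);	 (destroyed once shm_nattch is 0)
 */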

static void killseg_core(struct shmid_kernel *shp, int doacc)
{
	int i, numpages, rss, swp;

	numpages = shp->shm_npages;
	for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
		pte_t pte;
		pte = SHM_ENTRY (shp,i);
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			__free_page (pte_page(pte));
			rss++;
		} else {
			swap_free(pte_to_swp_entry(pte));
			swp++;
		}
	}
	shm_free (shp->shm_dir, numpages);
	kfree(shp);
	if (doacc) {
		shm_lockall();
		shm_rss -= rss;
		shm_swp -= swp;
		shm_tot -= numpages;
		shm_unlockall();
	}
}

/*
 * Only called after testing nattch and SHM_DEST.
 * Here pages, pgtable and shmid_kernel are freed.
 */
static void killseg (int shmid)
{
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	shp = shm_lock(shmid);
	if(shp==NULL) {
out_up:
		up(&shm_ids.sem);
		return;
	}
	if(shm_checkid(shp,shmid) || shp->shm_nattch > 0 ||
	    !(shp->shm_perm.mode & SHM_DEST)) {
		shm_unlock(shmid);
		goto out_up;
	}
	shp = shm_rmid(shmid);
	if(shp==NULL)
		BUG();
	if (!shp->shm_dir)
		BUG();
	shm_unlock(shmid);
	up(&shm_ids.sem);
	killseg_core(shp, 1);

	return;
}

static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
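
/*
 * Two user ABIs are served here: IPC_64 callers get the shmid64_ds layout
 * copied straight through, while IPC_OLD callers get the fields repacked
 * into the historical shmid_ds.  Which one applies is decided per call by
 * ipc_parse_version() in sys_shmctl().
 */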

struct shm_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct shmid64_ds tbuf;

		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.shm_perm.uid;
		out->gid	= tbuf.shm_perm.gid;
		out->mode	= tbuf.shm_perm.mode;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.shm_perm.uid;
		out->gid	= tbuf_old.shm_perm.gid;
		out->mode	= tbuf_old.shm_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if(in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shm_setbuf setbuf;
	struct shmid_kernel *shp;
	int err, version;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		memset(&shminfo,0,sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
		shminfo.shmmax = shm_ctlmax;
		shminfo.shmall = shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;
		/* reading an integer is always atomic */
		err = shm_ids.max_id;
		if(err<0)
			err = 0;
		return err;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		memset(&shm_info,0,sizeof(shm_info));
		shm_lockall();
		shm_info.used_ids = shm_ids.in_use;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		err = shm_ids.max_id;
		shm_unlockall();
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
			return -EFAULT;

		return err < 0 ? 0 : err;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;
		memset(&tbuf, 0, sizeof(tbuf));
		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		if (shp == &zshmid_kernel) {
			shm_unlock(shmid);
			return -EINVAL;
		}
		if(cmd==SHM_STAT) {
			err = -EINVAL;
			if (shmid > shm_ids.max_id)
				goto out_unlock;
			result = shm_buildid(shmid, shp->shm_perm.seq);
		} else {
			err = -EIDRM;
			if(shm_checkid(shp,shmid))
				goto out_unlock;
			result = 0;
		}
		err=-EACCES;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out_unlock;
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atime;
		tbuf.shm_dtime	= shp->shm_dtime;
		tbuf.shm_ctime	= shp->shm_ctime;
		tbuf.shm_cpid	= shp->shm_cpid;
		tbuf.shm_lpid	= shp->shm_lpid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shmid);
		if(copy_shmid_to_user (buf, &tbuf, version))
			return -EFAULT;
		return result;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		struct kern_ipc_perm *ipcp;
		if (!capable(CAP_IPC_LOCK))
			return -EPERM;

		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		if (shp == &zshmid_kernel) {
			shm_unlock(shmid);
			return -EINVAL;
		}
		err=-EIDRM;
		if(shm_checkid(shp,shmid))
			goto out_unlock;
		ipcp = &shp->shm_perm;
		if(cmd==SHM_LOCK) {
			if (!(ipcp->mode & SHM_LOCKED)) {
				ipcp->mode |= SHM_LOCKED;
				err = 0;
			}
		} else {
			if (ipcp->mode & SHM_LOCKED) {
				ipcp->mode &= ~SHM_LOCKED;
				err = 0;
			}
		}
		shm_unlock(shmid);
		return err;
	}
	case IPC_RMID:
	case IPC_SET:
		break;
	default:
		return -EINVAL;
	}

	if (cmd == IPC_SET) {
		if(copy_shmid_from_user (&setbuf, buf, version))
			return -EFAULT;
	}
	down(&shm_ids.sem);
	shp = shm_lock(shmid);
	err=-EINVAL;
	if(shp==NULL)
		goto out_up;
	if (shp == &zshmid_kernel)
		goto out_unlock_up;
	err=-EIDRM;
	if(shm_checkid(shp,shmid))
		goto out_unlock_up;
	err=-EPERM;
	if (current->euid != shp->shm_perm.uid &&
	    current->euid != shp->shm_perm.cuid &&
	    !capable(CAP_SYS_ADMIN)) {
		goto out_unlock_up;
	}

	switch (cmd) {
	case IPC_SET:
		shp->shm_perm.uid = setbuf.uid;
		shp->shm_perm.gid = setbuf.gid;
		shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
			| (setbuf.mode & S_IRWXUGO);
		shp->shm_ctime = CURRENT_TIME;
		break;
	case IPC_RMID:
		shp->shm_perm.mode |= SHM_DEST;
		if (shp->shm_nattch <= 0) {
			shm_unlock(shmid);
			up(&shm_ids.sem);
			killseg (shmid);
			return 0;
		}
	}
	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&shm_ids.sem);
	return err;
out_unlock:
	shm_unlock(shmid);
	return err;
}

/*
 * The per process internal structure for managing segments is
 * `struct vm_area_struct'.
 * A shmat will add to and shmdt will remove from the list.
 * shmd->vm_mm		the attacher
 * shmd->vm_start	virt addr of attach, multiple of SHMLBA
 * shmd->vm_end		multiple of SHMLBA
 * shmd->vm_next	next attach for task
 * shmd->vm_next_share	next attach for segment
 * shmd->vm_pgoff	offset into segment (in pages)
 * shmd->vm_private_data	signature for this attach
 */

static struct vm_operations_struct shm_vm_ops = {
	open:		shm_open,	/* open - callback for a new vm-area open */
	close:		shm_close,	/* close - callback for when the vm-area is released */
	nopage:		shm_nopage,
	swapout:	shm_swapout,
};

/* Insert shmd into the list shp->attaches */
static inline void insert_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
	if((shmd->vm_next_share = shp->attaches) != NULL)
		shp->attaches->vm_pprev_share = &shmd->vm_next_share;
	shp->attaches = shmd;
	shmd->vm_pprev_share = &shp->attaches;
}

/* Remove shmd from list shp->attaches */
static inline void remove_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
{
	if(shmd->vm_next_share)
		shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
	*shmd->vm_pprev_share = shmd->vm_next_share;
}
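
/*
 * The attach list uses the usual "pointer to the previous next pointer"
 * trick (vm_pprev_share), so an attach can be unlinked in O(1) without
 * walking the list and without special-casing the head element.
 */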

/*
 * ensure page tables exist
 * mark page table entries with shm_sgn.
 */
static int shm_map (struct vm_area_struct *shmd)
{
	unsigned long tmp;

	/* clear old mappings */
	do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

	/* add new mapping */
	tmp = shmd->vm_end - shmd->vm_start;
	if((current->mm->total_vm << PAGE_SHIFT) + tmp
	   > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;
	current->mm->total_vm += tmp >> PAGE_SHIFT;
	vmlist_modify_lock(current->mm);
	insert_vm_struct(current->mm, shmd);
	merge_segments(current->mm, shmd->vm_start, shmd->vm_end);
	vmlist_modify_unlock(current->mm);

	return 0;
}

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	struct vm_area_struct *shmd;
	int err;
	unsigned long addr;
	unsigned long len;
	short flg = shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO;

	if (shmid < 0)
		return -EINVAL;

	down(&current->mm->mmap_sem);
	err = -EINVAL;
	shp = shm_lock(shmid);
	if (!shp)
		goto out_up;
	if (shp == &zshmid_kernel)
		goto out_unlock_up;

	err = -EACCES;
	if (ipcperms(&shp->shm_perm, flg))
		goto out_unlock_up;

	err = -EIDRM;
	if (shm_checkid(shp,shmid))
		goto out_unlock_up;

	if (!(addr = (ulong) shmaddr)) {
		if (shmflg & SHM_REMAP)
			goto out_unlock_up;
		err = -ENOMEM;
		addr = 0;
	again:
		if (!(addr = get_unmapped_area(addr, (unsigned long)shp->shm_segsz)))
			goto out_unlock_up;
		if(addr & (SHMLBA - 1)) {
			addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
			goto again;
		}
	} else if (addr & (SHMLBA-1)) {
		err=-EINVAL;
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1); /* round down */
		else
			goto out_unlock_up;
	}
	/*
	 * Check if addr exceeds TASK_SIZE (from do_mmap)
	 */
	len = PAGE_SIZE*shp->shm_npages;
	err = -EINVAL;
	if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
		goto out_unlock_up;
	/*
	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	 */
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
		goto out_unlock_up;
	if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + (unsigned long)shp->shm_segsz))
		goto out_unlock_up;

	shm_unlock(shmid);
	err = -ENOMEM;
	shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!shmd)
		goto out_up;
	err = shm_revalidate(shp, shmid, len/PAGE_SIZE,flg);
	if(err) {
		kmem_cache_free(vm_area_cachep, shmd);
		goto out_up;
	}

	shmd->vm_private_data = shp;
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_file = NULL;
	shmd->vm_pgoff = 0;
	shmd->vm_ops = &shm_vm_ops;

	shp->shm_nattch++;	/* prevent destruction */
	shm_unlock(shp->id);
	err = shm_map (shmd);
	shm_lock(shmid); /* cannot fail */
	if (err)
		goto failed_shm_map;

	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */

	shp->shm_lpid = current->pid;
	shp->shm_atime = CURRENT_TIME;

	*raddr = addr;
	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&current->mm->mmap_sem);
	return err;

failed_shm_map:
	{
		int delete = 0;
		if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
			delete = 1;
		shm_unlock(shmid);
		up(&current->mm->mmap_sem);
		kmem_cache_free(vm_area_cachep, shmd);
		if(delete)
			killseg(shmid);
		return err;
	}
}
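
/*
 * Attach flow in short: validate the segment under shm_lock(), pick and
 * sanity-check the user address, drop the lock for the sleeping
 * vm_area_struct allocation, revalidate, then bump shm_nattch before
 * shm_map() so the segment cannot be destroyed while the vma is being
 * inserted.
 */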

/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	if(shp != shm_lock(shp->id))
		BUG();
	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
	shp->shm_nattch++;
	shp->shm_atime = CURRENT_TIME;
	shp->shm_lpid = current->pid;
	shm_unlock(shp->id);
}

/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;
	int id;

	/* remove from the list of attaches of the shm segment */
	shp = (struct shmid_kernel *) shmd->vm_private_data;
	if(shp != shm_lock(shp->id))
		BUG();
	remove_attach(shp,shmd);  /* remove from shp->attaches */
	shp->shm_lpid = current->pid;
	shp->shm_dtime = CURRENT_TIME;
	id=-1;
	if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
		id=shp->id;
	shm_unlock(shp->id);
	if(id!=-1)
		killseg(id);
}

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage long sys_shmdt (char *shmaddr)
{
	struct vm_area_struct *shmd, *shmdnext;

	down(&current->mm->mmap_sem);
	for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
			do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up(&current->mm->mmap_sem);
	return 0;
}

/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them..
 */
static int shm_swapout(struct page * page, struct file *file)
{
	return 0;
}
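
/*
 * In other words: generic swap-out only unmaps the page from the process
 * page tables; the page itself is still referenced from shm_dir.  Actually
 * writing shm pages to swap is done by shm_swap() below, which the kernel
 * swap-out path calls separately.
 */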

/*
 * page not present ... go through shm_dir
 */
static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	pte_t pte;
	struct shmid_kernel *shp;
	unsigned int idx;
	struct page * page;
	int is_shmzero;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;
	is_shmzero = (shp->id == zero_id);

	/*
	 * A shared mapping past the last page of the file is an error
	 * and results in a SIGBUS, so logically a shared mapping past
	 * the end of a shared memory segment should result in SIGBUS
	 * as well.
	 */
	if (idx >= shp->shm_npages) {
		return NULL;
	}
	down(&shp->sem);
	if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
		BUG();

	pte = SHM_ENTRY(shp,idx);
	if (!pte_present(pte)) {
		/* page not present so shm_swap can't race with us
		   and the semaphore protects us by other tasks that
		   could potentially fault on our pte under us */
		if (pte_none(pte)) {
			shm_unlock(shp->id);
			page = alloc_page(GFP_HIGHUSER);
			if (!page)
				goto oom;
			clear_highpage(page);
			if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
				BUG();
		} else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			shm_unlock(shp->id);
			page = lookup_swap_cache(entry);
			if (!page) {
				lock_kernel();
				swapin_readahead(entry);
				page = read_swap_cache(entry);
				unlock_kernel();
				if (!page)
					goto oom;
			}
			delete_from_swap_cache(page);
			page = replace_with_highmem(page);
			swap_free(entry);
			if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
				BUG();
			if (is_shmzero == 0) shm_swp--;
		}
		if (is_shmzero == 0) shm_rss++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		SHM_ENTRY(shp, idx) = pte;
	} else
		--current->maj_flt;  /* was incremented in do_no_page */

	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
	get_page(pte_page(pte));
	shm_unlock(shp->id);
	up(&shp->sem);
	current->min_flt++;
	return pte_page(pte);

oom:
	up(&shp->sem);
	return NOPAGE_OOM;
}
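
/*
 * Summary of the fault path above: the per-segment semaphore serialises
 * faults on the same segment, the spinlock protects the pte array against
 * shm_swap(), and the three pte states map to three actions:
 *
 *	pte_none()	-> allocate and zero a fresh (possibly highmem) page
 *	swap entry	-> bring the page back through the swap cache
 *	pte_present()	-> reuse the already installed page
 *
 * In every case the page count is raised with get_page() before the page
 * is returned to do_no_page().
 */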

#define OKAY	0
#define RETRY	1
#define FAILED	2

static int shm_swap_core(struct shmid_kernel *shp, unsigned long idx, swp_entry_t swap_entry, zone_t *zone, int *counter, struct page **outpage)
{
	pte_t page;
	struct page *page_map;

	page = SHM_ENTRY(shp, idx);
	if (!pte_present(page))
		return RETRY;
	page_map = pte_page(page);
	if (zone && (!memclass(page_map->zone, zone)))
		return RETRY;
	if (shp->id != zero_id) swap_attempts++;

	if (--*counter < 0) /* failed */
		return FAILED;
	if (page_count(page_map) != 1)
		return RETRY;

	if (!(page_map = prepare_highmem_swapout(page_map)))
		return FAILED;
	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);

	/* add the locked page to the swap cache before allowing
	   the swapin path to run lookup_swap_cache(). This avoids
	   reading a not yet uptodate block from disk.
	   NOTE: we just accounted the swap space reference for this
	   swap cache page at __get_swap_page() time. */
	add_to_swap_cache(*outpage = page_map, swap_entry);
	return OKAY;
}

static void shm_swap_postop(struct page *page)
{
	lock_kernel();
	rw_swap_page(WRITE, page, 0);
	unlock_kernel();
	__free_page(page);
}

static int shm_swap_preop(swp_entry_t *swap_entry)
{
	lock_kernel();
	/* subtle: preload the swap count for the swap cache. We can't
	   increase the count inside the critical section as we can't release
	   the shm_lock there. And we can't acquire the big lock with the
	   shm_lock held (otherwise we would deadlock too easily). */
	*swap_entry = __get_swap_page(2);
	if (!(*swap_entry).val) {
		unlock_kernel();
		return 1;
	}
	unlock_kernel();
	return 0;
}

/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

int shm_swap (int prio, int gfp_mask, zone_t *zone)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	zshm_swap(prio, gfp_mask, zone);
	counter = shm_rss >> prio;
	if (!counter)
		return 0;
	if (shm_swap_preop(&swap_entry))
		return 0;

	shm_lockall();
check_id:
	shp = shm_get(swap_id);
	if(shp==NULL || shp->shm_perm.mode & SHM_LOCKED) {
next_id:
		swap_idx = 0;
		if (++swap_id > shm_ids.max_id) {
			swap_id = 0;
			if (loop) {
failed:
				shm_unlockall();
				__swap_free(swap_entry, 2);
				return 0;
			}
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	switch (shm_swap_core(shp, idx, swap_entry, zone, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	swap_successes++;
	shm_swp++;
	shm_rss--;
	shm_unlockall();

	shm_swap_postop(page_map);
	return 1;
}
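
/*
 * shm_swap() keeps a persistent cursor (swap_id, swap_idx) across calls,
 * so successive invocations from the swap-out code walk the segments
 * round-robin instead of always penalising the first one.  A full loop
 * without finding a swappable page gives up and releases the preallocated
 * swap entry.
 */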

/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   swp_entry_t entry, struct page *page)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	SHM_ENTRY(shp, idx) = pte;
	get_page(page);
	shm_rss++;

	shm_swp--;

	swap_free(entry);
}

static int shm_unuse_core(struct shmid_kernel *shp, swp_entry_t entry, struct page *page)
{
	int n;

	for (n = 0; n < shp->shm_npages; n++) {
		if (pte_none(SHM_ENTRY(shp,n)))
			continue;
		if (pte_present(SHM_ENTRY(shp,n)))
			continue;
		if (pte_to_swp_entry(SHM_ENTRY(shp,n)).val == entry.val) {
			shm_unuse_page(shp, n, entry, page);
			return 1;
		}
	}
	return 0;
}

/*
 * shm_unuse() searches all segments for a page that was swapped out to the
 * given swap entry and, if found, rebinds that slot to the in-memory page.
 */
void shm_unuse(swp_entry_t entry, struct page *page)
{
	int i;

	shm_lockall();
	for (i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel *shp = shm_get(i);
		if(shp==NULL)
			continue;
		if (shm_unuse_core(shp, entry, page))
			goto out;
	}
out:
	shm_unlockall();
	zmap_unuse(entry, page);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	down(&shm_ids.sem);
	len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n");

	for(i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel* shp = shm_lock(i);
		if (shp == &zshmid_kernel) {
			shm_unlock(i);
			continue;
		}
		if(shp!=NULL) {
#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
#define BIG_STRING   "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
			char *format;

			if (sizeof(size_t) <= sizeof(int))
				format = SMALL_STRING;
			else
				format = BIG_STRING;
			len += sprintf(buffer + len, format,
				shp->shm_perm.key,
				shm_buildid(i, shp->shm_perm.seq),
				shp->shm_perm.mode,
				shp->shm_segsz,
				shp->shm_cpid,
				shp->shm_lpid,
				shp->shm_nattch,
				shp->shm_perm.uid,
				shp->shm_perm.gid,
				shp->shm_perm.cuid,
				shp->shm_perm.cgid,
				shp->shm_atime,
				shp->shm_dtime,
				shp->shm_ctime);
			shm_unlock(i);

			pos += len;
			if(pos < offset) {
				len = 0;
				begin = pos;
			}
			if(pos > offset + length)
				goto done;
		}
	}
	*eof = 1;
done:
	up(&shm_ids.sem);
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	return len;
}
#endif

static struct shmid_kernel *zmap_list = 0;
static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
static unsigned long zswap_idx = 0; /* next to swap */
static struct shmid_kernel *zswap_shp = 0;

static struct vm_operations_struct shmzero_vm_ops = {
	open:		shmzero_open,
	close:		shmzero_close,
	nopage:		shm_nopage,
	swapout:	shm_swapout,
};
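
/*
 * Shared anonymous /dev/zero mappings reuse the shm machinery: each mapping
 * gets a private shmid_kernel that is never entered into shm_ids (shp->id
 * is set to zero_id so that the shm_lock()/shm_unlock() calls in the shared
 * fault path still reference a valid slot), and the segments are instead
 * chained on zmap_list through the otherwise unused ->attaches pointer,
 * hence the casts in the functions below.
 */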

int map_zero_setup(struct vm_area_struct *vma)
{
	struct shmid_kernel *shp;

	if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE)))
		return -ENOMEM;
	shp->id = zero_id;	/* hack for shm_lock et al */
	vma->vm_private_data = shp;
	vma->vm_ops = &shmzero_vm_ops;
	shmzero_open(vma);
	spin_lock(&zmap_list_lock);
	shp->attaches = (struct vm_area_struct *)zmap_list;
	zmap_list = shp;
	spin_unlock(&zmap_list_lock);
	return 0;
}

static void shmzero_open(struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	down(&shp->sem);
	shp->shm_nattch++;
	up(&shp->sem);
}

static void shmzero_close(struct vm_area_struct *shmd)
{
	int done = 0;
	struct shmid_kernel *shp, *prev, *cur;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	down(&shp->sem);
	if (--shp->shm_nattch == 0)
		done = 1;
	up(&shp->sem);
	if (done) {
		spin_lock(&zmap_list_lock);
		if (shp == zswap_shp)
			zswap_shp = (struct shmid_kernel *)(shp->attaches);
		if (shp == zmap_list)
			zmap_list = (struct shmid_kernel *)(shp->attaches);
		else {
			prev = zmap_list;
			cur = (struct shmid_kernel *)(prev->attaches);
			while (cur != shp) {
				prev = cur;
				cur = (struct shmid_kernel *)(prev->attaches);
			}
			prev->attaches = (struct vm_area_struct *)(shp->attaches);
		}
		spin_unlock(&zmap_list_lock);
		killseg_core(shp, 0);
	}
}

static void zmap_unuse(swp_entry_t entry, struct page *page)
{
	struct shmid_kernel *shp;

	spin_lock(&zmap_list_lock);
	shp = zmap_list;
	while (shp) {
		if (shm_unuse_core(shp, entry, page))
			break;
		shp = (struct shmid_kernel *)shp->attaches;
	}
	spin_unlock(&zmap_list_lock);
}

static void zshm_swap (int prio, int gfp_mask, zone_t *zone)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	counter = 10; /* maybe we should use zshm_rss */
	if (!counter)
		return;
next:
	if (shm_swap_preop(&swap_entry))
		return;

	spin_lock(&zmap_list_lock);
	if (zmap_list == 0)
		goto failed;
next_id:
	if ((shp = zswap_shp) == 0) {
		if (loop) {
failed:
			spin_unlock(&zmap_list_lock);
			__swap_free(swap_entry, 2);
			return;
		}
		zswap_shp = shp = zmap_list;
		zswap_idx = 0;
		loop = 1;
	}

check_table:
	idx = zswap_idx++;
	if (idx >= shp->shm_npages) {
		zswap_shp = (struct shmid_kernel *)(zswap_shp->attaches);
		zswap_idx = 0;
		goto next_id;
	}

	switch (shm_swap_core(shp, idx, swap_entry, zone, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	spin_unlock(&zmap_list_lock);

	shm_swap_postop(page_map);
	if (counter)
		goto next;
	return;
}