/*
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
 *	Christoph Rohland <hans-christoph.rohland@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 */
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
struct shmid_kernel /* private to the kernel */
{
	struct kern_ipc_perm	shm_perm;
	size_t			shm_segsz;
	time_t			shm_atime;
	time_t			shm_dtime;
	time_t			shm_ctime;
	pid_t			shm_cpid;
	pid_t			shm_lpid;
	unsigned long		shm_nattch;
	unsigned long		shm_npages;	/* size of segment (pages) */
	pte_t			**shm_dir;	/* ptr to array of ptrs to frames -> SHMMAX */
	struct vm_area_struct	*attaches;	/* descriptors for attaches */
	int			id;		/* backreference to id for shm_close */
	struct semaphore	sem;
};
static struct ipc_ids shm_ids;

#define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
#define shm_unlock(id)	ipc_unlock(&shm_ids,id)
#define shm_lockall()	ipc_lockall(&shm_ids)
#define shm_unlockall()	ipc_unlockall(&shm_ids)
#define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
#define shm_rmid(id)	((struct shmid_kernel*)ipc_rmid(&shm_ids,id))
#define shm_checkid(s, id)	\
	ipc_checkid(&shm_ids,&s->shm_perm,id)
#define shm_buildid(id, seq) \
	ipc_buildid(&shm_ids, id, seq)
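/*
 * Usage sketch (assuming an id previously returned by shm_buildid()):
 * the lookup-and-validate pattern these macros are designed for is
 *
 *	struct shmid_kernel *shp = shm_lock(shmid);
 *	if (shp == NULL)
 *		return -EINVAL;
 *	if (shm_checkid(shp, shmid)) {
 *		shm_unlock(shmid);
 *		return -EIDRM;
 *	}
 *	... use shp under the per-id ipc lock ...
 *	shm_unlock(shmid);
 */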
static int newseg (key_t key, int shmflg, size_t size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int shmid);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static struct page *shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset,
				 int length, int *eof, void *data);

static void zshm_swap (int prio, int gfp_mask, zone_t *zone);
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);

static int zero_id;
static struct shmid_kernel zshmid_kernel;
size_t	shm_ctlmax = SHMMAX;
int	shm_ctlall = SHMALL;
int	shm_ctlmni = SHMMNI;

static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
/*
 * Locking:
 *	shm_lock()/shm_lockall()
 *
 *	- swap_free() never sleeps
 *	- add_to_swap_cache() never sleeps
 *	- add_to_swap_cache() doesn't acquire the big kernel lock.
 *	- shm_unuse() is called with the kernel lock acquired.
 */
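/*
 * Illustrative consequence of the rules above: because swap_free()
 * never sleeps, a swap reference can be dropped while the per-id lock
 * is held, e.g.
 *
 *	shp = shm_lock(shmid);
 *	swap_free(pte_to_swp_entry(SHM_ENTRY(shp, idx)));
 *	shm_unlock(shmid);
 */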
/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
void __init shm_init (void)
{
	ipc_init_ids(&shm_ids, shm_ctlmni);
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
	zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, shm_ctlmni);
}
#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
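/*
 * Worked example (assuming PTRS_PER_PTE == 1024, as on i386 without
 * PAE): page index 2500 of a segment resolves to shm_dir[2][452],
 * since 2500/1024 == 2 selects the directory slot and 2500%1024 == 452
 * selects the pte within that slot.
 */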
static pte_t **shm_alloc(unsigned long pages)
{
	unsigned short dir  = pages / PTRS_PER_PTE;
	unsigned short last = pages % PTRS_PER_PTE;
	pte_t **ret, **ptr;

	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
	if (!ret)
		goto out;

	for (ptr = ret; ptr < ret+dir ; ptr++)
	{
		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
		if (!*ptr)
			goto free;
		memset (*ptr, 0, PAGE_SIZE);
	}

	/* The last one is probably not of PAGE_SIZE: we use kmalloc */
	if (last) {
		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
		if (!*ptr)
			goto free;
		memset (*ptr, 0, last*sizeof(pte_t));
	}
out:
	return ret;

free:
	/* The last failed: we decrement first */
	while (--ptr >= ret)
		free_page ((unsigned long)*ptr);

	kfree (ret);
	ret = NULL;
	goto out;
}
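/*
 * Sizing sketch (assuming PAGE_SIZE == 4096 and PTRS_PER_PTE == 1024):
 * for a 1025-page segment, dir == 1 and last == 1, so shm_alloc()
 * kmallocs two directory slots, backs the first with a full page of
 * pte_t entries and the second with a kmalloc of a single pte_t,
 * which is exactly what shm_free() below undoes.
 */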
static void shm_free(pte_t **dir, unsigned long pages)
{
	pte_t **ptr = dir+pages/PTRS_PER_PTE;

	/* first the last page */
	if (pages%PTRS_PER_PTE)
		kfree (*ptr);
	/* now the whole pages */
	while (--ptr >= dir)
		free_page ((unsigned long)*ptr);

	/* Now the indirect block */
	kfree (dir);
}
static int shm_revalidate(struct shmid_kernel *shp, int shmid, int pagecount,
			  int flg)
{
	struct shmid_kernel *new;

	new = shm_lock(shmid);
	if (new == NULL)
		return -EIDRM;

	if (new != shp || shm_checkid(shp, shmid) || shp->shm_npages != pagecount) {
		shm_unlock(shmid);
		return -EIDRM;
	}
	if (ipcperms(&shp->shm_perm, flg)) {
		shm_unlock(shmid);
		return -EACCES;
	}
	return 0;
}
static inline struct shmid_kernel *newseg_alloc(int numpages)
{
	struct shmid_kernel *shp;

	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
	if (!shp)
		return 0;

	shp->shm_dir = shm_alloc (numpages);
	if (!shp->shm_dir) {
		kfree(shp);
		return 0;
	}
	shp->shm_npages = numpages;
	shp->attaches = NULL;
	shp->shm_nattch = 0;
	init_MUTEX(&shp->sem);
	return shp;
}
static int newseg (key_t key, int shmflg, size_t size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id;

	if (size > shm_ctlmax)
		return -EINVAL;
	if (shm_tot + numpages >= shm_ctlall)
		return -ENOSPC;

	if (!(shp = newseg_alloc(numpages)))
		return -ENOMEM;
	id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
	if (id == -1) {
		shm_free(shp->shm_dir,numpages);
		kfree(shp);
		return -ENOSPC;
	}
	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->shm_segsz = size;
	shp->shm_cpid = current->pid;
	shp->shm_atime = shp->shm_dtime = 0;
	shp->shm_ctime = CURRENT_TIME;
	shp->id = shm_buildid(id,shp->shm_perm.seq);
	shm_tot += numpages;
	shm_unlock(id);

	return shm_buildid(id,shp->shm_perm.seq);
}
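/*
 * Illustrative note (assuming the usual ipc_buildid() definition,
 * seq*SEQ_MULTIPLIER + id): the returned shmid encodes both the slot
 * index and the slot's sequence number, which is what lets
 * shm_checkid() reject stale ids after the slot has been reused.
 */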
asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;

	down(&shm_ids.sem);
	if (key == IPC_PRIVATE) {
		err = newseg(key, shmflg, size);
	} else if ((id = ipc_findkey(&shm_ids,key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_lock(id);
		if (ipcperms(&shp->shm_perm, shmflg))
			err = -EACCES;
		else
			err = shm_buildid(id, shp->shm_perm.seq);
		shm_unlock(id);
	}
	up(&shm_ids.sem);
	return err;
}
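/*
 * Userspace view (a minimal sketch of the standard SysV API): the
 * arms above correspond to
 *
 *	id = shmget(IPC_PRIVATE, size, 0600);             always creates
 *	id = shmget(key, size, IPC_CREAT | 0600);         find or create
 *	id = shmget(key, size, IPC_CREAT|IPC_EXCL|0600);  fails with EEXIST
 *							  if key exists
 */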
static void killseg_core(struct shmid_kernel *shp, int doacc)
{
	int i, numpages, rss, swp;
	pte_t pte;

	numpages = shp->shm_npages;
	for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
		pte = SHM_ENTRY (shp,i);
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			__free_page (pte_page(pte));
			rss++;
		} else {
			swap_free(pte_to_swp_entry(pte));
			swp++;
		}
	}
	shm_free (shp->shm_dir, numpages);
	kfree(shp);
	if (doacc) {
		shm_lockall();
		shm_rss -= rss;
		shm_swp -= swp;
		shm_tot -= numpages;
		shm_unlockall();
	}
}
/*
 * Only called after testing nattch and SHM_DEST.
 * Here pages, pgtable and shmid_kernel are freed.
 */
static void killseg (int shmid)
{
	struct shmid_kernel *shp;

	shp = shm_lock(shmid);
	if (shp == NULL)
		return;
	if (shm_checkid(shp,shmid) || shp->shm_nattch > 0 ||
	    !(shp->shm_perm.mode & SHM_DEST)) {
		shm_unlock(shmid);
		return;
	}
	shp = shm_rmid(shmid);
	shm_unlock(shmid);
	killseg_core(shp, 1);
}
static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
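/*
 * Illustrative note (assuming the usual ipc_parse_version() behaviour):
 * the C library selects the new layout by or-ing IPC_64 into cmd, so
 * shmctl(id, IPC_STAT | IPC_64, buf) reaches the shmid64_ds branch
 * above, while a plain IPC_STAT takes the IPC_OLD conversion path.
 */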
static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct shmid64_ds tbuf;

		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.shm_perm.uid;
		out->gid	= tbuf.shm_perm.gid;
		out->mode	= tbuf.shm_perm.mode;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.shm_perm.uid;
		out->gid	= tbuf_old.shm_perm.gid;
		out->mode	= tbuf_old.shm_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}
static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if(in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin = in->shmmin;
		out.shmmni = in->shmmni;
		out.shmseg = in->shmseg;
		out.shmall = in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shm_setbuf setbuf;
	struct shmid_kernel *shp;
	int err, version;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		memset(&shminfo,0,sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
		shminfo.shmmax = shm_ctlmax;
		shminfo.shmall = shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;
		/* reading an integer is always atomic */
		err = shm_ids.max_id;
		if(err < 0)
			err = 0;
		return err;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		memset(&shm_info,0,sizeof(shm_info));
		shm_lockall();
		shm_info.used_ids = shm_ids.in_use;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		err = shm_ids.max_id;
		shm_unlockall();
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
			return -EFAULT;

		return err < 0 ? 0 : err;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;

		memset(&tbuf, 0, sizeof(tbuf));
		shp = shm_lock(shmid);
		if (shp == NULL)
			return -EINVAL;
		if (shp == &zshmid_kernel) {
			err = -EINVAL;
			goto out_unlock;
		}
		if(cmd==SHM_STAT) {
			err = -EINVAL;
			if (shmid > shm_ids.max_id)
				goto out_unlock;
			result = shm_buildid(shmid, shp->shm_perm.seq);
		} else {
			err = -EIDRM;
			if(shm_checkid(shp,shmid))
				goto out_unlock;
			result = 0;
		}
		err = -EACCES;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out_unlock;
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atime;
		tbuf.shm_dtime	= shp->shm_dtime;
		tbuf.shm_ctime	= shp->shm_ctime;
		tbuf.shm_cpid	= shp->shm_cpid;
		tbuf.shm_lpid	= shp->shm_lpid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shmid);
		if(copy_shmid_to_user (buf, &tbuf, version))
			return -EFAULT;
		return result;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		struct kern_ipc_perm *ipcp;
		if (!capable(CAP_IPC_LOCK))
			return -EPERM;

		shp = shm_lock(shmid);
		if (shp == NULL)
			return -EINVAL;
		if (shp == &zshmid_kernel) {
			err = -EINVAL;
			goto out_unlock;
		}
		err = -EIDRM;
		if(shm_checkid(shp,shmid))
			goto out_unlock;
		ipcp = &shp->shm_perm;
		err = -EINVAL;
		if(cmd==SHM_LOCK) {
			if (!(ipcp->mode & SHM_LOCKED)) {
				ipcp->mode |= SHM_LOCKED;
				err = 0;
			}
		} else {
			if (ipcp->mode & SHM_LOCKED) {
				ipcp->mode &= ~SHM_LOCKED;
				err = 0;
			}
		}
		goto out_unlock;
	}
	case IPC_RMID:
	case IPC_SET:
		break;
	default:
		return -EINVAL;
	}

	if (cmd == IPC_SET) {
		if(copy_shmid_from_user (&setbuf, buf, version))
			return -EFAULT;
	}
	down(&shm_ids.sem);
	shp = shm_lock(shmid);
	err = -EINVAL;
	if (shp == NULL)
		goto out_up;
	if (shp == &zshmid_kernel)
		goto out_unlock_up;
	err = -EIDRM;
	if(shm_checkid(shp,shmid))
		goto out_unlock_up;
	err = -EPERM;
	if (current->euid != shp->shm_perm.uid &&
	    current->euid != shp->shm_perm.cuid &&
	    !capable(CAP_SYS_ADMIN)) {
		goto out_unlock_up;
	}

	switch (cmd) {
	case IPC_SET:
		shp->shm_perm.uid = setbuf.uid;
		shp->shm_perm.gid = setbuf.gid;
		shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
			| (setbuf.mode & S_IRWXUGO);
		shp->shm_ctime = CURRENT_TIME;
		break;
	case IPC_RMID:
		shp->shm_perm.mode |= SHM_DEST;
		if (shp->shm_nattch <= 0) {
			shm_unlock(shmid);
			up(&shm_ids.sem);
			killseg(shmid);
			return 0;
		}
	}
	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&shm_ids.sem);
	return err;
out_unlock:
	shm_unlock(shmid);
	return err;
}
/*
 * The per process internal structure for managing segments is
 * `struct vm_area_struct'.
 * A shmat will add to and shmdt will remove from the list.
 * shmd->vm_mm		the attacher
 * shmd->vm_start	virt addr of attach, multiple of SHMLBA
 * shmd->vm_end		multiple of SHMLBA
 * shmd->vm_next	next attach for task
 * shmd->vm_next_share	next attach for segment
 * shmd->vm_pgoff	offset into segment (in pages)
 * shmd->vm_private_data	signature for this attach
 */
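/*
 * Traversal sketch implied by the fields above: every attach of one
 * segment is reachable through the share list, e.g.
 *
 *	struct vm_area_struct *shmd;
 *	for (shmd = shp->attaches; shmd; shmd = shmd->vm_next_share)
 *		... shmd->vm_mm identifies the attaching mm ...
 */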
static struct vm_operations_struct shm_vm_ops = {
	open:		shm_open,	/* open - callback for a new vm-area open */
	close:		shm_close,	/* close - callback for when the vm-area is released */
	nopage:		shm_nopage,
	swapout:	shm_swapout,
};
/* Insert shmd into the list shp->attaches */
static inline void insert_attach (struct shmid_kernel *shp, struct vm_area_struct *shmd)
{
	if((shmd->vm_next_share = shp->attaches) != NULL)
		shp->attaches->vm_pprev_share = &shmd->vm_next_share;
	shp->attaches = shmd;
	shmd->vm_pprev_share = &shp->attaches;
}
/* Remove shmd from list shp->attaches */
static inline void remove_attach (struct shmid_kernel *shp, struct vm_area_struct *shmd)
{
	if(shmd->vm_next_share)
		shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
	*shmd->vm_pprev_share = shmd->vm_next_share;
}
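/*
 * Note on the idiom above: vm_pprev_share points at whatever pointer
 * currently references this vma, either shp->attaches or the previous
 * vma's vm_next_share, so the unlink needs no head-of-list special
 * case.
 */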
/*
 * ensure page tables exist
 * mark page table entries with shm_sgn.
 */
static int shm_map (struct vm_area_struct *shmd)
{
	unsigned long tmp;

	/* clear old mappings */
	do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

	/* add new mapping */
	tmp = shmd->vm_end - shmd->vm_start;
	if((current->mm->total_vm << PAGE_SHIFT) + tmp
	   > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;
	current->mm->total_vm += tmp >> PAGE_SHIFT;
	vmlist_modify_lock(current->mm);
	insert_vm_struct(current->mm, shmd);
	merge_segments(current->mm, shmd->vm_start, shmd->vm_end);
	vmlist_modify_unlock(current->mm);

	return 0;
}
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	struct vm_area_struct *shmd;
	int err;
	unsigned long addr;
	unsigned long len;
	short flg = shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO;

	down(&current->mm->mmap_sem);
	err = -EINVAL;
	shp = shm_lock(shmid);
	if (shp == NULL)
		goto out_up;

	if (shp == &zshmid_kernel)
		goto out_unlock_up;

	err = -EACCES;
	if (ipcperms(&shp->shm_perm, flg))
		goto out_unlock_up;

	err = -EIDRM;
	if (shm_checkid(shp,shmid))
		goto out_unlock_up;

	if (!(addr = (ulong) shmaddr)) {
		err = -EINVAL;
		if (shmflg & SHM_REMAP)
			goto out_unlock_up;
		err = -ENOMEM;
		if (!(addr = get_unmapped_area(addr, (unsigned long)shp->shm_segsz)))
			goto out_unlock_up;
		if(addr & (SHMLBA - 1)) {
			addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
		}
	} else if (addr & (SHMLBA-1)) {
		err = -EINVAL;
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1);	/* round down */
		else
			goto out_unlock_up;
	}
	/*
	 * Check if addr exceeds TASK_SIZE (from do_mmap)
	 */
	len = PAGE_SIZE*shp->shm_npages;
	err = -EINVAL;
	if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
		goto out_unlock_up;
	/*
	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	 */
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
		goto out_unlock_up;
	if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + (unsigned long)shp->shm_segsz))
		goto out_unlock_up;

	shm_unlock(shmid);
	err = -ENOMEM;
	shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!shmd)
		goto out_up;
	err = shm_revalidate(shp, shmid, len/PAGE_SIZE, flg);
	if (err) {
		kmem_cache_free(vm_area_cachep, shmd);
		goto out_up;
	}

	shmd->vm_private_data = shp;
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_file = NULL;
	shmd->vm_pgoff = 0;
	shmd->vm_ops = &shm_vm_ops;

	shp->shm_nattch++;	/* prevent destruction */
	shm_unlock(shmid);
	err = shm_map (shmd);
	shm_lock(shmid);	/* cannot fail */
	if (err)
		goto failed_shm_map;

	insert_attach(shp,shmd);	/* insert shmd into shp->attaches */

	shp->shm_lpid = current->pid;
	shp->shm_atime = CURRENT_TIME;

	*raddr = addr;
	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&current->mm->mmap_sem);
	return err;

failed_shm_map:
	{
		int delete = 0;
		if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
			delete = 1;
		shm_unlock(shmid);
		up(&current->mm->mmap_sem);
		kmem_cache_free(vm_area_cachep, shmd);
		if (delete)
			killseg(shmid);
		return err;
	}
}
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	if(shp != shm_lock(shp->id))
		BUG();
	insert_attach(shp,shmd);	/* insert shmd into shp->attaches */
	shp->shm_nattch++;
	shp->shm_atime = CURRENT_TIME;
	shp->shm_lpid = current->pid;
	shm_unlock(shp->id);
}
/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;
	int id;

	/* remove from the list of attaches of the shm segment */
	shp = (struct shmid_kernel *) shmd->vm_private_data;
	if(shp != shm_lock(shp->id))
		BUG();
	remove_attach(shp,shmd);	/* remove from shp->attaches */
	shp->shm_lpid = current->pid;
	shp->shm_dtime = CURRENT_TIME;
	id = -1;
	if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
		id = shp->id;
	shm_unlock(shp->id);
	if (id != -1)
		killseg(id);
}
/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage long sys_shmdt (char *shmaddr)
{
	struct vm_area_struct *shmd, *shmdnext;

	down(&current->mm->mmap_sem);
	for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
			do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up(&current->mm->mmap_sem);
	return 0;
}
/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * free the swap entry.
 */
static int shm_swapout(struct page *page, struct file *file)
{
	return 0;
}
/*
 * page not present ... go through shm_dir
 */
static struct page *shm_nopage(struct vm_area_struct *shmd, unsigned long address, int no_share)
{
	pte_t pte;
	struct shmid_kernel *shp;
	unsigned int idx;
	struct page *page;
	int is_shmzero;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;
	is_shmzero = (shp->id == zero_id);

	/*
	 * A shared mapping past the last page of the file is an error
	 * and results in a SIGBUS, so logically a shared mapping past
	 * the end of a shared memory segment should result in SIGBUS
	 * as well.
	 */
	if (idx >= shp->shm_npages) {
		return NULL;
	}
	if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
		BUG();

	pte = SHM_ENTRY(shp,idx);
	if (!pte_present(pte)) {
		/* page not present so shm_swap can't race with us
		   and the semaphore protects us by other tasks that
		   could potentially fault on our pte under us */
		if (pte_none(pte)) {
			shm_unlock(shp->id);
			page = alloc_page(GFP_HIGHUSER);
			if (!page)
				goto oom;
			clear_highpage(page);
			if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
				BUG();
		} else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			shm_unlock(shp->id);
			page = lookup_swap_cache(entry);
			if (!page) {
				lock_kernel();
				swapin_readahead(entry);
				page = read_swap_cache(entry);
				unlock_kernel();
				if (!page)
					goto oom;
			}
			delete_from_swap_cache(page);
			page = replace_with_highmem(page);
			swap_free(entry);
			if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
				BUG();
			if (is_shmzero == 0) shm_swp--;
		}
		if (is_shmzero == 0) shm_rss++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		SHM_ENTRY(shp, idx) = pte;
	} else
		--current->maj_flt;	/* was incremented in do_no_page */

	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
	get_page(pte_page(pte));
	shm_unlock(shp->id);
	return pte_page(pte);

oom:
	return NULL;
}
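/*
 * Fault-path sketch (illustrative): touching an attached page traps
 * into do_no_page(), which calls shm_vm_ops.nopage == shm_nopage()
 * above; generic fault code then installs the returned page in the
 * process page tables, so SHM_ENTRY() and the hardware ptes stay
 * consistent without shm-specific pte walking here.
 */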
#define OKAY	0
#define RETRY	1
#define FAILED	2

static int shm_swap_core(struct shmid_kernel *shp, unsigned long idx,
			 swp_entry_t swap_entry, zone_t *zone, int *counter,
			 struct page **outpage)
{
	pte_t page;
	struct page *page_map;

	page = SHM_ENTRY(shp, idx);
	if (!pte_present(page))
		return RETRY;
	page_map = pte_page(page);
	if (zone && (!memclass(page_map->zone, zone)))
		return RETRY;
	if (shp->id != zero_id) swap_attempts++;

	if (--*counter < 0) /* failed */
		return FAILED;
	if (page_count(page_map) != 1)
		return RETRY;

	if (!(page_map = prepare_highmem_swapout(page_map)))
		return FAILED;
	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);

	/* add the locked page to the swap cache before allowing
	   the swapin path to run lookup_swap_cache(). This avoids
	   reading a not yet uptodate block from disk.
	   NOTE: we just accounted the swap space reference for this
	   swap cache page at __get_swap_page() time. */
	add_to_swap_cache(*outpage = page_map, swap_entry);
	return OKAY;
}
static void shm_swap_postop(struct page *page)
{
	lock_kernel();
	rw_swap_page(WRITE, page, 0);
	unlock_kernel();
	__free_page(page);
}
static int shm_swap_preop(swp_entry_t *swap_entry)
{
	lock_kernel();
	/* subtle: preload the swap count for the swap cache. We can't
	   increase the count inside the critical section as we can't release
	   the shm_lock there. And we can't acquire the big lock with the
	   shm_lock held (otherwise we would deadlock too easily). */
	*swap_entry = __get_swap_page(2);
	if (!(*swap_entry).val) {
		unlock_kernel();
		return 1;
	}
	unlock_kernel();
	return 0;
}
/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

int shm_swap (int prio, int gfp_mask, zone_t *zone)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page *page_map;

	zshm_swap(prio, gfp_mask, zone);
	counter = shm_rss >> prio;
	if (!counter)
		return 0;
	if (shm_swap_preop(&swap_entry))
		return 0;

	shm_lockall();
check_id:
	shp = shm_get(swap_id);
	if(shp==NULL || shp->shm_perm.mode & SHM_LOCKED) {
next_id:
		swap_idx = 0;
		if (++swap_id > shm_ids.max_id) {
			swap_id = 0;
			if (loop) {
failed:
				__swap_free(swap_entry, 2);
				shm_unlockall();
				return 0;
			}
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	switch (shm_swap_core(shp, idx, swap_entry, zone, &counter, &page_map)) {
		case RETRY:  goto check_table;
		case FAILED: goto failed;
	}
	swap_successes++;
	shm_swp++;
	shm_rss--;
	shm_unlockall();

	shm_swap_postop(page_map);
	return 1;
}
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   swp_entry_t entry, struct page *page)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	SHM_ENTRY(shp, idx) = pte;
	get_page(page);
	shm_rss++;

	shm_swp--;

	swap_free(entry);
}
static int shm_unuse_core(struct shmid_kernel *shp, swp_entry_t entry, struct page *page)
{
	int n;

	for (n = 0; n < shp->shm_npages; n++) {
		if (pte_none(SHM_ENTRY(shp,n)))
			continue;
		if (pte_present(SHM_ENTRY(shp,n)))
			continue;
		if (pte_to_swp_entry(SHM_ENTRY(shp,n)).val == entry.val) {
			shm_unuse_page(shp, n, entry, page);
			return 1;
		}
	}
	return 0;
}
/*
 * shm_unuse() searches for a swapped out shm page.
 */
void shm_unuse(swp_entry_t entry, struct page *page)
{
	int i;

	shm_lockall();
	for (i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel *shp = shm_get(i);
		if(shp==NULL)
			continue;
		if (shm_unuse_core(shp, entry, page))
			goto out;
	}
out:
	shm_unlockall();
	zmap_unuse(entry, page);
}
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset,
				 int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	down(&shm_ids.sem);
	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n");

	for(i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel *shp = shm_lock(i);
		if (shp == &zshmid_kernel) {
			shm_unlock(i);
			continue;
		}
		if(shp!=NULL) {
#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
#define BIG_STRING   "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
			char *format;

			if (sizeof(size_t) <= sizeof(int))
				format = SMALL_STRING;
			else
				format = BIG_STRING;
			len += sprintf(buffer + len, format,
				shp->shm_perm.key,
				shm_buildid(i, shp->shm_perm.seq),
				shp->shm_perm.mode,
				shp->shm_segsz,
				shp->shm_cpid,
				shp->shm_lpid,
				shp->shm_nattch,
				shp->shm_perm.uid,
				shp->shm_perm.gid,
				shp->shm_perm.cuid,
				shp->shm_perm.cgid,
				shp->shm_atime,
				shp->shm_dtime,
				shp->shm_ctime);
			shm_unlock(i);

			pos += len;
			if(pos < offset) {
				len = 0;
				begin = pos;
			}
			if(pos > offset + length)
				goto done;
		} else
			shm_unlock(i);
	}
	*eof = 1;
done:
	up(&shm_ids.sem);

	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	return len;
}
#endif
static struct shmid_kernel *zmap_list = 0;
static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
static unsigned long zswap_idx = 0; /* next to swap */
static struct shmid_kernel *zswap_shp = 0;

static struct vm_operations_struct shmzero_vm_ops = {
	open:		shmzero_open,
	close:		shmzero_close,
	nopage:		shm_nopage,
	swapout:	shm_swapout,
};
int map_zero_setup(struct vm_area_struct *vma)
{
	struct shmid_kernel *shp;

	if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE)))
		return -ENOMEM;
	shp->id = zero_id;	/* hack for shm_lock et al */
	vma->vm_private_data = shp;
	vma->vm_ops = &shmzero_vm_ops;

	spin_lock(&zmap_list_lock);
	shp->attaches = (struct vm_area_struct *)zmap_list;
	zmap_list = shp;
	spin_unlock(&zmap_list_lock);
	return 0;
}
static void shmzero_open(struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	shm_lock(shp->id);
	shp->shm_nattch++;
	shm_unlock(shp->id);
}
static void shmzero_close(struct vm_area_struct *shmd)
{
	int done = 0;
	struct shmid_kernel *shp, *prev, *cur;

	shp = (struct shmid_kernel *) shmd->vm_private_data;
	shm_lock(shp->id);
	if (--shp->shm_nattch == 0)
		done = 1;
	shm_unlock(shp->id);
	if (done) {
		spin_lock(&zmap_list_lock);
		if (shp == zswap_shp)
			zswap_shp = (struct shmid_kernel *)(shp->attaches);
		if (shp == zmap_list)
			zmap_list = (struct shmid_kernel *)(shp->attaches);
		else {
			prev = zmap_list;
			cur = (struct shmid_kernel *)(prev->attaches);
			while (cur != shp) {
				prev = cur;
				cur = (struct shmid_kernel *)(prev->attaches);
			}
			prev->attaches = (struct vm_area_struct *)(shp->attaches);
		}
		spin_unlock(&zmap_list_lock);
		killseg_core(shp, 0);
	}
}
static void zmap_unuse(swp_entry_t entry, struct page *page)
{
	struct shmid_kernel *shp;

	spin_lock(&zmap_list_lock);
	shp = zmap_list;
	while (shp) {
		if (shm_unuse_core(shp, entry, page))
			break;
		shp = (struct shmid_kernel *)shp->attaches;
	}
	spin_unlock(&zmap_list_lock);
}
static void zshm_swap (int prio, int gfp_mask, zone_t *zone)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long idx;
	int loop = 0;
	int counter;
	struct page *page_map;

	counter = 10;	/* maybe we should use zshm_rss */
	if (shm_swap_preop(&swap_entry))
		return;

	spin_lock(&zmap_list_lock);
next_id:
	if ((shp = zswap_shp) == 0) {
		if (loop) {
failed:
			spin_unlock(&zmap_list_lock);
			__swap_free(swap_entry, 2);
			return;
		}
		zswap_shp = shp = zmap_list;
		zswap_idx = 0;
		loop = 1;
		if (shp == 0)
			goto failed;
	}

check_table:
	idx = zswap_idx++;
	if (idx >= shp->shm_npages) {
		zswap_shp = (struct shmid_kernel *)(zswap_shp->attaches);
		zswap_idx = 0;
		goto next_id;
	}

	switch (shm_swap_core(shp, idx, swap_entry, zone, &counter, &page_map)) {
		case RETRY:  goto check_table;
		case FAILED: goto failed;
	}
	spin_unlock(&zmap_list_lock);

	shm_swap_postop(page_map);
}