 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.

#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/vmalloc.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
extern unsigned long get_swap_page (void);
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, int size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static unsigned long shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct vm_area_struct *, struct page *);

static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
static int max_shmid = 0; /* every used id is <= max_shmid */
static DECLARE_WAIT_QUEUE_HEAD(shm_lock); /* calling findkey() may need to wait */
static struct shmid_kernel *shm_segs[SHMMNI];

static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */

static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;

void __init shm_init (void)
	for (id = 0; id < SHMMNI; id++)
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
	shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
	init_waitqueue_head(&shm_lock);

static int findkey (key_t key)
	struct shmid_kernel *shp;

	for (id = 0; id <= max_shmid; id++) {
		while ((shp = shm_segs[id]) == IPC_NOID)
		if (shp == IPC_UNUSED)
		if (key == shp->u.shm_perm.key)

 * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
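 * While a slot holds IPC_NOID, findkey() above waits on shm_lock until the
 * slot is either filled in with a real segment or reset to IPC_UNUSED on
 * failure (see the error paths below), so other callers never see a
 * half-initialized segment.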
static int newseg (key_t key, int shmflg, int size)
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (shm_tot + numpages >= SHMALL)
	for (id = 0; id < SHMMNI; id++)
		if (shm_segs[id] == IPC_UNUSED) {
			shm_segs[id] = (struct shmid_kernel *) IPC_NOID;

	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;

	shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong));
	if (!shp->shm_pages) {
		shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;

	for (i = 0; i < numpages; shp->shm_pages[i++] = 0);

	shp->u.shm_perm.key = key;
	shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
	shp->u.shm_perm.cgid = shp->u.shm_perm.gid = current->egid;
	shp->u.shm_perm.seq = shm_seq;
	shp->u.shm_segsz = size;
	shp->u.shm_cpid = current->pid;
	shp->attaches = NULL;
	shp->u.shm_lpid = shp->u.shm_nattch = 0;
	shp->u.shm_atime = shp->u.shm_dtime = 0;
	shp->u.shm_ctime = CURRENT_TIME;
	shp->shm_npages = numpages;

	return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
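
/*
 * For example (values chosen only for illustration): if SHMMNI were 128, a
 * segment in slot id = 5 with sequence number seq = 3 gets shmid
 * 3 * 128 + 5 = 389; sys_shmctl() and sys_shmat() below recover the slot as
 * shmid % SHMMNI and reject the call if seq != shmid / SHMMNI, which catches
 * stale ids after a slot has been reused.
 */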

asmlinkage int sys_shmget (key_t key, int size, int shmflg)
	struct shmid_kernel *shp;

	down(&current->mm->mmap_sem);
	if (size < 0 || size > shmmax) {
	} else if (key == IPC_PRIVATE) {
		err = newseg(key, shmflg, size);
	} else if ((id = findkey (key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = newseg(key, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		if (shp->u.shm_perm.mode & SHM_DEST)
		else if (size > shp->u.shm_segsz)
		else if (ipcperms (&shp->u.shm_perm, shmflg))
			err = (int) shp->u.shm_perm.seq * SHMMNI + id;
	up(&current->mm->mmap_sem);

 * Only called after testing nattch and SHM_DEST.
 * Here pages, pgtable and shmid_kernel are freed.
static void killseg (int id)
	struct shmid_kernel *shp;

	if (shp == IPC_NOID || shp == IPC_UNUSED) {
		printk ("shm nono: killseg called on unused seg id=%d\n", id);
	shp->u.shm_perm.seq++;	/* for shmat */
	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
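	/*
	 * Keeping shm_seq below (1<<31)/SHMMNI means that
	 * seq * SHMMNI + id (the shmid handed back by newseg()) stays within
	 * the positive range of an int, so ids never look negative to user
	 * space.
	 */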
	shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
	while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
	if (!shp->shm_pages) {
		printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
	numpages = shp->shm_npages;
	for (i = 0; i < numpages ; i++) {
		pte = __pte(shp->shm_pages[i]);
		if (pte_present(pte)) {
			free_page (pte_page(pte));
			swap_free(pte_val(pte));
	vfree(shp->shm_pages);

asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
	struct shmid_ds tbuf;
	struct shmid_kernel *shp;
	struct ipc_perm *ipcp;
	int id, err = -EINVAL;

	if (cmd < 0 || shmid < 0)
	if (cmd == IPC_SET) {
		if(copy_from_user (&tbuf, buf, sizeof (*buf)))

	switch (cmd) { /* replace with proc interface ? */
		struct shminfo shminfo;

		shminfo.shmmni = SHMMNI;
		shminfo.shmmax = shmmax;
		shminfo.shmmin = SHMMIN;
		shminfo.shmall = SHMALL;
		shminfo.shmseg = SHMSEG;
		if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))

		struct shm_info shm_info;

		shm_info.used_ids = used_segs;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))

		if (shmid > max_shmid)
		shp = shm_segs[shmid];
		if (shp == IPC_UNUSED || shp == IPC_NOID)
		if (ipcperms (&shp->u.shm_perm, S_IRUGO))
		id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
		if(copy_to_user (buf, &shp->u, sizeof(*buf)))

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID)
	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
	ipcp = &shp->u.shm_perm;

		if (!capable(CAP_IPC_LOCK))
		if (!(ipcp->mode & SHM_LOCKED))
		ipcp->mode &= ~SHM_LOCKED;

	/* Allow superuser to lock segment in memory */
	/* Should the pages be faulted in here or leave it to user? */
	/* need to determine interaction with current->swappable */
		if (!capable(CAP_IPC_LOCK))
		if (ipcp->mode & SHM_LOCKED)
		ipcp->mode |= SHM_LOCKED;

		if (ipcperms (ipcp, S_IRUGO))
		if(copy_to_user (buf, &shp->u, sizeof(shp->u)))

		if (current->euid == shp->u.shm_perm.uid ||
		    current->euid == shp->u.shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			ipcp->uid = tbuf.shm_perm.uid;
			ipcp->gid = tbuf.shm_perm.gid;
			ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (tbuf.shm_perm.mode & S_IRWXUGO);
			shp->u.shm_ctime = CURRENT_TIME;

		if (current->euid == shp->u.shm_perm.uid ||
		    current->euid == shp->u.shm_perm.cuid ||
		    capable(CAP_SYS_ADMIN)) {
			shp->u.shm_perm.mode |= SHM_DEST;
			if (shp->u.shm_nattch <= 0)

 * The per process internal structure for managing segments is
 * `struct vm_area_struct'.
 * A shmat will add to and shmdt will remove from the list.
 * shmd->vm_mm		the attacher
 * shmd->vm_start	virt addr of attach, multiple of SHMLBA
 * shmd->vm_end		multiple of SHMLBA
 * shmd->vm_next	next attach for task
 * shmd->vm_next_share	next attach for segment
 * shmd->vm_offset	offset into segment
 * shmd->vm_pte		signature for this attach
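 *
 * All attaches of one segment are chained through vm_next_share /
 * vm_pprev_share starting at shp->attaches (see insert_attach() and
 * remove_attach() below), and vm_pte holds SWP_ENTRY(SHM_SWP_TYPE, id),
 * so shm_nopage(), shm_open() and shm_close() can recover the segment id
 * from the vma alone.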

static struct vm_operations_struct shm_vm_ops = {
	shm_open,		/* open - callback for a new vm-area open */
	shm_close,		/* close - callback for when the vm-area is released */
	NULL,			/* no need to sync pages at unmap */
	shm_nopage,		/* nopage */
	shm_swapout,		/* swapout */

/* Insert shmd into the list shp->attaches */
static inline void insert_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
	if((shmd->vm_next_share = shp->attaches) != NULL)
		shp->attaches->vm_pprev_share = &shmd->vm_next_share;
	shp->attaches = shmd;
	shmd->vm_pprev_share = &shp->attaches;

/* Remove shmd from list shp->attaches */
static inline void remove_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
	if(shmd->vm_next_share)
		shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
	*shmd->vm_pprev_share = shmd->vm_next_share;

 * ensure page tables exist
 * mark page table entries with shm_sgn.
static int shm_map (struct vm_area_struct *shmd)
	/* clear old mappings */
	do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

	/* add new mapping */
	tmp = shmd->vm_end - shmd->vm_start;
	if((current->mm->total_vm << PAGE_SHIFT) + tmp
	    > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
	current->mm->total_vm += tmp >> PAGE_SHIFT;
	insert_vm_struct(current->mm, shmd);
	merge_segments(current->mm, shmd->vm_start, shmd->vm_end);

 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
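 *
 * Typical use from user space, for illustration only (not part of this file):
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	void *p = shmat(id, NULL, 0);	-> handled by sys_shmat() below
 *	...
 *	shmdt(p);			-> handled by sys_shmdt()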
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
	struct shmid_kernel *shp;
	struct vm_area_struct *shmd;

	down(&current->mm->mmap_sem);
		/* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		/* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */

	if (!(addr = (ulong) shmaddr)) {
		if (shmflg & SHM_REMAP)
		if (!(addr = get_unmapped_area(addr, shp->u.shm_segsz)))
		if(addr & (SHMLBA - 1)) {
			addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
	} else if (addr & (SHMLBA-1)) {
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1);       /* round down */

	 * Check if addr exceeds TASK_SIZE (from do_mmap)
	len = PAGE_SIZE*shp->shm_npages;
	if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)

	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
		/* printk("shmat() -> EINVAL because segment intersects stack\n"); */

	if (!(shmflg & SHM_REMAP))
		if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))) {
			/* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
				addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */

	if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)

	shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
		kmem_cache_free(vm_area_cachep, shmd);

	shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_file = NULL;
	shmd->vm_ops = &shm_vm_ops;

	shp->u.shm_nattch++;            /* prevent destruction */
	if ((err = shm_map (shmd))) {
		if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
		kmem_cache_free(vm_area_cachep, shmd);

	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */

	shp->u.shm_lpid = current->pid;
	shp->u.shm_atime = CURRENT_TIME;

	up(&current->mm->mmap_sem);

/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
	struct shmid_kernel *shp;

	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	if (shp == IPC_UNUSED) {
		printk("shm_open: unused id=%d PANIC\n", id);
	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
	shp->u.shm_atime = CURRENT_TIME;
	shp->u.shm_lpid = current->pid;

 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
static void shm_close (struct vm_area_struct *shmd)
	struct shmid_kernel *shp;

	/* remove from the list of attaches of the shm segment */
	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	remove_attach(shp,shmd);  /* remove from shp->attaches */
	shp->u.shm_lpid = current->pid;
	shp->u.shm_dtime = CURRENT_TIME;
	if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)

 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
asmlinkage int sys_shmdt (char *shmaddr)
	struct vm_area_struct *shmd, *shmdnext;

	down(&current->mm->mmap_sem);
	for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - shmd->vm_offset == (ulong) shmaddr)
			do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
	up(&current->mm->mmap_sem);

 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them.
static int shm_swapout(struct vm_area_struct * vma, struct page * page)

 * page not present ... go through shm_pages
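 *
 * The fault path below: allocate a free page first (get_free_page() may
 * sleep), then re-read shp->shm_pages[idx] in case another fault got there
 * first, pull the data in from swap if the entry is a swap entry, and
 * finally install the dirty pte and bump the page's reference count before
 * returning it.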
static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
	struct shmid_kernel *shp;
	unsigned int id, idx;

	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;

	if (id > max_shmid) {
		printk ("shm_nopage: id=%d too big. proc mem corrupted\n", id);
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		printk ("shm_nopage: id=%d invalid. Race.\n", id);
	if (idx >= shp->shm_npages) {
		printk ("shm_nopage : too large page index. id=%d\n", id);

	pte = __pte(shp->shm_pages[idx]);
	if (!pte_present(pte)) {
		unsigned long page = get_free_page(GFP_USER);
		pte = __pte(shp->shm_pages[idx]);
		if (pte_present(pte)) {
			free_page (page); /* doesn't sleep */
		if (!pte_none(pte)) {
			rw_swap_page_nocache(READ, pte_val(pte), (char *)page);
			pte = __pte(shp->shm_pages[idx]);
			if (pte_present(pte)) {
				free_page (page); /* doesn't sleep */
			swap_free(pte_val(pte));
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		shp->shm_pages[idx] = pte_val(pte);
		--current->maj_flt;  /* was incremented in do_no_page */

done:	/* pte_val(pte) == shp->shm_pages[idx] */
	atomic_inc(&mem_map[MAP_NR(pte_page(pte))].count);
	return pte_page(pte);

 * Goes through counter = (shm_rss >> prio) present shm pages.
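 *
 * For example (numbers only for illustration): with shm_rss == 1024 resident
 * shm pages and prio == 6, at most 1024 >> 6 == 16 pages are examined per
 * call; a smaller prio makes the scan more aggressive.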
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

int shm_swap (int prio, int gfp_mask)
	struct shmid_kernel *shp;
	unsigned long swap_nr;
	unsigned long id, idx;

	counter = shm_rss >> prio;
	if (!counter || !(swap_nr = get_swap_page()))

	shp = shm_segs[swap_id];
	if (shp == IPC_UNUSED || shp == IPC_NOID || shp->u.shm_perm.mode & SHM_LOCKED) {
		if (++swap_id > max_shmid) {

	if (idx >= shp->shm_npages)

	page = __pte(shp->shm_pages[idx]);
	if (!pte_present(page))
	if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))]))
	if (--counter < 0) { /* failed */
	if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1)

	shp->shm_pages[idx] = swap_nr;
	rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page));
	free_page(pte_page(page));

 * Free the swap entry and set the new pte for the shm page.
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   unsigned long page, unsigned long entry)
	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	shp->shm_pages[idx] = pte_val(pte);
	atomic_inc(&mem_map[MAP_NR(page)].count);

 * shm_unuse() searches for a shm page that may have been swapped out to the
 * given swap entry and, if found, maps the page back in.
void shm_unuse(unsigned long entry, unsigned long page)
	for (i = 0; i < SHMMNI; i++)
		if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID)
			for (n = 0; n < shm_segs[i]->shm_npages; n++)
				if (shm_segs[i]->shm_pages[n] == entry)
					shm_unuse_page(shm_segs[i], n,