Clean up syscall code to look more like its mips64 equivalent.
[linux-2.6/linux-mips.git] / mm / shmem.c
blob 7d26873092b916d6bc38e275fa4de3d9d8d26af6
1 /*
2 * Resizable virtual memory filesystem for Linux.
4 * Copyright (C) 2000 Linus Torvalds.
5 * 2000 Transmeta Corp.
6 * 2000-2001 Christoph Rohland
7 * 2000-2001 SAP AG
8 * 2002 Red Hat Inc.
9 * Copyright (C) 2002-2003 Hugh Dickins.
10 * Copyright (C) 2002-2003 VERITAS Software Corporation.
12 * This file is released under the GPL.
16 * This virtual memory filesystem is heavily based on the ramfs. It
17 * extends ramfs by the ability to use swap and honor resource limits
18 * which makes it a completely usable filesystem.
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/devfs_fs_kernel.h>
25 #include <linux/fs.h>
26 #include <linux/mm.h>
27 #include <linux/mman.h>
28 #include <linux/file.h>
29 #include <linux/swap.h>
30 #include <linux/pagemap.h>
31 #include <linux/string.h>
32 #include <linux/slab.h>
33 #include <linux/backing-dev.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/mount.h>
36 #include <linux/writeback.h>
37 #include <linux/vfs.h>
38 #include <linux/blkdev.h>
39 #include <linux/security.h>
40 #include <asm/uaccess.h>
41 #include <asm/div64.h>
43 /* This magic number is used in glibc for posix shared memory */
44 #define TMPFS_MAGIC 0x01021994
46 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
47 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
48 #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
50 #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
51 #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
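/*
 * Worked example, assuming the common configuration of a 4096-byte
 * PAGE_CACHE_SIZE, 4-byte unsigned long and SHMEM_NR_DIRECT == 16
 * (other configurations scale accordingly):
 *
 *	ENTRIES_PER_PAGE	= 4096 / 4	= 1024
 *	ENTRIES_PER_PAGEPAGE	= 1024 * 1024	= 1048576
 *	BLOCKS_PER_PAGE		= 4096 / 512	= 8	(512-byte units of i_blocks)
 *	SHMEM_MAX_INDEX		= 16 + 524288 * 1025	~ 537 million pages
 *	SHMEM_MAX_BYTES		~ 537e6 * 4096		~ 2 TB per file
 */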
53 #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
55 /* Pretend that each entry is of this size in directory's i_size */
56 #define BOGO_DIRENT_SIZE 20
58 /* Keep swapped page count in private field of indirect struct page */
59 #define nr_swapped private
61 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
62 enum sgp_type {
63 SGP_QUICK, /* don't try more than file page cache lookup */
64 SGP_READ, /* don't exceed i_size, don't allocate page */
65 SGP_CACHE, /* don't exceed i_size, may allocate page */
66 SGP_WRITE, /* may exceed i_size, may allocate page */
69 static int shmem_getpage(struct inode *inode, unsigned long idx,
70 struct page **pagep, enum sgp_type sgp);
72 static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
75 * The above definition of ENTRIES_PER_PAGE, and the use of
76 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
77 * might be reconsidered if it ever diverges from PAGE_SIZE.
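 * (With PAGE_CACHE_SIZE == PAGE_SIZE, the usual case, this is an
 * order-0 allocation, i.e. a single page.)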
79 return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
82 static inline void shmem_dir_free(struct page *page)
84 __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
87 static struct page **shmem_dir_map(struct page *page)
89 return (struct page **)kmap_atomic(page, KM_USER0);
92 static inline void shmem_dir_unmap(struct page **dir)
94 kunmap_atomic(dir, KM_USER0);
97 static swp_entry_t *shmem_swp_map(struct page *page)
100 * We have to avoid the unconditional inc_preempt_count()
101 * in kmap_atomic(), since shmem_swp_unmap() will also be
102 * applied to the low memory addresses within i_direct[].
103 * PageHighMem and high_memory tests are good for all arches
104 * and configs: highmem_start_page and FIXADDR_START are not.
106 return PageHighMem(page)?
107 (swp_entry_t *)kmap_atomic(page, KM_USER1):
108 (swp_entry_t *)page_address(page);
111 static inline void shmem_swp_unmap(swp_entry_t *entry)
113 if (entry >= (swp_entry_t *)high_memory)
114 kunmap_atomic(entry, KM_USER1);
117 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
119 return sb->s_fs_info;
122 static struct super_operations shmem_ops;
123 static struct address_space_operations shmem_aops;
124 static struct file_operations shmem_file_operations;
125 static struct inode_operations shmem_inode_operations;
126 static struct inode_operations shmem_dir_inode_operations;
127 static struct vm_operations_struct shmem_vm_ops;
129 static struct backing_dev_info shmem_backing_dev_info = {
130 .ra_pages = 0, /* No readahead */
131 .memory_backed = 1, /* Does not contribute to dirty memory */
134 LIST_HEAD(shmem_inodes);
135 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
137 static void shmem_free_block(struct inode *inode)
139 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
140 spin_lock(&sbinfo->stat_lock);
141 sbinfo->free_blocks++;
142 inode->i_blocks -= BLOCKS_PER_PAGE;
143 spin_unlock(&sbinfo->stat_lock);
147 * shmem_recalc_inode - recalculate the size of an inode
149 * @inode: inode to recalc
151 * We have to calculate the free blocks since the mm can drop
152 * undirtied hole pages behind our back.
154 * But normally info->alloced == inode->i_mapping->nrpages + info->swapped
155 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
157 * It has to be called with the spinlock held.
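 * For example: with info->alloced == 10, info->swapped == 2 and
 * nrpages == 6, two pages were dropped behind our back, so two blocks
 * go back to sbinfo->free_blocks and i_blocks shrinks by
 * 2 * BLOCKS_PER_PAGE.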
159 static void shmem_recalc_inode(struct inode *inode)
161 struct shmem_inode_info *info = SHMEM_I(inode);
162 long freed;
164 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
165 if (freed > 0) {
166 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
167 info->alloced -= freed;
168 spin_lock(&sbinfo->stat_lock);
169 sbinfo->free_blocks += freed;
170 inode->i_blocks -= freed*BLOCKS_PER_PAGE;
171 spin_unlock(&sbinfo->stat_lock);
176 * shmem_swp_entry - find the swap vector position in the info structure
178 * @info: info structure for the inode
179 * @index: index of the page to find
180 * @page: optional page to add to the structure. Has to be preset to
181 * all zeros
183 * If there is no space allocated yet it will return NULL when
184 * page is NULL, else it will use the page for the needed block,
185 * setting it to NULL on return to indicate that it has been used.
187 * The swap vector is organized the following way:
189 * There are SHMEM_NR_DIRECT entries directly stored in the
190 * shmem_inode_info structure. So small files do not need an additional
191 * allocation.
193 * For pages with index > SHMEM_NR_DIRECT there is the pointer
194 * i_indirect which points to a page which holds in the first half
195 * doubly indirect blocks, in the second half triple indirect blocks:
197 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
198 * following layout (for SHMEM_NR_DIRECT == 16):
200 * i_indirect -> dir --> 16-19
201 * | +-> 20-23
203 * +-->dir2 --> 24-27
204 * | +-> 28-31
205 * | +-> 32-35
206 * | +-> 36-39
208 * +-->dir3 --> 40-43
209 * +-> 44-47
210 * +-> 48-51
211 * +-> 52-55
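 * Worked example for the artificial layout above: index 30 lives in
 * the 28-31 page under dir2.  The code below finds it as
 *	index = 30 - 16 = 14;	offset = 14 % 4 = 2;	index = 14 / 4 = 3;
 * 3 >= ENTRIES_PER_PAGE/2, so take the triple-indirect half:
 *	index -= 2 (now 1);	dir += 2 + 0	(slot 2 of the top page, dir2);
 * then slot 1 of dir2 (the 28-31 page), and finally entry "offset" (2)
 * within that page, i.e. the entry for index 30.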
213 static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
215 unsigned long offset;
216 struct page **dir;
217 struct page *subdir;
219 if (index < SHMEM_NR_DIRECT)
220 return info->i_direct+index;
221 if (!info->i_indirect) {
222 if (page) {
223 info->i_indirect = *page;
224 *page = NULL;
226 return NULL; /* need another page */
229 index -= SHMEM_NR_DIRECT;
230 offset = index % ENTRIES_PER_PAGE;
231 index /= ENTRIES_PER_PAGE;
232 dir = shmem_dir_map(info->i_indirect);
234 if (index >= ENTRIES_PER_PAGE/2) {
235 index -= ENTRIES_PER_PAGE/2;
236 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
237 index %= ENTRIES_PER_PAGE;
238 subdir = *dir;
239 if (!subdir) {
240 if (page) {
241 *dir = *page;
242 *page = NULL;
244 shmem_dir_unmap(dir);
245 return NULL; /* need another page */
247 shmem_dir_unmap(dir);
248 dir = shmem_dir_map(subdir);
251 dir += index;
252 subdir = *dir;
253 if (!subdir) {
254 if (!page || !(subdir = *page)) {
255 shmem_dir_unmap(dir);
256 return NULL; /* need a page */
258 *dir = subdir;
259 *page = NULL;
261 shmem_dir_unmap(dir);
264 * With apologies... caller shmem_swp_alloc passes non-NULL
265 * page (though perhaps NULL *page); and now that we know this
266 * indirect page has been allocated, we can shortcut the final
267 * kmap if we know it contains no swap entries, as is commonly
268 * the case: return pointer to a 0 which doesn't need kmapping.
270 return (page && !subdir->nr_swapped)?
271 (swp_entry_t *)&subdir->nr_swapped:
272 shmem_swp_map(subdir) + offset;
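/*
 * shmem_swp_set - install or clear a swap entry at *entry, keeping
 * info->swapped in step and, for entries that live outside i_direct,
 * the nr_swapped count of the indirect page holding the entry.
 */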
275 static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
277 long incdec = value? 1: -1;
279 entry->val = value;
280 info->swapped += incdec;
281 if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
282 kmap_atomic_to_page(entry)->nr_swapped += incdec;
286 * shmem_swp_alloc - get the position of the swap entry for the page.
287 * If it does not exist allocate the entry.
289 * @info: info structure for the inode
290 * @index: index of the page to find
291 * @sgp: check and recheck i_size? skip allocation?
293 static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
295 struct inode *inode = &info->vfs_inode;
296 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
297 struct page *page = NULL;
298 swp_entry_t *entry;
299 static const swp_entry_t unswapped = {0};
301 if (sgp != SGP_WRITE &&
302 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
303 return ERR_PTR(-EINVAL);
305 while (!(entry = shmem_swp_entry(info, index, &page))) {
306 if (sgp == SGP_READ)
307 return (swp_entry_t *) &unswapped;
309 * Test free_blocks against 1 not 0, since we have 1 data
310 * page (and perhaps indirect index pages) yet to allocate:
311 * a waste to allocate index if we cannot allocate data.
313 spin_lock(&sbinfo->stat_lock);
314 if (sbinfo->free_blocks <= 1) {
315 spin_unlock(&sbinfo->stat_lock);
316 return ERR_PTR(-ENOSPC);
318 sbinfo->free_blocks--;
319 inode->i_blocks += BLOCKS_PER_PAGE;
320 spin_unlock(&sbinfo->stat_lock);
322 spin_unlock(&info->lock);
323 page = shmem_dir_alloc(inode->i_mapping->gfp_mask);
324 if (page) {
325 clear_highpage(page);
326 page->nr_swapped = 0;
328 spin_lock(&info->lock);
330 if (!page) {
331 shmem_free_block(inode);
332 return ERR_PTR(-ENOMEM);
334 if (sgp != SGP_WRITE &&
335 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
336 entry = ERR_PTR(-EINVAL);
337 break;
339 if (info->next_index <= index)
340 info->next_index = index + 1;
342 if (page) {
343 /* another task gave its page, or truncated the file */
344 shmem_free_block(inode);
345 shmem_dir_free(page);
347 if (info->next_index <= index && !IS_ERR(entry))
348 info->next_index = index + 1;
349 return entry;
353 * shmem_free_swp - free some swap entries in a directory
355 * @dir: pointer to the directory
356 * @edir: pointer after last entry of the directory
358 static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
360 swp_entry_t *ptr;
361 int freed = 0;
363 for (ptr = dir; ptr < edir; ptr++) {
364 if (ptr->val) {
365 free_swap_and_cache(*ptr);
366 *ptr = (swp_entry_t){0};
367 freed++;
370 return freed;
373 static void shmem_truncate(struct inode *inode)
375 struct shmem_inode_info *info = SHMEM_I(inode);
376 unsigned long idx;
377 unsigned long size;
378 unsigned long limit;
379 unsigned long stage;
380 struct page **dir;
381 struct page *subdir;
382 struct page *empty;
383 swp_entry_t *ptr;
384 int offset;
385 int freed;
387 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
388 idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
389 if (idx >= info->next_index)
390 return;
392 spin_lock(&info->lock);
393 limit = info->next_index;
394 info->next_index = idx;
395 if (info->swapped && idx < SHMEM_NR_DIRECT) {
396 ptr = info->i_direct;
397 size = limit;
398 if (size > SHMEM_NR_DIRECT)
399 size = SHMEM_NR_DIRECT;
400 info->swapped -= shmem_free_swp(ptr+idx, ptr+size);
402 if (!info->i_indirect)
403 goto done2;
405 BUG_ON(limit <= SHMEM_NR_DIRECT);
406 limit -= SHMEM_NR_DIRECT;
407 idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
408 offset = idx % ENTRIES_PER_PAGE;
409 idx -= offset;
411 empty = NULL;
412 dir = shmem_dir_map(info->i_indirect);
413 stage = ENTRIES_PER_PAGEPAGE/2;
414 if (idx < ENTRIES_PER_PAGEPAGE/2)
415 dir += idx/ENTRIES_PER_PAGE;
416 else {
417 dir += ENTRIES_PER_PAGE/2;
418 dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
419 while (stage <= idx)
420 stage += ENTRIES_PER_PAGEPAGE;
421 if (*dir) {
422 subdir = *dir;
423 size = ((idx - ENTRIES_PER_PAGEPAGE/2) %
424 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
425 if (!size && !offset) {
426 empty = subdir;
427 *dir = NULL;
429 shmem_dir_unmap(dir);
430 dir = shmem_dir_map(subdir) + size;
431 } else {
432 offset = 0;
433 idx = stage;
437 for (; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
438 if (unlikely(idx == stage)) {
439 shmem_dir_unmap(dir-1);
440 dir = shmem_dir_map(info->i_indirect) +
441 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
442 while (!*dir) {
443 dir++;
444 idx += ENTRIES_PER_PAGEPAGE;
445 if (idx >= limit)
446 goto done1;
448 stage = idx + ENTRIES_PER_PAGEPAGE;
449 subdir = *dir;
450 *dir = NULL;
451 shmem_dir_unmap(dir);
452 if (empty) {
453 shmem_dir_free(empty);
454 info->alloced++;
456 empty = subdir;
457 cond_resched_lock(&info->lock);
458 dir = shmem_dir_map(subdir);
460 subdir = *dir;
461 if (subdir && subdir->nr_swapped) {
462 ptr = shmem_swp_map(subdir);
463 size = limit - idx;
464 if (size > ENTRIES_PER_PAGE)
465 size = ENTRIES_PER_PAGE;
466 freed = shmem_free_swp(ptr+offset, ptr+size);
467 shmem_swp_unmap(ptr);
468 info->swapped -= freed;
469 subdir->nr_swapped -= freed;
470 BUG_ON(subdir->nr_swapped > offset);
472 if (offset)
473 offset = 0;
474 else if (subdir) {
475 *dir = NULL;
476 shmem_dir_free(subdir);
477 info->alloced++;
480 done1:
481 shmem_dir_unmap(dir-1);
482 if (empty) {
483 shmem_dir_free(empty);
484 info->alloced++;
486 if (info->next_index <= SHMEM_NR_DIRECT) {
487 shmem_dir_free(info->i_indirect);
488 info->i_indirect = NULL;
489 info->alloced++;
491 done2:
492 BUG_ON(info->swapped > info->next_index);
493 shmem_recalc_inode(inode);
494 spin_unlock(&info->lock);
497 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
499 struct inode *inode = dentry->d_inode;
500 struct page *page = NULL;
501 long change = 0;
502 int error;
504 if ((attr->ia_valid & ATTR_SIZE) && (attr->ia_size <= SHMEM_MAX_BYTES)) {
506 * Account swap file usage based on new file size,
507 * but just let vmtruncate fail on out-of-range sizes.
509 change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size);
510 if (change > 0) {
511 if (security_vm_enough_memory(change))
512 return -ENOMEM;
513 } else if (attr->ia_size < inode->i_size) {
514 vm_unacct_memory(-change);
516 * If truncating down to a partial page, then
517 * if that page is already allocated, hold it
518 * in memory until the truncation is over, so
519 * truncate_partial_page cannot miss it were
520 * it assigned to swap.
522 if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
523 (void) shmem_getpage(inode,
524 attr->ia_size>>PAGE_CACHE_SHIFT,
525 &page, SGP_READ);
530 error = inode_change_ok(inode, attr);
531 if (!error)
532 error = inode_setattr(inode, attr);
533 if (page)
534 page_cache_release(page);
535 if (error)
536 vm_unacct_memory(change);
537 return error;
540 static void shmem_delete_inode(struct inode *inode)
542 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
543 struct shmem_inode_info *info = SHMEM_I(inode);
545 if (inode->i_op->truncate == shmem_truncate) {
546 spin_lock(&shmem_ilock);
547 list_del(&info->list);
548 spin_unlock(&shmem_ilock);
549 if (info->flags & VM_ACCOUNT)
550 vm_unacct_memory(VM_ACCT(inode->i_size));
551 inode->i_size = 0;
552 shmem_truncate(inode);
554 BUG_ON(inode->i_blocks);
555 spin_lock(&sbinfo->stat_lock);
556 sbinfo->free_inodes++;
557 spin_unlock(&sbinfo->stat_lock);
558 clear_inode(inode);
561 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
563 swp_entry_t *ptr;
565 for (ptr = dir; ptr < edir; ptr++) {
566 if (ptr->val == entry.val)
567 return ptr - dir;
569 return -1;
572 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
574 struct inode *inode;
575 unsigned long idx;
576 unsigned long size;
577 unsigned long limit;
578 unsigned long stage;
579 struct page **dir;
580 struct page *subdir;
581 swp_entry_t *ptr;
582 int offset;
584 idx = 0;
585 ptr = info->i_direct;
586 spin_lock(&info->lock);
587 limit = info->next_index;
588 size = limit;
589 if (size > SHMEM_NR_DIRECT)
590 size = SHMEM_NR_DIRECT;
591 offset = shmem_find_swp(entry, ptr, ptr+size);
592 if (offset >= 0)
593 goto found;
594 if (!info->i_indirect)
595 goto lost2;
596 /* we might be racing with shmem_truncate */
597 if (limit <= SHMEM_NR_DIRECT)
598 goto lost2;
600 dir = shmem_dir_map(info->i_indirect);
601 stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
603 for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
604 if (unlikely(idx == stage)) {
605 shmem_dir_unmap(dir-1);
606 dir = shmem_dir_map(info->i_indirect) +
607 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
608 while (!*dir) {
609 dir++;
610 idx += ENTRIES_PER_PAGEPAGE;
611 if (idx >= limit)
612 goto lost1;
614 stage = idx + ENTRIES_PER_PAGEPAGE;
615 subdir = *dir;
616 shmem_dir_unmap(dir);
617 dir = shmem_dir_map(subdir);
619 subdir = *dir;
620 if (subdir && subdir->nr_swapped) {
621 ptr = shmem_swp_map(subdir);
622 size = limit - idx;
623 if (size > ENTRIES_PER_PAGE)
624 size = ENTRIES_PER_PAGE;
625 offset = shmem_find_swp(entry, ptr, ptr+size);
626 if (offset >= 0) {
627 shmem_dir_unmap(dir);
628 goto found;
630 shmem_swp_unmap(ptr);
633 lost1:
634 shmem_dir_unmap(dir-1);
635 lost2:
636 spin_unlock(&info->lock);
637 return 0;
638 found:
639 idx += offset;
640 inode = &info->vfs_inode;
642 /* Racing against delete or truncate? Must leave out of page cache */
643 limit = (inode->i_state & I_FREEING)? 0:
644 (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
646 if (idx >= limit ||
647 move_from_swap_cache(page, idx, inode->i_mapping) == 0)
648 shmem_swp_set(info, ptr + offset, 0);
649 shmem_swp_unmap(ptr);
650 spin_unlock(&info->lock);
652 * Decrement swap count even when the entry is left behind:
653 * try_to_unuse will skip over mms, then reincrement count.
655 swap_free(entry);
656 return idx < limit;
660 * shmem_unuse() searches for a shmem page that was swapped out to the given entry.
662 int shmem_unuse(swp_entry_t entry, struct page *page)
664 struct list_head *p;
665 struct shmem_inode_info *info;
666 int found = 0;
668 spin_lock(&shmem_ilock);
669 list_for_each(p, &shmem_inodes) {
670 info = list_entry(p, struct shmem_inode_info, list);
672 if (info->swapped && shmem_unuse_inode(info, entry, page)) {
673 /* move head to start search for next from here */
674 list_move_tail(&shmem_inodes, &info->list);
675 found = 1;
676 break;
679 spin_unlock(&shmem_ilock);
680 return found;
684 * Move the page from the page cache to the swap cache.
686 static int shmem_writepage(struct page *page, struct writeback_control *wbc)
688 struct shmem_inode_info *info;
689 swp_entry_t *entry, swap;
690 struct address_space *mapping;
691 unsigned long index;
692 struct inode *inode;
694 BUG_ON(!PageLocked(page));
695 BUG_ON(page_mapped(page));
697 mapping = page->mapping;
698 index = page->index;
699 inode = mapping->host;
700 info = SHMEM_I(inode);
701 if (info->flags & VM_LOCKED)
702 goto redirty;
703 swap = get_swap_page();
704 if (!swap.val)
705 goto redirty;
707 spin_lock(&info->lock);
708 shmem_recalc_inode(inode);
709 BUG_ON(index >= info->next_index);
710 entry = shmem_swp_entry(info, index, NULL);
711 BUG_ON(!entry);
712 BUG_ON(entry->val);
714 if (move_to_swap_cache(page, swap) == 0) {
715 shmem_swp_set(info, entry, swap.val);
716 shmem_swp_unmap(entry);
717 spin_unlock(&info->lock);
718 unlock_page(page);
719 return 0;
722 shmem_swp_unmap(entry);
723 spin_unlock(&info->lock);
724 swap_free(swap);
725 redirty:
726 set_page_dirty(page);
727 return WRITEPAGE_ACTIVATE; /* Return with the page locked */
731 * shmem_getpage - either get the page from swap or allocate a new one
733 * If we allocate a new one we do not mark it dirty. That's up to the
734 * vm. If we swap it in we mark it dirty, since we also free the swap
735 * entry: a page cannot live in both the swap cache and the page cache
737 static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
739 struct address_space *mapping = inode->i_mapping;
740 struct shmem_inode_info *info = SHMEM_I(inode);
741 struct shmem_sb_info *sbinfo;
742 struct page *filepage = *pagep;
743 struct page *swappage;
744 swp_entry_t *entry;
745 swp_entry_t swap;
746 int error;
748 if (idx >= SHMEM_MAX_INDEX)
749 return -EFBIG;
751 * Normally, filepage is NULL on entry, and either found
752 * uptodate immediately, or allocated and zeroed, or read
753 * in under swappage, which is then assigned to filepage.
754 * But shmem_prepare_write passes in a locked filepage,
755 * which may be found not uptodate by other callers too,
756 * and may need to be copied from the swappage read in.
758 repeat:
759 if (!filepage)
760 filepage = find_lock_page(mapping, idx);
761 if (filepage && PageUptodate(filepage))
762 goto done;
763 error = 0;
764 if (sgp == SGP_QUICK)
765 goto failed;
767 spin_lock(&info->lock);
768 shmem_recalc_inode(inode);
769 entry = shmem_swp_alloc(info, idx, sgp);
770 if (IS_ERR(entry)) {
771 spin_unlock(&info->lock);
772 error = PTR_ERR(entry);
773 goto failed;
775 swap = *entry;
777 if (swap.val) {
778 /* Look it up and read it in.. */
779 swappage = lookup_swap_cache(swap);
780 if (!swappage) {
781 shmem_swp_unmap(entry);
782 spin_unlock(&info->lock);
783 swapin_readahead(swap);
784 swappage = read_swap_cache_async(swap);
785 if (!swappage) {
786 spin_lock(&info->lock);
787 entry = shmem_swp_alloc(info, idx, sgp);
788 if (IS_ERR(entry))
789 error = PTR_ERR(entry);
790 else {
791 if (entry->val == swap.val)
792 error = -ENOMEM;
793 shmem_swp_unmap(entry);
795 spin_unlock(&info->lock);
796 if (error)
797 goto failed;
798 goto repeat;
800 wait_on_page_locked(swappage);
801 page_cache_release(swappage);
802 goto repeat;
805 /* We have to do this with page locked to prevent races */
806 if (TestSetPageLocked(swappage)) {
807 shmem_swp_unmap(entry);
808 spin_unlock(&info->lock);
809 wait_on_page_locked(swappage);
810 page_cache_release(swappage);
811 goto repeat;
813 if (PageWriteback(swappage)) {
814 shmem_swp_unmap(entry);
815 spin_unlock(&info->lock);
816 wait_on_page_writeback(swappage);
817 unlock_page(swappage);
818 page_cache_release(swappage);
819 goto repeat;
821 if (!PageUptodate(swappage)) {
822 shmem_swp_unmap(entry);
823 spin_unlock(&info->lock);
824 unlock_page(swappage);
825 page_cache_release(swappage);
826 error = -EIO;
827 goto failed;
830 if (filepage) {
831 shmem_swp_set(info, entry, 0);
832 shmem_swp_unmap(entry);
833 delete_from_swap_cache(swappage);
834 spin_unlock(&info->lock);
835 copy_highpage(filepage, swappage);
836 unlock_page(swappage);
837 page_cache_release(swappage);
838 flush_dcache_page(filepage);
839 SetPageUptodate(filepage);
840 set_page_dirty(filepage);
841 swap_free(swap);
842 } else if (!(error = move_from_swap_cache(
843 swappage, idx, mapping))) {
844 shmem_swp_set(info, entry, 0);
845 shmem_swp_unmap(entry);
846 spin_unlock(&info->lock);
847 filepage = swappage;
848 swap_free(swap);
849 } else {
850 shmem_swp_unmap(entry);
851 spin_unlock(&info->lock);
852 unlock_page(swappage);
853 page_cache_release(swappage);
854 if (error == -ENOMEM) {
855 /* let kswapd refresh zone for GFP_ATOMICs */
856 blk_congestion_wait(WRITE, HZ/50);
858 goto repeat;
860 } else if (sgp == SGP_READ && !filepage) {
861 shmem_swp_unmap(entry);
862 filepage = find_get_page(mapping, idx);
863 if (filepage &&
864 (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
865 spin_unlock(&info->lock);
866 wait_on_page_locked(filepage);
867 page_cache_release(filepage);
868 filepage = NULL;
869 goto repeat;
871 spin_unlock(&info->lock);
872 } else {
873 shmem_swp_unmap(entry);
874 sbinfo = SHMEM_SB(inode->i_sb);
875 spin_lock(&sbinfo->stat_lock);
876 if (sbinfo->free_blocks == 0) {
877 spin_unlock(&sbinfo->stat_lock);
878 spin_unlock(&info->lock);
879 error = -ENOSPC;
880 goto failed;
882 sbinfo->free_blocks--;
883 inode->i_blocks += BLOCKS_PER_PAGE;
884 spin_unlock(&sbinfo->stat_lock);
886 if (!filepage) {
887 spin_unlock(&info->lock);
888 filepage = page_cache_alloc(mapping);
889 if (!filepage) {
890 shmem_free_block(inode);
891 error = -ENOMEM;
892 goto failed;
895 spin_lock(&info->lock);
896 entry = shmem_swp_alloc(info, idx, sgp);
897 if (IS_ERR(entry))
898 error = PTR_ERR(entry);
899 else {
900 swap = *entry;
901 shmem_swp_unmap(entry);
903 if (error || swap.val || 0 != add_to_page_cache_lru(
904 filepage, mapping, idx, GFP_ATOMIC)) {
905 spin_unlock(&info->lock);
906 page_cache_release(filepage);
907 shmem_free_block(inode);
908 filepage = NULL;
909 if (error)
910 goto failed;
911 goto repeat;
915 info->alloced++;
916 spin_unlock(&info->lock);
917 clear_highpage(filepage);
918 flush_dcache_page(filepage);
919 SetPageUptodate(filepage);
921 done:
922 if (!*pagep) {
923 if (filepage) {
924 unlock_page(filepage);
925 *pagep = filepage;
926 } else
927 *pagep = ZERO_PAGE(0);
929 return 0;
931 failed:
932 if (*pagep != filepage) {
933 unlock_page(filepage);
934 page_cache_release(filepage);
936 return error;
939 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
941 struct inode *inode = vma->vm_file->f_dentry->d_inode;
942 struct page *page = NULL;
943 unsigned long idx;
944 int error;
946 idx = (address - vma->vm_start) >> PAGE_SHIFT;
947 idx += vma->vm_pgoff;
948 idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
950 error = shmem_getpage(inode, idx, &page, SGP_CACHE);
951 if (error)
952 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
954 mark_page_accessed(page);
955 return page;
958 static int shmem_populate(struct vm_area_struct *vma,
959 unsigned long addr, unsigned long len,
960 pgprot_t prot, unsigned long pgoff, int nonblock)
962 struct inode *inode = vma->vm_file->f_dentry->d_inode;
963 struct mm_struct *mm = vma->vm_mm;
964 enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
965 unsigned long size;
967 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
968 if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
969 return -EINVAL;
971 while ((long) len > 0) {
972 struct page *page = NULL;
973 int err;
975 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
977 err = shmem_getpage(inode, pgoff, &page, sgp);
978 if (err)
979 return err;
980 if (page) {
981 mark_page_accessed(page);
982 err = install_page(mm, vma, addr, page, prot);
983 if (err) {
984 page_cache_release(page);
985 return err;
988 len -= PAGE_SIZE;
989 addr += PAGE_SIZE;
990 pgoff++;
992 return 0;
995 void shmem_lock(struct file *file, int lock)
997 struct inode *inode = file->f_dentry->d_inode;
998 struct shmem_inode_info *info = SHMEM_I(inode);
1000 spin_lock(&info->lock);
1001 if (lock)
1002 info->flags |= VM_LOCKED;
1003 else
1004 info->flags &= ~VM_LOCKED;
1005 spin_unlock(&info->lock);
1008 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1010 struct vm_operations_struct *ops;
1011 struct inode *inode = file->f_dentry->d_inode;
1013 ops = &shmem_vm_ops;
1014 if (!S_ISREG(inode->i_mode))
1015 return -EACCES;
1016 update_atime(inode);
1017 vma->vm_ops = ops;
1018 return 0;
1021 static struct inode *
1022 shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1024 struct inode *inode;
1025 struct shmem_inode_info *info;
1026 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1028 spin_lock(&sbinfo->stat_lock);
1029 if (!sbinfo->free_inodes) {
1030 spin_unlock(&sbinfo->stat_lock);
1031 return NULL;
1033 sbinfo->free_inodes--;
1034 spin_unlock(&sbinfo->stat_lock);
1036 inode = new_inode(sb);
1037 if (inode) {
1038 inode->i_mode = mode;
1039 inode->i_uid = current->fsuid;
1040 inode->i_gid = current->fsgid;
1041 inode->i_blksize = PAGE_CACHE_SIZE;
1042 inode->i_blocks = 0;
1043 inode->i_rdev = NODEV;
1044 inode->i_mapping->a_ops = &shmem_aops;
1045 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1046 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1047 info = SHMEM_I(inode);
1048 memset(info, 0, (char *)inode - (char *)info);
1049 spin_lock_init(&info->lock);
1050 info->flags = VM_ACCOUNT;
1051 switch (mode & S_IFMT) {
1052 default:
1053 init_special_inode(inode, mode, dev);
1054 break;
1055 case S_IFREG:
1056 inode->i_op = &shmem_inode_operations;
1057 inode->i_fop = &shmem_file_operations;
1058 spin_lock(&shmem_ilock);
1059 list_add_tail(&info->list, &shmem_inodes);
1060 spin_unlock(&shmem_ilock);
1061 break;
1062 case S_IFDIR:
1063 inode->i_nlink++;
1064 /* Some things misbehave if size == 0 on a directory */
1065 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1066 inode->i_op = &shmem_dir_inode_operations;
1067 inode->i_fop = &simple_dir_operations;
1068 break;
1069 case S_IFLNK:
1070 break;
1073 return inode;
1076 static int shmem_set_size(struct shmem_sb_info *info,
1077 unsigned long max_blocks, unsigned long max_inodes)
1079 int error;
1080 unsigned long blocks, inodes;
1082 spin_lock(&info->stat_lock);
1083 blocks = info->max_blocks - info->free_blocks;
1084 inodes = info->max_inodes - info->free_inodes;
1085 error = -EINVAL;
1086 if (max_blocks < blocks)
1087 goto out;
1088 if (max_inodes < inodes)
1089 goto out;
1090 error = 0;
1091 info->max_blocks = max_blocks;
1092 info->free_blocks = max_blocks - blocks;
1093 info->max_inodes = max_inodes;
1094 info->free_inodes = max_inodes - inodes;
1095 out:
1096 spin_unlock(&info->stat_lock);
1097 return error;
1100 #ifdef CONFIG_TMPFS
1102 static struct inode_operations shmem_symlink_inode_operations;
1103 static struct inode_operations shmem_symlink_inline_operations;
1106 * Normally tmpfs makes no use of shmem_prepare_write, but it
1107 * lets a tmpfs file be used read-write below the loop driver.
1109 static int
1110 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
1112 struct inode *inode = page->mapping->host;
1113 return shmem_getpage(inode, page->index, &page, SGP_WRITE);
1116 static ssize_t
1117 shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
1119 struct inode *inode = file->f_dentry->d_inode;
1120 loff_t pos;
1121 unsigned long written;
1122 int err;
1123 loff_t maxpos;
1125 if ((ssize_t) count < 0)
1126 return -EINVAL;
1128 if (!access_ok(VERIFY_READ, buf, count))
1129 return -EFAULT;
1131 down(&inode->i_sem);
1133 pos = *ppos;
1134 written = 0;
1136 err = generic_write_checks(inode, file, &pos, &count, 0);
1137 if (err || !count)
1138 goto out;
1140 maxpos = inode->i_size;
1141 if (maxpos < pos + count) {
1142 maxpos = pos + count;
1143 if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) {
1144 err = -ENOMEM;
1145 goto out;
1149 remove_suid(file->f_dentry);
1150 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1152 do {
1153 struct page *page = NULL;
1154 unsigned long bytes, index, offset;
1155 char *kaddr;
1156 int left;
1158 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1159 index = pos >> PAGE_CACHE_SHIFT;
1160 bytes = PAGE_CACHE_SIZE - offset;
1161 if (bytes > count)
1162 bytes = count;
1165 * We don't hold page lock across copy from user -
1166 * what would it guard against? - so no deadlock here.
1167 * But it still may be a good idea to prefault below.
1170 err = shmem_getpage(inode, index, &page, SGP_WRITE);
1171 if (err)
1172 break;
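/*
 * The copy is first attempted under kmap_atomic(), which may not
 * sleep; the __get_user() touches of the first and last byte make a
 * fault during that atomic copy unlikely for highmem pages.  If
 * __copy_from_user() still returns a nonzero remainder, fall back to
 * a sleeping kmap() and redo the whole copy.
 */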
1174 left = bytes;
1175 if (PageHighMem(page)) {
1176 volatile unsigned char dummy;
1177 __get_user(dummy, buf);
1178 __get_user(dummy, buf + bytes - 1);
1180 kaddr = kmap_atomic(page, KM_USER0);
1181 left = __copy_from_user(kaddr + offset, buf, bytes);
1182 kunmap_atomic(kaddr, KM_USER0);
1184 if (left) {
1185 kaddr = kmap(page);
1186 left = __copy_from_user(kaddr + offset, buf, bytes);
1187 kunmap(page);
1190 written += bytes;
1191 count -= bytes;
1192 pos += bytes;
1193 buf += bytes;
1194 if (pos > inode->i_size)
1195 inode->i_size = pos;
1197 flush_dcache_page(page);
1198 set_page_dirty(page);
1199 if (!PageReferenced(page))
1200 SetPageReferenced(page);
1201 page_cache_release(page);
1203 if (left) {
1204 pos -= left;
1205 written -= left;
1206 err = -EFAULT;
1207 break;
1211 * Our dirty pages are not counted in nr_dirty,
1212 * and we do not attempt to balance dirty pages.
1215 cond_resched();
1216 } while (count);
1218 *ppos = pos;
1219 if (written)
1220 err = written;
1222 /* Short writes give back address space */
1223 if (inode->i_size != maxpos)
1224 vm_unacct_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size));
1225 out:
1226 up(&inode->i_sem);
1227 return err;
1230 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1232 struct inode *inode = filp->f_dentry->d_inode;
1233 struct address_space *mapping = inode->i_mapping;
1234 unsigned long index, offset;
1236 index = *ppos >> PAGE_CACHE_SHIFT;
1237 offset = *ppos & ~PAGE_CACHE_MASK;
1239 for (;;) {
1240 struct page *page = NULL;
1241 unsigned long end_index, nr, ret;
1242 loff_t i_size = i_size_read(inode);
1244 end_index = i_size >> PAGE_CACHE_SHIFT;
1245 if (index > end_index)
1246 break;
1247 if (index == end_index) {
1248 nr = i_size & ~PAGE_CACHE_MASK;
1249 if (nr <= offset)
1250 break;
1253 desc->error = shmem_getpage(inode, index, &page, SGP_READ);
1254 if (desc->error) {
1255 if (desc->error == -EINVAL)
1256 desc->error = 0;
1257 break;
1261 * We must evaluate after, since reads (unlike writes)
1262 * are called without i_sem protection against truncate
1264 nr = PAGE_CACHE_SIZE;
1265 i_size = i_size_read(inode);
1266 end_index = i_size >> PAGE_CACHE_SHIFT;
1267 if (index == end_index) {
1268 nr = i_size & ~PAGE_CACHE_MASK;
1269 if (nr <= offset) {
1270 page_cache_release(page);
1271 break;
1274 nr -= offset;
1276 if (page != ZERO_PAGE(0)) {
1278 * If users can be writing to this page using arbitrary
1279 * virtual addresses, take care about potential aliasing
1280 * before reading the page on the kernel side.
1282 if (!list_empty(&mapping->i_mmap_shared))
1283 flush_dcache_page(page);
1285 * Mark the page accessed if we read the beginning.
1287 if (!offset)
1288 mark_page_accessed(page);
1292 * Ok, we have the page, and it's up-to-date, so
1293 * now we can copy it to user space...
1295 * The actor routine returns how many bytes were actually used..
1296 * NOTE! This may not be the same as how much of a user buffer
1297 * we filled up (we may be padding etc), so we can only update
1298 * "pos" here (the actor routine has to update the user buffer
1299 * pointers and the remaining count).
1301 ret = actor(desc, page, offset, nr);
1302 offset += ret;
1303 index += offset >> PAGE_CACHE_SHIFT;
1304 offset &= ~PAGE_CACHE_MASK;
1306 page_cache_release(page);
1307 if (ret != nr || !desc->count)
1308 break;
1310 cond_resched();
1313 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1314 update_atime(inode);
1317 static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
1319 read_descriptor_t desc;
1321 if ((ssize_t) count < 0)
1322 return -EINVAL;
1323 if (!access_ok(VERIFY_WRITE, buf, count))
1324 return -EFAULT;
1325 if (!count)
1326 return 0;
1328 desc.written = 0;
1329 desc.count = count;
1330 desc.buf = buf;
1331 desc.error = 0;
1333 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1334 if (desc.written)
1335 return desc.written;
1336 return desc.error;
1339 static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
1340 size_t count, read_actor_t actor, void __user *target)
1342 read_descriptor_t desc;
1344 if (!count)
1345 return 0;
1347 desc.written = 0;
1348 desc.count = count;
1349 desc.buf = target;
1350 desc.error = 0;
1352 do_shmem_file_read(in_file, ppos, &desc, actor);
1353 if (desc.written)
1354 return desc.written;
1355 return desc.error;
1358 static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
1360 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1362 buf->f_type = TMPFS_MAGIC;
1363 buf->f_bsize = PAGE_CACHE_SIZE;
1364 spin_lock(&sbinfo->stat_lock);
1365 buf->f_blocks = sbinfo->max_blocks;
1366 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1367 buf->f_files = sbinfo->max_inodes;
1368 buf->f_ffree = sbinfo->free_inodes;
1369 spin_unlock(&sbinfo->stat_lock);
1370 buf->f_namelen = NAME_MAX;
1371 return 0;
1375 * File creation. Allocate an inode, and we're done..
1377 static int
1378 shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1380 struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1381 int error = -ENOSPC;
1383 if (inode) {
1384 dir->i_size += BOGO_DIRENT_SIZE;
1385 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1386 d_instantiate(dentry, inode);
1387 dget(dentry); /* Extra count - pin the dentry in core */
1388 error = 0;
1390 return error;
1393 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1395 int error;
1397 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1398 return error;
1399 dir->i_nlink++;
1400 return 0;
1403 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
1404 struct nameidata *nd)
1406 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1410 * Link a file..
1412 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1414 struct inode *inode = old_dentry->d_inode;
1416 dir->i_size += BOGO_DIRENT_SIZE;
1417 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1418 inode->i_nlink++;
1419 atomic_inc(&inode->i_count); /* New dentry reference */
1420 dget(dentry); /* Extra pinning count for the created dentry */
1421 d_instantiate(dentry, inode);
1422 return 0;
1425 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1427 struct inode *inode = dentry->d_inode;
1429 dir->i_size -= BOGO_DIRENT_SIZE;
1430 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1431 inode->i_nlink--;
1432 dput(dentry); /* Undo the count from "create" - this does all the work */
1433 return 0;
1436 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1438 if (!simple_empty(dentry))
1439 return -ENOTEMPTY;
1441 dir->i_nlink--;
1442 return shmem_unlink(dir, dentry);
1446 * The VFS layer already does all the dentry stuff for rename,
1447 * we just have to decrement the usage count for the target if
1448 * it exists so that the VFS layer correctly frees it when it
1449 * gets overwritten.
1451 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1453 struct inode *inode = old_dentry->d_inode;
1454 int they_are_dirs = S_ISDIR(inode->i_mode);
1456 if (!simple_empty(new_dentry))
1457 return -ENOTEMPTY;
1459 if (new_dentry->d_inode) {
1460 (void) shmem_unlink(new_dir, new_dentry);
1461 if (they_are_dirs)
1462 old_dir->i_nlink--;
1463 } else if (they_are_dirs) {
1464 old_dir->i_nlink--;
1465 new_dir->i_nlink++;
1468 old_dir->i_size -= BOGO_DIRENT_SIZE;
1469 new_dir->i_size += BOGO_DIRENT_SIZE;
1470 old_dir->i_ctime = old_dir->i_mtime =
1471 new_dir->i_ctime = new_dir->i_mtime =
1472 inode->i_ctime = CURRENT_TIME;
1473 return 0;
1476 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1478 int error;
1479 int len;
1480 struct inode *inode;
1481 struct page *page = NULL;
1482 char *kaddr;
1483 struct shmem_inode_info *info;
1485 len = strlen(symname) + 1;
1486 if (len > PAGE_CACHE_SIZE)
1487 return -ENAMETOOLONG;
1489 inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1490 if (!inode)
1491 return -ENOSPC;
1493 info = SHMEM_I(inode);
1494 inode->i_size = len-1;
1495 if (len <= (char *)inode - (char *)info) {
1496 /* do it inline */
1497 memcpy(info, symname, len);
1498 inode->i_op = &shmem_symlink_inline_operations;
1499 } else {
1500 if (security_vm_enough_memory(VM_ACCT(1))) {
1501 iput(inode);
1502 return -ENOMEM;
1504 error = shmem_getpage(inode, 0, &page, SGP_WRITE);
1505 if (error) {
1506 vm_unacct_memory(VM_ACCT(1));
1507 iput(inode);
1508 return error;
1510 inode->i_op = &shmem_symlink_inode_operations;
1511 spin_lock(&shmem_ilock);
1512 list_add_tail(&info->list, &shmem_inodes);
1513 spin_unlock(&shmem_ilock);
1514 kaddr = kmap_atomic(page, KM_USER0);
1515 memcpy(kaddr, symname, len);
1516 kunmap_atomic(kaddr, KM_USER0);
1517 set_page_dirty(page);
1518 page_cache_release(page);
1520 dir->i_size += BOGO_DIRENT_SIZE;
1521 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1522 d_instantiate(dentry, inode);
1523 dget(dentry);
1524 return 0;
1527 static int shmem_readlink_inline(struct dentry *dentry, char __user *buffer, int buflen)
1529 return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
1532 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1534 return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1537 static int shmem_readlink(struct dentry *dentry, char __user *buffer, int buflen)
1539 struct page *page = NULL;
1540 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1541 if (res)
1542 return res;
1543 res = vfs_readlink(dentry, buffer, buflen, kmap(page));
1544 kunmap(page);
1545 mark_page_accessed(page);
1546 page_cache_release(page);
1547 return res;
1550 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1552 struct page *page = NULL;
1553 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1554 if (res)
1555 return res;
1556 res = vfs_follow_link(nd, kmap(page));
1557 kunmap(page);
1558 mark_page_accessed(page);
1559 page_cache_release(page);
1560 return res;
1563 static struct inode_operations shmem_symlink_inline_operations = {
1564 .readlink = shmem_readlink_inline,
1565 .follow_link = shmem_follow_link_inline,
1568 static struct inode_operations shmem_symlink_inode_operations = {
1569 .truncate = shmem_truncate,
1570 .readlink = shmem_readlink,
1571 .follow_link = shmem_follow_link,
1574 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1576 char *this_char, *value, *rest;
1578 while ((this_char = strsep(&options, ",")) != NULL) {
1579 if (!*this_char)
1580 continue;
1581 if ((value = strchr(this_char,'=')) != NULL) {
1582 *value++ = 0;
1583 } else {
1584 printk(KERN_ERR
1585 "tmpfs: No value for mount option '%s'\n",
1586 this_char);
1587 return 1;
1590 if (!strcmp(this_char,"size")) {
1591 unsigned long long size;
1592 size = memparse(value,&rest);
1593 if (*rest == '%') {
1594 size <<= PAGE_SHIFT;
1595 size *= totalram_pages;
1596 do_div(size, 100);
1597 rest++;
1599 if (*rest)
1600 goto bad_val;
1601 *blocks = size >> PAGE_CACHE_SHIFT;
1602 } else if (!strcmp(this_char,"nr_blocks")) {
1603 *blocks = memparse(value,&rest);
1604 if (*rest)
1605 goto bad_val;
1606 } else if (!strcmp(this_char,"nr_inodes")) {
1607 *inodes = memparse(value,&rest);
1608 if (*rest)
1609 goto bad_val;
1610 } else if (!strcmp(this_char,"mode")) {
1611 if (!mode)
1612 continue;
1613 *mode = simple_strtoul(value,&rest,8);
1614 if (*rest)
1615 goto bad_val;
1616 } else if (!strcmp(this_char,"uid")) {
1617 if (!uid)
1618 continue;
1619 *uid = simple_strtoul(value,&rest,0);
1620 if (*rest)
1621 goto bad_val;
1622 } else if (!strcmp(this_char,"gid")) {
1623 if (!gid)
1624 continue;
1625 *gid = simple_strtoul(value,&rest,0);
1626 if (*rest)
1627 goto bad_val;
1628 } else {
1629 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1630 this_char);
1631 return 1;
1634 return 0;
1636 bad_val:
1637 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1638 value, this_char);
1639 return 1;
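/*
 * Illustrative example of the options parsed above (the values and the
 * mount point are arbitrary):
 *
 *	mount -t tmpfs -o size=50%,nr_inodes=8192,mode=1777 tmpfs /dev/shm
 *
 * "size" accepts the usual memparse() suffixes, or a trailing '%' of
 * total RAM; "nr_blocks" may be given instead of "size"; "uid" and
 * "gid" are also accepted.
 */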
1643 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1645 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1646 unsigned long max_blocks = sbinfo->max_blocks;
1647 unsigned long max_inodes = sbinfo->max_inodes;
1649 if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1650 return -EINVAL;
1651 return shmem_set_size(sbinfo, max_blocks, max_inodes);
1653 #endif
1655 static int shmem_fill_super(struct super_block *sb,
1656 void *data, int silent)
1658 struct inode *inode;
1659 struct dentry *root;
1660 unsigned long blocks, inodes;
1661 int mode = S_IRWXUGO | S_ISVTX;
1662 uid_t uid = current->fsuid;
1663 gid_t gid = current->fsgid;
1664 struct shmem_sb_info *sbinfo;
1665 int err = -ENOMEM;
1667 sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
1668 if (!sbinfo)
1669 return -ENOMEM;
1670 sb->s_fs_info = sbinfo;
1671 memset(sbinfo, 0, sizeof(struct shmem_sb_info));
674 * By default we only allow half of the physical ram per
1675 * tmpfs instance
1677 blocks = inodes = totalram_pages / 2;
1679 #ifdef CONFIG_TMPFS
1680 if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {
1681 err = -EINVAL;
1682 goto failed;
1684 #else
1685 sb->s_flags |= MS_NOUSER;
1686 #endif
1688 spin_lock_init(&sbinfo->stat_lock);
1689 sbinfo->max_blocks = blocks;
1690 sbinfo->free_blocks = blocks;
1691 sbinfo->max_inodes = inodes;
1692 sbinfo->free_inodes = inodes;
1693 sb->s_maxbytes = SHMEM_MAX_BYTES;
1694 sb->s_blocksize = PAGE_CACHE_SIZE;
1695 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1696 sb->s_magic = TMPFS_MAGIC;
1697 sb->s_op = &shmem_ops;
1698 inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1699 if (!inode)
1700 goto failed;
1701 inode->i_uid = uid;
1702 inode->i_gid = gid;
1703 root = d_alloc_root(inode);
1704 if (!root)
1705 goto failed_iput;
1706 sb->s_root = root;
1707 return 0;
1709 failed_iput:
1710 iput(inode);
1711 failed:
1712 kfree(sbinfo);
1713 sb->s_fs_info = NULL;
1714 return err;
1717 static void shmem_put_super(struct super_block *sb)
1719 kfree(sb->s_fs_info);
1720 sb->s_fs_info = NULL;
1723 static kmem_cache_t *shmem_inode_cachep;
1725 static struct inode *shmem_alloc_inode(struct super_block *sb)
1727 struct shmem_inode_info *p;
1728 p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
1729 if (!p)
1730 return NULL;
1731 return &p->vfs_inode;
1734 static void shmem_destroy_inode(struct inode *inode)
1736 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
1739 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1741 struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
1743 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1744 SLAB_CTOR_CONSTRUCTOR) {
1745 inode_init_once(&p->vfs_inode);
1749 static int init_inodecache(void)
1751 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
1752 sizeof(struct shmem_inode_info),
1753 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
1754 init_once, NULL);
1755 if (shmem_inode_cachep == NULL)
1756 return -ENOMEM;
1757 return 0;
1760 static void destroy_inodecache(void)
1762 if (kmem_cache_destroy(shmem_inode_cachep))
1763 printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
1766 static struct address_space_operations shmem_aops = {
1767 .writepage = shmem_writepage,
1768 .set_page_dirty = __set_page_dirty_nobuffers,
1769 #ifdef CONFIG_TMPFS
1770 .prepare_write = shmem_prepare_write,
1771 .commit_write = simple_commit_write,
1772 #endif
1775 static struct file_operations shmem_file_operations = {
1776 .mmap = shmem_mmap,
1777 #ifdef CONFIG_TMPFS
1778 .llseek = generic_file_llseek,
1779 .read = shmem_file_read,
1780 .write = shmem_file_write,
1781 .fsync = simple_sync_file,
1782 .sendfile = shmem_file_sendfile,
1783 #endif
1786 static struct inode_operations shmem_inode_operations = {
1787 .truncate = shmem_truncate,
1788 .setattr = shmem_notify_change,
1791 static struct inode_operations shmem_dir_inode_operations = {
1792 #ifdef CONFIG_TMPFS
1793 .create = shmem_create,
1794 .lookup = simple_lookup,
1795 .link = shmem_link,
1796 .unlink = shmem_unlink,
1797 .symlink = shmem_symlink,
1798 .mkdir = shmem_mkdir,
1799 .rmdir = shmem_rmdir,
1800 .mknod = shmem_mknod,
1801 .rename = shmem_rename,
1802 #endif
1805 static struct super_operations shmem_ops = {
1806 .alloc_inode = shmem_alloc_inode,
1807 .destroy_inode = shmem_destroy_inode,
1808 #ifdef CONFIG_TMPFS
1809 .statfs = shmem_statfs,
1810 .remount_fs = shmem_remount_fs,
1811 #endif
1812 .delete_inode = shmem_delete_inode,
1813 .drop_inode = generic_delete_inode,
1814 .put_super = shmem_put_super,
1817 static struct vm_operations_struct shmem_vm_ops = {
1818 .nopage = shmem_nopage,
1819 .populate = shmem_populate,
1822 static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
1823 int flags, const char *dev_name, void *data)
1825 return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
1828 static struct file_system_type tmpfs_fs_type = {
1829 .owner = THIS_MODULE,
1830 .name = "tmpfs",
1831 .get_sb = shmem_get_sb,
1832 .kill_sb = kill_litter_super,
1834 static struct vfsmount *shm_mnt;
1836 static int __init init_tmpfs(void)
1838 int error;
1840 error = init_inodecache();
1841 if (error)
1842 goto out3;
1844 error = register_filesystem(&tmpfs_fs_type);
1845 if (error) {
1846 printk(KERN_ERR "Could not register tmpfs\n");
1847 goto out2;
1849 #ifdef CONFIG_TMPFS
1850 devfs_mk_dir("shm");
1851 #endif
1852 shm_mnt = kern_mount(&tmpfs_fs_type);
1853 if (IS_ERR(shm_mnt)) {
1854 error = PTR_ERR(shm_mnt);
1855 printk(KERN_ERR "Could not kern_mount tmpfs\n");
1856 goto out1;
1859 /* The internal instance should not do size checking */
1860 shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
1861 return 0;
1863 out1:
1864 unregister_filesystem(&tmpfs_fs_type);
1865 out2:
1866 destroy_inodecache();
1867 out3:
1868 shm_mnt = ERR_PTR(error);
1869 return error;
1871 module_init(init_tmpfs)
1874 * shmem_file_setup - get an unlinked file living in tmpfs
1876 * @name: name for dentry (to be seen in /proc/<pid>/maps)
1877 * @size: size to be set for the file
1880 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
1882 int error;
1883 struct file *file;
1884 struct inode *inode;
1885 struct dentry *dentry, *root;
1886 struct qstr this;
1888 if (IS_ERR(shm_mnt))
1889 return (void *)shm_mnt;
1891 if (size > SHMEM_MAX_BYTES)
1892 return ERR_PTR(-EINVAL);
1894 if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size)))
1895 return ERR_PTR(-ENOMEM);
1897 error = -ENOMEM;
1898 this.name = name;
1899 this.len = strlen(name);
1900 this.hash = 0; /* will go */
1901 root = shm_mnt->mnt_root;
1902 dentry = d_alloc(root, &this);
1903 if (!dentry)
1904 goto put_memory;
1906 error = -ENFILE;
1907 file = get_empty_filp();
1908 if (!file)
1909 goto put_dentry;
1911 error = -ENOSPC;
1912 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1913 if (!inode)
1914 goto close_file;
1916 SHMEM_I(inode)->flags &= flags;
1917 d_instantiate(dentry, inode);
1918 inode->i_size = size;
1919 inode->i_nlink = 0; /* It is unlinked */
1920 file->f_vfsmnt = mntget(shm_mnt);
1921 file->f_dentry = dentry;
1922 file->f_op = &shmem_file_operations;
1923 file->f_mode = FMODE_WRITE | FMODE_READ;
1924 return(file);
1926 close_file:
1927 put_filp(file);
1928 put_dentry:
1929 dput(dentry);
1930 put_memory:
1931 if (flags & VM_ACCOUNT)
1932 vm_unacct_memory(VM_ACCT(size));
1933 return ERR_PTR(error);
1937 * shmem_zero_setup - setup a shared anonymous mapping
1939 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1941 int shmem_zero_setup(struct vm_area_struct *vma)
1943 struct file *file;
1944 loff_t size = vma->vm_end - vma->vm_start;
1946 file = shmem_file_setup("dev/zero", size, vma->vm_flags);
1947 if (IS_ERR(file))
1948 return PTR_ERR(file);
1950 if (vma->vm_file)
1951 fput(vma->vm_file);
1952 vma->vm_file = file;
1953 vma->vm_ops = &shmem_vm_ops;
1954 return 0;
1957 EXPORT_SYMBOL(shmem_file_setup);