mm/shmem.c
1 /*
2 * Resizable virtual memory filesystem for Linux.
4 * Copyright (C) 2000 Linus Torvalds.
5 * 2000 Transmeta Corp.
6 * 2000-2001 Christoph Rohland
7 * 2000-2001 SAP AG
8 * 2002 Red Hat Inc.
9 * Copyright (C) 2002-2003 Hugh Dickins.
10 * Copyright (C) 2002-2003 VERITAS Software Corporation.
12 * This file is released under the GPL.
16 * This virtual memory filesystem is heavily based on the ramfs. It
17 * extends ramfs by the ability to use swap and honor resource limits
18 * which makes it a completely usable filesystem.
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/devfs_fs_kernel.h>
25 #include <linux/fs.h>
26 #include <linux/mm.h>
27 #include <linux/mman.h>
28 #include <linux/file.h>
29 #include <linux/swap.h>
30 #include <linux/pagemap.h>
31 #include <linux/string.h>
32 #include <linux/slab.h>
33 #include <linux/backing-dev.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/mount.h>
36 #include <linux/writeback.h>
37 #include <linux/vfs.h>
38 #include <linux/blkdev.h>
39 #include <linux/security.h>
40 #include <asm/uaccess.h>
41 #include <asm/div64.h>
43 /* This magic number is used in glibc for posix shared memory */
44 #define TMPFS_MAGIC 0x01021994
46 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
47 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
48 #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
50 #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
51 #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
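/*
 * Worked example (illustrative, not part of the original source), assuming
 * PAGE_CACHE_SIZE == PAGE_SIZE == 4096, sizeof(unsigned long) == 4 and
 * SHMEM_NR_DIRECT == 16 (as in the layout example further down):
 *
 *   ENTRIES_PER_PAGE     = 4096/4            = 1024
 *   ENTRIES_PER_PAGEPAGE = 1024*1024         = 1048576
 *   BLOCKS_PER_PAGE      = 4096/512          = 8
 *   SHMEM_MAX_INDEX      = 16 + 524288*1025  = 537395216 pages
 *   SHMEM_MAX_BYTES      = 537395216 << 12   ~ just over 2 TB per file
 */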
53 #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 55 /* info->flags uses VM_* flag bits to handle pagein/truncate races efficiently */
56 #define SHMEM_PAGEIN VM_READ
57 #define SHMEM_TRUNCATE VM_WRITE
 59 /* Pretend that each entry is of this size in directory's i_size */
60 #define BOGO_DIRENT_SIZE 20
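/*
 * Illustration (not in the original source): a new directory starts at
 * i_size == 2 * BOGO_DIRENT_SIZE (for "." and ".."), and each entry created
 * in it adds another BOGO_DIRENT_SIZE, so a directory holding three entries
 * reports i_size == 5 * 20 == 100 bytes.
 */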
62 /* Keep swapped page count in private field of indirect struct page */
63 #define nr_swapped private
65 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
66 enum sgp_type {
67 SGP_QUICK, /* don't try more than file page cache lookup */
68 SGP_READ, /* don't exceed i_size, don't allocate page */
69 SGP_CACHE, /* don't exceed i_size, may allocate page */
70 SGP_WRITE, /* may exceed i_size, may allocate page */
73 static int shmem_getpage(struct inode *inode, unsigned long idx,
74 struct page **pagep, enum sgp_type sgp, int *type);
76 static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
79 * The above definition of ENTRIES_PER_PAGE, and the use of
80 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
81 * might be reconsidered if it ever diverges from PAGE_SIZE.
83 return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
86 static inline void shmem_dir_free(struct page *page)
88 __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
91 static struct page **shmem_dir_map(struct page *page)
93 return (struct page **)kmap_atomic(page, KM_USER0);
96 static inline void shmem_dir_unmap(struct page **dir)
98 kunmap_atomic(dir, KM_USER0);
101 static swp_entry_t *shmem_swp_map(struct page *page)
104 * We have to avoid the unconditional inc_preempt_count()
105 * in kmap_atomic(), since shmem_swp_unmap() will also be
106 * applied to the low memory addresses within i_direct[].
107 * PageHighMem and high_memory tests are good for all arches
108 * and configs: highmem_start_page and FIXADDR_START are not.
110 return PageHighMem(page)?
111 (swp_entry_t *)kmap_atomic(page, KM_USER1):
112 (swp_entry_t *)page_address(page);
115 static inline void shmem_swp_unmap(swp_entry_t *entry)
117 if (entry >= (swp_entry_t *)high_memory)
118 kunmap_atomic(entry, KM_USER1);
121 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
123 return sb->s_fs_info;
126 static struct super_operations shmem_ops;
127 static struct address_space_operations shmem_aops;
128 static struct file_operations shmem_file_operations;
129 static struct inode_operations shmem_inode_operations;
130 static struct inode_operations shmem_dir_inode_operations;
131 static struct vm_operations_struct shmem_vm_ops;
133 static struct backing_dev_info shmem_backing_dev_info = {
134 .ra_pages = 0, /* No readahead */
135 .memory_backed = 1, /* Does not contribute to dirty memory */
138 LIST_HEAD(shmem_inodes);
139 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
141 static void shmem_free_block(struct inode *inode)
143 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
144 spin_lock(&sbinfo->stat_lock);
145 sbinfo->free_blocks++;
146 inode->i_blocks -= BLOCKS_PER_PAGE;
147 spin_unlock(&sbinfo->stat_lock);
151 * shmem_recalc_inode - recalculate the size of an inode
153 * @inode: inode to recalc
155 * We have to calculate the free blocks since the mm can drop
156 * undirtied hole pages behind our back.
158 * But normally info->alloced == inode->i_mapping->nrpages + info->swapped
159 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
161 * It has to be called with the spinlock held.
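/*
 * Illustrative numbers (an assumption-free restatement of the formula above):
 * if info->alloced == 10, info->swapped == 2 and nrpages == 6, then the mm
 * has dropped 2 clean hole pages, so shmem_recalc_inode returns 2 blocks to
 * sbinfo->free_blocks and trims inode->i_blocks by 2 * BLOCKS_PER_PAGE.
 */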
163 static void shmem_recalc_inode(struct inode *inode)
165 struct shmem_inode_info *info = SHMEM_I(inode);
166 long freed;
168 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
169 if (freed > 0) {
170 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
171 info->alloced -= freed;
172 spin_lock(&sbinfo->stat_lock);
173 sbinfo->free_blocks += freed;
174 inode->i_blocks -= freed*BLOCKS_PER_PAGE;
175 spin_unlock(&sbinfo->stat_lock);
180 * shmem_swp_entry - find the swap vector position in the info structure
182 * @info: info structure for the inode
183 * @index: index of the page to find
184 * @page: optional page to add to the structure. Has to be preset to
185 * all zeros
187 * If there is no space allocated yet it will return NULL when
188 * page is NULL, else it will use the page for the needed block,
189 * setting it to NULL on return to indicate that it has been used.
191 * The swap vector is organized the following way:
193 * There are SHMEM_NR_DIRECT entries directly stored in the
 194  * shmem_inode_info structure. So small files do not need an additional
195 * allocation.
197 * For pages with index > SHMEM_NR_DIRECT there is the pointer
198 * i_indirect which points to a page which holds in the first half
199 * doubly indirect blocks, in the second half triple indirect blocks:
201 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
202 * following layout (for SHMEM_NR_DIRECT == 16):
204 * i_indirect -> dir --> 16-19
205 * | +-> 20-23
207 * +-->dir2 --> 24-27
208 * | +-> 28-31
209 * | +-> 32-35
210 * | +-> 36-39
212 * +-->dir3 --> 40-43
213 * +-> 44-47
214 * +-> 48-51
215 * +-> 52-55
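/*
 * Worked lookup (illustrative, using the same artificial numbers as the
 * diagram above, ENTRIES_PER_PAGE == 4 and SHMEM_NR_DIRECT == 16): for
 * index 30, shmem_swp_entry computes index - 16 = 14, offset = 14 % 4 = 2
 * and 14 / 4 = 3; since 3 >= ENTRIES_PER_PAGE/2 it descends into the
 * triple-indirect half, lands in dir2's second block (pages 28-31), and
 * returns the entry at offset 2 there, i.e. the slot for page 30.
 */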
217 static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
219 unsigned long offset;
220 struct page **dir;
221 struct page *subdir;
223 if (index < SHMEM_NR_DIRECT)
224 return info->i_direct+index;
225 if (!info->i_indirect) {
226 if (page) {
227 info->i_indirect = *page;
228 *page = NULL;
230 return NULL; /* need another page */
233 index -= SHMEM_NR_DIRECT;
234 offset = index % ENTRIES_PER_PAGE;
235 index /= ENTRIES_PER_PAGE;
236 dir = shmem_dir_map(info->i_indirect);
238 if (index >= ENTRIES_PER_PAGE/2) {
239 index -= ENTRIES_PER_PAGE/2;
240 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
241 index %= ENTRIES_PER_PAGE;
242 subdir = *dir;
243 if (!subdir) {
244 if (page) {
245 *dir = *page;
246 *page = NULL;
248 shmem_dir_unmap(dir);
249 return NULL; /* need another page */
251 shmem_dir_unmap(dir);
252 dir = shmem_dir_map(subdir);
255 dir += index;
256 subdir = *dir;
257 if (!subdir) {
258 if (!page || !(subdir = *page)) {
259 shmem_dir_unmap(dir);
260 return NULL; /* need a page */
262 *dir = subdir;
263 *page = NULL;
265 shmem_dir_unmap(dir);
268 * With apologies... caller shmem_swp_alloc passes non-NULL
269 * page (though perhaps NULL *page); and now we know that this
270 * indirect page has been allocated, we can shortcut the final
271 * kmap if we know it contains no swap entries, as is commonly
272 * the case: return pointer to a 0 which doesn't need kmapping.
274 return (page && !subdir->nr_swapped)?
275 (swp_entry_t *)&subdir->nr_swapped:
276 shmem_swp_map(subdir) + offset;
279 static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
281 long incdec = value? 1: -1;
283 entry->val = value;
284 info->swapped += incdec;
285 if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
286 kmap_atomic_to_page(entry)->nr_swapped += incdec;
290 * shmem_swp_alloc - get the position of the swap entry for the page.
 291  * If it does not exist, allocate the entry.
293 * @info: info structure for the inode
294 * @index: index of the page to find
295 * @sgp: check and recheck i_size? skip allocation?
297 static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
299 struct inode *inode = &info->vfs_inode;
300 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
301 struct page *page = NULL;
302 swp_entry_t *entry;
303 static const swp_entry_t unswapped = { 0 };
305 if (sgp != SGP_WRITE &&
306 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
307 return ERR_PTR(-EINVAL);
309 while (!(entry = shmem_swp_entry(info, index, &page))) {
310 if (sgp == SGP_READ)
311 return (swp_entry_t *) &unswapped;
313 * Test free_blocks against 1 not 0, since we have 1 data
314 * page (and perhaps indirect index pages) yet to allocate:
315 * a waste to allocate index if we cannot allocate data.
317 spin_lock(&sbinfo->stat_lock);
318 if (sbinfo->free_blocks <= 1) {
319 spin_unlock(&sbinfo->stat_lock);
320 return ERR_PTR(-ENOSPC);
322 sbinfo->free_blocks--;
323 inode->i_blocks += BLOCKS_PER_PAGE;
324 spin_unlock(&sbinfo->stat_lock);
326 spin_unlock(&info->lock);
327 page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
328 if (page) {
329 clear_highpage(page);
330 page->nr_swapped = 0;
332 spin_lock(&info->lock);
334 if (!page) {
335 shmem_free_block(inode);
336 return ERR_PTR(-ENOMEM);
338 if (sgp != SGP_WRITE &&
339 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
340 entry = ERR_PTR(-EINVAL);
341 break;
343 if (info->next_index <= index)
344 info->next_index = index + 1;
346 if (page) {
347 /* another task gave its page, or truncated the file */
348 shmem_free_block(inode);
349 shmem_dir_free(page);
351 if (info->next_index <= index && !IS_ERR(entry))
352 info->next_index = index + 1;
353 return entry;
357 * shmem_free_swp - free some swap entries in a directory
359 * @dir: pointer to the directory
360 * @edir: pointer after last entry of the directory
362 static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
364 swp_entry_t *ptr;
365 int freed = 0;
367 for (ptr = dir; ptr < edir; ptr++) {
368 if (ptr->val) {
369 free_swap_and_cache(*ptr);
370 *ptr = (swp_entry_t){0};
371 freed++;
374 return freed;
377 static void shmem_truncate(struct inode *inode)
379 struct shmem_inode_info *info = SHMEM_I(inode);
380 unsigned long idx;
381 unsigned long size;
382 unsigned long limit;
383 unsigned long stage;
384 struct page **dir;
385 struct page *subdir;
386 struct page *empty;
387 swp_entry_t *ptr;
388 int offset;
389 int freed;
391 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
392 idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
393 if (idx >= info->next_index)
394 return;
396 spin_lock(&info->lock);
397 info->flags |= SHMEM_TRUNCATE;
398 limit = info->next_index;
399 info->next_index = idx;
400 if (info->swapped && idx < SHMEM_NR_DIRECT) {
401 ptr = info->i_direct;
402 size = limit;
403 if (size > SHMEM_NR_DIRECT)
404 size = SHMEM_NR_DIRECT;
405 info->swapped -= shmem_free_swp(ptr+idx, ptr+size);
407 if (!info->i_indirect)
408 goto done2;
410 BUG_ON(limit <= SHMEM_NR_DIRECT);
411 limit -= SHMEM_NR_DIRECT;
412 idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
413 offset = idx % ENTRIES_PER_PAGE;
414 idx -= offset;
416 empty = NULL;
417 dir = shmem_dir_map(info->i_indirect);
418 stage = ENTRIES_PER_PAGEPAGE/2;
419 if (idx < ENTRIES_PER_PAGEPAGE/2)
420 dir += idx/ENTRIES_PER_PAGE;
421 else {
422 dir += ENTRIES_PER_PAGE/2;
423 dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
424 while (stage <= idx)
425 stage += ENTRIES_PER_PAGEPAGE;
426 if (*dir) {
427 subdir = *dir;
428 size = ((idx - ENTRIES_PER_PAGEPAGE/2) %
429 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
430 if (!size && !offset) {
431 empty = subdir;
432 *dir = NULL;
434 shmem_dir_unmap(dir);
435 dir = shmem_dir_map(subdir) + size;
436 } else {
437 offset = 0;
438 idx = stage;
442 for (; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
443 if (unlikely(idx == stage)) {
444 shmem_dir_unmap(dir-1);
445 dir = shmem_dir_map(info->i_indirect) +
446 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
447 while (!*dir) {
448 dir++;
449 idx += ENTRIES_PER_PAGEPAGE;
450 if (idx >= limit)
451 goto done1;
453 stage = idx + ENTRIES_PER_PAGEPAGE;
454 subdir = *dir;
455 *dir = NULL;
456 shmem_dir_unmap(dir);
457 if (empty) {
458 shmem_dir_free(empty);
459 info->alloced++;
461 empty = subdir;
462 cond_resched_lock(&info->lock);
463 dir = shmem_dir_map(subdir);
465 subdir = *dir;
466 if (subdir && subdir->nr_swapped) {
467 ptr = shmem_swp_map(subdir);
468 size = limit - idx;
469 if (size > ENTRIES_PER_PAGE)
470 size = ENTRIES_PER_PAGE;
471 freed = shmem_free_swp(ptr+offset, ptr+size);
472 shmem_swp_unmap(ptr);
473 info->swapped -= freed;
474 subdir->nr_swapped -= freed;
475 BUG_ON(subdir->nr_swapped > offset);
477 if (offset)
478 offset = 0;
479 else if (subdir) {
480 *dir = NULL;
481 shmem_dir_free(subdir);
482 info->alloced++;
485 done1:
486 shmem_dir_unmap(dir-1);
487 if (empty) {
488 shmem_dir_free(empty);
489 info->alloced++;
491 if (info->next_index <= SHMEM_NR_DIRECT) {
492 shmem_dir_free(info->i_indirect);
493 info->i_indirect = NULL;
494 info->alloced++;
496 done2:
497 BUG_ON(info->swapped > info->next_index);
498 if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
500 * Call truncate_inode_pages again: racing shmem_unuse_inode
501 * may have swizzled a page in from swap since vmtruncate or
502 * generic_delete_inode did it, before we lowered next_index.
503 * Also, though shmem_getpage checks i_size before adding to
504 * cache, no recheck after: so fix the narrow window there too.
506 spin_unlock(&info->lock);
507 truncate_inode_pages(inode->i_mapping, inode->i_size);
508 spin_lock(&info->lock);
510 info->flags &= ~SHMEM_TRUNCATE;
511 shmem_recalc_inode(inode);
512 spin_unlock(&info->lock);
515 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
517 struct inode *inode = dentry->d_inode;
518 struct page *page = NULL;
519 long change = 0;
520 int error;
522 if ((attr->ia_valid & ATTR_SIZE) && (attr->ia_size <= SHMEM_MAX_BYTES)) {
524 * Account swap file usage based on new file size,
525 * but just let vmtruncate fail on out-of-range sizes.
527 change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size);
528 if (change > 0) {
529 if (security_vm_enough_memory(change))
530 return -ENOMEM;
531 } else if (attr->ia_size < inode->i_size) {
532 vm_unacct_memory(-change);
534 * If truncating down to a partial page, then
535 * if that page is already allocated, hold it
536 * in memory until the truncation is over, so
 537  * truncate_partial_page cannot miss it were
538 * it assigned to swap.
540 if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
541 (void) shmem_getpage(inode,
542 attr->ia_size>>PAGE_CACHE_SHIFT,
543 &page, SGP_READ, NULL);
546 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
547 * detect if any pages might have been added to cache
548 * after truncate_inode_pages. But we needn't bother
549 * if it's being fully truncated to zero-length: the
550 * nrpages check is efficient enough in that case.
552 if (attr->ia_size) {
553 struct shmem_inode_info *info = SHMEM_I(inode);
554 spin_lock(&info->lock);
555 info->flags &= ~SHMEM_PAGEIN;
556 spin_unlock(&info->lock);
561 error = inode_change_ok(inode, attr);
562 if (!error)
563 error = inode_setattr(inode, attr);
564 if (page)
565 page_cache_release(page);
566 if (error)
567 vm_unacct_memory(change);
568 return error;
571 static void shmem_delete_inode(struct inode *inode)
573 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
574 struct shmem_inode_info *info = SHMEM_I(inode);
576 if (inode->i_op->truncate == shmem_truncate) {
577 spin_lock(&shmem_ilock);
578 list_del(&info->list);
579 spin_unlock(&shmem_ilock);
580 if (info->flags & VM_ACCOUNT)
581 vm_unacct_memory(VM_ACCT(inode->i_size));
582 inode->i_size = 0;
583 shmem_truncate(inode);
585 BUG_ON(inode->i_blocks);
586 spin_lock(&sbinfo->stat_lock);
587 sbinfo->free_inodes++;
588 spin_unlock(&sbinfo->stat_lock);
589 clear_inode(inode);
592 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
594 swp_entry_t *ptr;
596 for (ptr = dir; ptr < edir; ptr++) {
597 if (ptr->val == entry.val)
598 return ptr - dir;
600 return -1;
603 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
605 struct inode *inode;
606 unsigned long idx;
607 unsigned long size;
608 unsigned long limit;
609 unsigned long stage;
610 struct page **dir;
611 struct page *subdir;
612 swp_entry_t *ptr;
613 int offset;
615 idx = 0;
616 ptr = info->i_direct;
617 spin_lock(&info->lock);
618 limit = info->next_index;
619 size = limit;
620 if (size > SHMEM_NR_DIRECT)
621 size = SHMEM_NR_DIRECT;
622 offset = shmem_find_swp(entry, ptr, ptr+size);
623 if (offset >= 0)
624 goto found;
625 if (!info->i_indirect)
626 goto lost2;
627 /* we might be racing with shmem_truncate */
628 if (limit <= SHMEM_NR_DIRECT)
629 goto lost2;
631 dir = shmem_dir_map(info->i_indirect);
632 stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
634 for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
635 if (unlikely(idx == stage)) {
636 shmem_dir_unmap(dir-1);
637 dir = shmem_dir_map(info->i_indirect) +
638 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
639 while (!*dir) {
640 dir++;
641 idx += ENTRIES_PER_PAGEPAGE;
642 if (idx >= limit)
643 goto lost1;
645 stage = idx + ENTRIES_PER_PAGEPAGE;
646 subdir = *dir;
647 shmem_dir_unmap(dir);
648 dir = shmem_dir_map(subdir);
650 subdir = *dir;
651 if (subdir && subdir->nr_swapped) {
652 ptr = shmem_swp_map(subdir);
653 size = limit - idx;
654 if (size > ENTRIES_PER_PAGE)
655 size = ENTRIES_PER_PAGE;
656 offset = shmem_find_swp(entry, ptr, ptr+size);
657 if (offset >= 0) {
658 shmem_dir_unmap(dir);
659 goto found;
661 shmem_swp_unmap(ptr);
664 lost1:
665 shmem_dir_unmap(dir-1);
666 lost2:
667 spin_unlock(&info->lock);
668 return 0;
669 found:
670 idx += offset;
671 inode = &info->vfs_inode;
672 if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
673 info->flags |= SHMEM_PAGEIN;
674 shmem_swp_set(info, ptr + offset, 0);
676 shmem_swp_unmap(ptr);
677 spin_unlock(&info->lock);
679 * Decrement swap count even when the entry is left behind:
680 * try_to_unuse will skip over mms, then reincrement count.
682 swap_free(entry);
683 return 1;
 687  * shmem_unuse() searches for a possibly swapped-out shmem page.
689 int shmem_unuse(swp_entry_t entry, struct page *page)
691 struct list_head *p;
692 struct shmem_inode_info *info;
693 int found = 0;
695 spin_lock(&shmem_ilock);
696 list_for_each(p, &shmem_inodes) {
697 info = list_entry(p, struct shmem_inode_info, list);
699 if (info->swapped && shmem_unuse_inode(info, entry, page)) {
700 /* move head to start search for next from here */
701 list_move_tail(&shmem_inodes, &info->list);
702 found = 1;
703 break;
706 spin_unlock(&shmem_ilock);
707 return found;
711 * Move the page from the page cache to the swap cache.
713 static int shmem_writepage(struct page *page, struct writeback_control *wbc)
715 struct shmem_inode_info *info;
716 swp_entry_t *entry, swap;
717 struct address_space *mapping;
718 unsigned long index;
719 struct inode *inode;
721 BUG_ON(!PageLocked(page));
722 BUG_ON(page_mapped(page));
724 mapping = page->mapping;
725 index = page->index;
726 inode = mapping->host;
727 info = SHMEM_I(inode);
728 if (info->flags & VM_LOCKED)
729 goto redirty;
730 swap = get_swap_page();
731 if (!swap.val)
732 goto redirty;
734 spin_lock(&info->lock);
735 shmem_recalc_inode(inode);
736 if (index >= info->next_index) {
737 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
738 goto unlock;
740 entry = shmem_swp_entry(info, index, NULL);
741 BUG_ON(!entry);
742 BUG_ON(entry->val);
744 if (move_to_swap_cache(page, swap) == 0) {
745 shmem_swp_set(info, entry, swap.val);
746 shmem_swp_unmap(entry);
747 spin_unlock(&info->lock);
748 unlock_page(page);
749 return 0;
752 shmem_swp_unmap(entry);
753 unlock:
754 spin_unlock(&info->lock);
755 swap_free(swap);
756 redirty:
757 set_page_dirty(page);
758 return WRITEPAGE_ACTIVATE; /* Return with the page locked */
762 * shmem_getpage - either get the page from swap or allocate a new one
764 * If we allocate a new one we do not mark it dirty. That's up to the
 765  * vm. If we swap it in we mark it dirty, since we also free the swap
 766  * entry: a page cannot live in both the swap cache and the page cache.
768 static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp, int *type)
770 struct address_space *mapping = inode->i_mapping;
771 struct shmem_inode_info *info = SHMEM_I(inode);
772 struct shmem_sb_info *sbinfo;
773 struct page *filepage = *pagep;
774 struct page *swappage;
775 swp_entry_t *entry;
776 swp_entry_t swap;
777 int error, majmin = VM_FAULT_MINOR;
779 if (idx >= SHMEM_MAX_INDEX)
780 return -EFBIG;
782 * Normally, filepage is NULL on entry, and either found
783 * uptodate immediately, or allocated and zeroed, or read
784 * in under swappage, which is then assigned to filepage.
785 * But shmem_prepare_write passes in a locked filepage,
786 * which may be found not uptodate by other callers too,
787 * and may need to be copied from the swappage read in.
789 repeat:
790 if (!filepage)
791 filepage = find_lock_page(mapping, idx);
792 if (filepage && PageUptodate(filepage))
793 goto done;
794 error = 0;
795 if (sgp == SGP_QUICK)
796 goto failed;
798 spin_lock(&info->lock);
799 shmem_recalc_inode(inode);
800 entry = shmem_swp_alloc(info, idx, sgp);
801 if (IS_ERR(entry)) {
802 spin_unlock(&info->lock);
803 error = PTR_ERR(entry);
804 goto failed;
806 swap = *entry;
808 if (swap.val) {
809 /* Look it up and read it in.. */
810 swappage = lookup_swap_cache(swap);
811 if (!swappage) {
812 shmem_swp_unmap(entry);
813 spin_unlock(&info->lock);
814 /* here we actually do the io */
815 if (majmin == VM_FAULT_MINOR && type)
816 inc_page_state(pgmajfault);
817 majmin = VM_FAULT_MAJOR;
818 swapin_readahead(swap);
819 swappage = read_swap_cache_async(swap);
820 if (!swappage) {
821 spin_lock(&info->lock);
822 entry = shmem_swp_alloc(info, idx, sgp);
823 if (IS_ERR(entry))
824 error = PTR_ERR(entry);
825 else {
826 if (entry->val == swap.val)
827 error = -ENOMEM;
828 shmem_swp_unmap(entry);
830 spin_unlock(&info->lock);
831 if (error)
832 goto failed;
833 goto repeat;
835 wait_on_page_locked(swappage);
836 page_cache_release(swappage);
837 goto repeat;
840 /* We have to do this with page locked to prevent races */
841 if (TestSetPageLocked(swappage)) {
842 shmem_swp_unmap(entry);
843 spin_unlock(&info->lock);
844 wait_on_page_locked(swappage);
845 page_cache_release(swappage);
846 goto repeat;
848 if (PageWriteback(swappage)) {
849 shmem_swp_unmap(entry);
850 spin_unlock(&info->lock);
851 wait_on_page_writeback(swappage);
852 unlock_page(swappage);
853 page_cache_release(swappage);
854 goto repeat;
856 if (!PageUptodate(swappage)) {
857 shmem_swp_unmap(entry);
858 spin_unlock(&info->lock);
859 unlock_page(swappage);
860 page_cache_release(swappage);
861 error = -EIO;
862 goto failed;
865 if (filepage) {
866 shmem_swp_set(info, entry, 0);
867 shmem_swp_unmap(entry);
868 delete_from_swap_cache(swappage);
869 spin_unlock(&info->lock);
870 copy_highpage(filepage, swappage);
871 unlock_page(swappage);
872 page_cache_release(swappage);
873 flush_dcache_page(filepage);
874 SetPageUptodate(filepage);
875 set_page_dirty(filepage);
876 swap_free(swap);
877 } else if (!(error = move_from_swap_cache(
878 swappage, idx, mapping))) {
879 info->flags |= SHMEM_PAGEIN;
880 shmem_swp_set(info, entry, 0);
881 shmem_swp_unmap(entry);
882 spin_unlock(&info->lock);
883 filepage = swappage;
884 swap_free(swap);
885 } else {
886 shmem_swp_unmap(entry);
887 spin_unlock(&info->lock);
888 unlock_page(swappage);
889 page_cache_release(swappage);
890 if (error == -ENOMEM) {
891 /* let kswapd refresh zone for GFP_ATOMICs */
892 blk_congestion_wait(WRITE, HZ/50);
894 goto repeat;
896 } else if (sgp == SGP_READ && !filepage) {
897 shmem_swp_unmap(entry);
898 filepage = find_get_page(mapping, idx);
899 if (filepage &&
900 (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
901 spin_unlock(&info->lock);
902 wait_on_page_locked(filepage);
903 page_cache_release(filepage);
904 filepage = NULL;
905 goto repeat;
907 spin_unlock(&info->lock);
908 } else {
909 shmem_swp_unmap(entry);
910 sbinfo = SHMEM_SB(inode->i_sb);
911 spin_lock(&sbinfo->stat_lock);
912 if (sbinfo->free_blocks == 0) {
913 spin_unlock(&sbinfo->stat_lock);
914 spin_unlock(&info->lock);
915 error = -ENOSPC;
916 goto failed;
918 sbinfo->free_blocks--;
919 inode->i_blocks += BLOCKS_PER_PAGE;
920 spin_unlock(&sbinfo->stat_lock);
922 if (!filepage) {
923 spin_unlock(&info->lock);
924 filepage = page_cache_alloc(mapping);
925 if (!filepage) {
926 shmem_free_block(inode);
927 error = -ENOMEM;
928 goto failed;
931 spin_lock(&info->lock);
932 entry = shmem_swp_alloc(info, idx, sgp);
933 if (IS_ERR(entry))
934 error = PTR_ERR(entry);
935 else {
936 swap = *entry;
937 shmem_swp_unmap(entry);
939 if (error || swap.val || 0 != add_to_page_cache_lru(
940 filepage, mapping, idx, GFP_ATOMIC)) {
941 spin_unlock(&info->lock);
942 page_cache_release(filepage);
943 shmem_free_block(inode);
944 filepage = NULL;
945 if (error)
946 goto failed;
947 goto repeat;
949 info->flags |= SHMEM_PAGEIN;
952 info->alloced++;
953 spin_unlock(&info->lock);
954 clear_highpage(filepage);
955 flush_dcache_page(filepage);
956 SetPageUptodate(filepage);
958 done:
959 if (!*pagep) {
960 if (filepage) {
961 unlock_page(filepage);
962 *pagep = filepage;
963 } else
964 *pagep = ZERO_PAGE(0);
966 if (type)
967 *type = majmin;
968 return 0;
970 failed:
971 if (*pagep != filepage) {
972 unlock_page(filepage);
973 page_cache_release(filepage);
975 return error;
978 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
980 struct inode *inode = vma->vm_file->f_dentry->d_inode;
981 struct page *page = NULL;
982 unsigned long idx;
983 int error;
985 idx = (address - vma->vm_start) >> PAGE_SHIFT;
986 idx += vma->vm_pgoff;
987 idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
989 error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
990 if (error)
991 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
993 mark_page_accessed(page);
994 return page;
997 static int shmem_populate(struct vm_area_struct *vma,
998 unsigned long addr, unsigned long len,
999 pgprot_t prot, unsigned long pgoff, int nonblock)
1001 struct inode *inode = vma->vm_file->f_dentry->d_inode;
1002 struct mm_struct *mm = vma->vm_mm;
1003 enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
1004 unsigned long size;
1006 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1007 if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
1008 return -EINVAL;
1010 while ((long) len > 0) {
1011 struct page *page = NULL;
1012 int err;
1014 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
1016 err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
1017 if (err)
1018 return err;
1019 if (page) {
1020 mark_page_accessed(page);
1021 err = install_page(mm, vma, addr, page, prot);
1022 if (err) {
1023 page_cache_release(page);
1024 return err;
1026 } else if (nonblock) {
1028 * If a nonlinear mapping then store the file page
1029 * offset in the pte.
1031 unsigned long pgidx;
1032 pgidx = (addr - vma->vm_start) >> PAGE_SHIFT;
1033 pgidx += vma->vm_pgoff;
1034 pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
1035 if (pgoff != pgidx) {
1036 err = install_file_pte(mm, vma, addr, pgoff, prot);
1037 if (err)
1038 return err;
1042 len -= PAGE_SIZE;
1043 addr += PAGE_SIZE;
1044 pgoff++;
1046 return 0;
1049 void shmem_lock(struct file *file, int lock)
1051 struct inode *inode = file->f_dentry->d_inode;
1052 struct shmem_inode_info *info = SHMEM_I(inode);
1054 spin_lock(&info->lock);
1055 if (lock)
1056 info->flags |= VM_LOCKED;
1057 else
1058 info->flags &= ~VM_LOCKED;
1059 spin_unlock(&info->lock);
1062 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1064 struct vm_operations_struct *ops;
1065 struct inode *inode = file->f_dentry->d_inode;
1067 ops = &shmem_vm_ops;
1068 if (!S_ISREG(inode->i_mode))
1069 return -EACCES;
1070 update_atime(inode);
1071 vma->vm_ops = ops;
1072 return 0;
1075 static struct inode *
1076 shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1078 struct inode *inode;
1079 struct shmem_inode_info *info;
1080 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1082 spin_lock(&sbinfo->stat_lock);
1083 if (!sbinfo->free_inodes) {
1084 spin_unlock(&sbinfo->stat_lock);
1085 return NULL;
1087 sbinfo->free_inodes--;
1088 spin_unlock(&sbinfo->stat_lock);
1090 inode = new_inode(sb);
1091 if (inode) {
1092 inode->i_mode = mode;
1093 inode->i_uid = current->fsuid;
1094 inode->i_gid = current->fsgid;
1095 inode->i_blksize = PAGE_CACHE_SIZE;
1096 inode->i_blocks = 0;
1097 inode->i_mapping->a_ops = &shmem_aops;
1098 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1099 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1100 info = SHMEM_I(inode);
1101 memset(info, 0, (char *)inode - (char *)info);
1102 spin_lock_init(&info->lock);
1103 info->flags = VM_ACCOUNT;
1104 switch (mode & S_IFMT) {
1105 default:
1106 init_special_inode(inode, mode, dev);
1107 break;
1108 case S_IFREG:
1109 inode->i_op = &shmem_inode_operations;
1110 inode->i_fop = &shmem_file_operations;
1111 spin_lock(&shmem_ilock);
1112 list_add_tail(&info->list, &shmem_inodes);
1113 spin_unlock(&shmem_ilock);
1114 break;
1115 case S_IFDIR:
1116 inode->i_nlink++;
1117 /* Some things misbehave if size == 0 on a directory */
1118 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1119 inode->i_op = &shmem_dir_inode_operations;
1120 inode->i_fop = &simple_dir_operations;
1121 break;
1122 case S_IFLNK:
1123 break;
1126 return inode;
1129 static int shmem_set_size(struct shmem_sb_info *info,
1130 unsigned long max_blocks, unsigned long max_inodes)
1132 int error;
1133 unsigned long blocks, inodes;
1135 spin_lock(&info->stat_lock);
1136 blocks = info->max_blocks - info->free_blocks;
1137 inodes = info->max_inodes - info->free_inodes;
1138 error = -EINVAL;
1139 if (max_blocks < blocks)
1140 goto out;
1141 if (max_inodes < inodes)
1142 goto out;
1143 error = 0;
1144 info->max_blocks = max_blocks;
1145 info->free_blocks = max_blocks - blocks;
1146 info->max_inodes = max_inodes;
1147 info->free_inodes = max_inodes - inodes;
1148 out:
1149 spin_unlock(&info->stat_lock);
1150 return error;
1153 #ifdef CONFIG_TMPFS
1155 static struct inode_operations shmem_symlink_inode_operations;
1156 static struct inode_operations shmem_symlink_inline_operations;
1159 * Normally tmpfs makes no use of shmem_prepare_write, but it
1160 * lets a tmpfs file be used read-write below the loop driver.
1162 static int
1163 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
1165 struct inode *inode = page->mapping->host;
1166 return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
1169 static ssize_t
1170 shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
1172 struct inode *inode = file->f_dentry->d_inode;
1173 loff_t pos;
1174 unsigned long written;
1175 int err;
1176 loff_t maxpos;
1178 if ((ssize_t) count < 0)
1179 return -EINVAL;
1181 if (!access_ok(VERIFY_READ, buf, count))
1182 return -EFAULT;
1184 down(&inode->i_sem);
1186 pos = *ppos;
1187 written = 0;
1189 err = generic_write_checks(file, &pos, &count, 0);
1190 if (err || !count)
1191 goto out;
1193 maxpos = inode->i_size;
1194 if (maxpos < pos + count) {
1195 maxpos = pos + count;
1196 if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) {
1197 err = -ENOMEM;
1198 goto out;
1202 remove_suid(file->f_dentry);
1203 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1205 do {
1206 struct page *page = NULL;
1207 unsigned long bytes, index, offset;
1208 char *kaddr;
1209 int left;
1211 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1212 index = pos >> PAGE_CACHE_SHIFT;
1213 bytes = PAGE_CACHE_SIZE - offset;
1214 if (bytes > count)
1215 bytes = count;
1218 * We don't hold page lock across copy from user -
1219 * what would it guard against? - so no deadlock here.
1220 * But it still may be a good idea to prefault below.
1223 err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
1224 if (err)
1225 break;
1227 left = bytes;
1228 if (PageHighMem(page)) {
1229 volatile unsigned char dummy;
1230 __get_user(dummy, buf);
1231 __get_user(dummy, buf + bytes - 1);
1233 kaddr = kmap_atomic(page, KM_USER0);
1234 left = __copy_from_user(kaddr + offset, buf, bytes);
1235 kunmap_atomic(kaddr, KM_USER0);
1237 if (left) {
1238 kaddr = kmap(page);
1239 left = __copy_from_user(kaddr + offset, buf, bytes);
1240 kunmap(page);
1243 written += bytes;
1244 count -= bytes;
1245 pos += bytes;
1246 buf += bytes;
1247 if (pos > inode->i_size)
1248 i_size_write(inode, pos);
1250 flush_dcache_page(page);
1251 set_page_dirty(page);
1252 mark_page_accessed(page);
1253 page_cache_release(page);
1255 if (left) {
1256 pos -= left;
1257 written -= left;
1258 err = -EFAULT;
1259 break;
1263 * Our dirty pages are not counted in nr_dirty,
1264 * and we do not attempt to balance dirty pages.
1267 cond_resched();
1268 } while (count);
1270 *ppos = pos;
1271 if (written)
1272 err = written;
1274 /* Short writes give back address space */
1275 if (inode->i_size != maxpos)
1276 vm_unacct_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size));
1277 out:
1278 up(&inode->i_sem);
1279 return err;
1282 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1284 struct inode *inode = filp->f_dentry->d_inode;
1285 struct address_space *mapping = inode->i_mapping;
1286 unsigned long index, offset;
1288 index = *ppos >> PAGE_CACHE_SHIFT;
1289 offset = *ppos & ~PAGE_CACHE_MASK;
1291 for (;;) {
1292 struct page *page = NULL;
1293 unsigned long end_index, nr, ret;
1294 loff_t i_size = i_size_read(inode);
1296 end_index = i_size >> PAGE_CACHE_SHIFT;
1297 if (index > end_index)
1298 break;
1299 if (index == end_index) {
1300 nr = i_size & ~PAGE_CACHE_MASK;
1301 if (nr <= offset)
1302 break;
1305 desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
1306 if (desc->error) {
1307 if (desc->error == -EINVAL)
1308 desc->error = 0;
1309 break;
1313  * We must re-evaluate i_size after getting the page, since reads (unlike
1314  * writes) are called without i_sem protection against truncate
1316 nr = PAGE_CACHE_SIZE;
1317 i_size = i_size_read(inode);
1318 end_index = i_size >> PAGE_CACHE_SHIFT;
1319 if (index == end_index) {
1320 nr = i_size & ~PAGE_CACHE_MASK;
1321 if (nr <= offset) {
1322 page_cache_release(page);
1323 break;
1326 nr -= offset;
1328 if (page != ZERO_PAGE(0)) {
1330 * If users can be writing to this page using arbitrary
1331 * virtual addresses, take care about potential aliasing
1332 * before reading the page on the kernel side.
1334 if (!list_empty(&mapping->i_mmap_shared))
1335 flush_dcache_page(page);
1337 * Mark the page accessed if we read the beginning.
1339 if (!offset)
1340 mark_page_accessed(page);
1344 * Ok, we have the page, and it's up-to-date, so
1345 * now we can copy it to user space...
1347 * The actor routine returns how many bytes were actually used..
1348 * NOTE! This may not be the same as how much of a user buffer
1349 * we filled up (we may be padding etc), so we can only update
1350 * "pos" here (the actor routine has to update the user buffer
1351 * pointers and the remaining count).
1353 ret = actor(desc, page, offset, nr);
1354 offset += ret;
1355 index += offset >> PAGE_CACHE_SHIFT;
1356 offset &= ~PAGE_CACHE_MASK;
1358 page_cache_release(page);
1359 if (ret != nr || !desc->count)
1360 break;
1362 cond_resched();
1365 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1366 update_atime(inode);
1369 static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
1371 read_descriptor_t desc;
1373 if ((ssize_t) count < 0)
1374 return -EINVAL;
1375 if (!access_ok(VERIFY_WRITE, buf, count))
1376 return -EFAULT;
1377 if (!count)
1378 return 0;
1380 desc.written = 0;
1381 desc.count = count;
1382 desc.buf = buf;
1383 desc.error = 0;
1385 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1386 if (desc.written)
1387 return desc.written;
1388 return desc.error;
1391 static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
1392 size_t count, read_actor_t actor, void __user *target)
1394 read_descriptor_t desc;
1396 if (!count)
1397 return 0;
1399 desc.written = 0;
1400 desc.count = count;
1401 desc.buf = target;
1402 desc.error = 0;
1404 do_shmem_file_read(in_file, ppos, &desc, actor);
1405 if (desc.written)
1406 return desc.written;
1407 return desc.error;
1410 static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
1412 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1414 buf->f_type = TMPFS_MAGIC;
1415 buf->f_bsize = PAGE_CACHE_SIZE;
1416 spin_lock(&sbinfo->stat_lock);
1417 buf->f_blocks = sbinfo->max_blocks;
1418 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1419 buf->f_files = sbinfo->max_inodes;
1420 buf->f_ffree = sbinfo->free_inodes;
1421 spin_unlock(&sbinfo->stat_lock);
1422 buf->f_namelen = NAME_MAX;
1423 return 0;
1427 * File creation. Allocate an inode, and we're done..
1429 static int
1430 shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1432 struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1433 int error = -ENOSPC;
1435 if (inode) {
1436 if (dir->i_mode & S_ISGID) {
1437 inode->i_gid = dir->i_gid;
1438 if (S_ISDIR(mode))
1439 inode->i_mode |= S_ISGID;
1441 dir->i_size += BOGO_DIRENT_SIZE;
1442 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1443 d_instantiate(dentry, inode);
1444 dget(dentry); /* Extra count - pin the dentry in core */
1445 error = 0;
1447 return error;
1450 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1452 int error;
1454 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1455 return error;
1456 dir->i_nlink++;
1457 return 0;
1460 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
1461 struct nameidata *nd)
1463 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1467 * Link a file..
1469 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1471 struct inode *inode = old_dentry->d_inode;
1473 dir->i_size += BOGO_DIRENT_SIZE;
1474 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1475 inode->i_nlink++;
1476 atomic_inc(&inode->i_count); /* New dentry reference */
1477 dget(dentry); /* Extra pinning count for the created dentry */
1478 d_instantiate(dentry, inode);
1479 return 0;
1482 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1484 struct inode *inode = dentry->d_inode;
1486 dir->i_size -= BOGO_DIRENT_SIZE;
1487 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1488 inode->i_nlink--;
1489 dput(dentry); /* Undo the count from "create" - this does all the work */
1490 return 0;
1493 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1495 if (!simple_empty(dentry))
1496 return -ENOTEMPTY;
1498 dir->i_nlink--;
1499 return shmem_unlink(dir, dentry);
1503 * The VFS layer already does all the dentry stuff for rename,
1504 * we just have to decrement the usage count for the target if
1505  * it exists so that the VFS layer correctly frees it when it
1506 * gets overwritten.
1508 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1510 struct inode *inode = old_dentry->d_inode;
1511 int they_are_dirs = S_ISDIR(inode->i_mode);
1513 if (!simple_empty(new_dentry))
1514 return -ENOTEMPTY;
1516 if (new_dentry->d_inode) {
1517 (void) shmem_unlink(new_dir, new_dentry);
1518 if (they_are_dirs)
1519 old_dir->i_nlink--;
1520 } else if (they_are_dirs) {
1521 old_dir->i_nlink--;
1522 new_dir->i_nlink++;
1525 old_dir->i_size -= BOGO_DIRENT_SIZE;
1526 new_dir->i_size += BOGO_DIRENT_SIZE;
1527 old_dir->i_ctime = old_dir->i_mtime =
1528 new_dir->i_ctime = new_dir->i_mtime =
1529 inode->i_ctime = CURRENT_TIME;
1530 return 0;
1533 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1535 int error;
1536 int len;
1537 struct inode *inode;
1538 struct page *page = NULL;
1539 char *kaddr;
1540 struct shmem_inode_info *info;
1542 len = strlen(symname) + 1;
1543 if (len > PAGE_CACHE_SIZE)
1544 return -ENAMETOOLONG;
1546 inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1547 if (!inode)
1548 return -ENOSPC;
1550 info = SHMEM_I(inode);
1551 inode->i_size = len-1;
1552 if (len <= (char *)inode - (char *)info) {
1553 /* do it inline */
1554 memcpy(info, symname, len);
1555 inode->i_op = &shmem_symlink_inline_operations;
1556 } else {
1557 if (security_vm_enough_memory(VM_ACCT(1))) {
1558 iput(inode);
1559 return -ENOMEM;
1561 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
1562 if (error) {
1563 vm_unacct_memory(VM_ACCT(1));
1564 iput(inode);
1565 return error;
1567 inode->i_op = &shmem_symlink_inode_operations;
1568 spin_lock(&shmem_ilock);
1569 list_add_tail(&info->list, &shmem_inodes);
1570 spin_unlock(&shmem_ilock);
1571 kaddr = kmap_atomic(page, KM_USER0);
1572 memcpy(kaddr, symname, len);
1573 kunmap_atomic(kaddr, KM_USER0);
1574 set_page_dirty(page);
1575 page_cache_release(page);
1577 if (dir->i_mode & S_ISGID)
1578 inode->i_gid = dir->i_gid;
1579 dir->i_size += BOGO_DIRENT_SIZE;
1580 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1581 d_instantiate(dentry, inode);
1582 dget(dentry);
1583 return 0;
1586 static int shmem_readlink_inline(struct dentry *dentry, char __user *buffer, int buflen)
1588 return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
1591 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1593 return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1596 static int shmem_readlink(struct dentry *dentry, char __user *buffer, int buflen)
1598 struct page *page = NULL;
1599 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
1600 if (res)
1601 return res;
1602 res = vfs_readlink(dentry, buffer, buflen, kmap(page));
1603 kunmap(page);
1604 mark_page_accessed(page);
1605 page_cache_release(page);
1606 return res;
1609 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1611 struct page *page = NULL;
1612 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
1613 if (res)
1614 return res;
1615 res = vfs_follow_link(nd, kmap(page));
1616 kunmap(page);
1617 mark_page_accessed(page);
1618 page_cache_release(page);
1619 return res;
1622 static struct inode_operations shmem_symlink_inline_operations = {
1623 .readlink = shmem_readlink_inline,
1624 .follow_link = shmem_follow_link_inline,
1627 static struct inode_operations shmem_symlink_inode_operations = {
1628 .truncate = shmem_truncate,
1629 .readlink = shmem_readlink,
1630 .follow_link = shmem_follow_link,
1633 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1635 char *this_char, *value, *rest;
1637 while ((this_char = strsep(&options, ",")) != NULL) {
1638 if (!*this_char)
1639 continue;
1640 if ((value = strchr(this_char,'=')) != NULL) {
1641 *value++ = 0;
1642 } else {
1643 printk(KERN_ERR
1644 "tmpfs: No value for mount option '%s'\n",
1645 this_char);
1646 return 1;
1649 if (!strcmp(this_char,"size")) {
1650 unsigned long long size;
1651 size = memparse(value,&rest);
1652 if (*rest == '%') {
1653 size <<= PAGE_SHIFT;
1654 size *= totalram_pages;
1655 do_div(size, 100);
1656 rest++;
1658 if (*rest)
1659 goto bad_val;
1660 *blocks = size >> PAGE_CACHE_SHIFT;
1661 } else if (!strcmp(this_char,"nr_blocks")) {
1662 *blocks = memparse(value,&rest);
1663 if (*rest)
1664 goto bad_val;
1665 } else if (!strcmp(this_char,"nr_inodes")) {
1666 *inodes = memparse(value,&rest);
1667 if (*rest)
1668 goto bad_val;
1669 } else if (!strcmp(this_char,"mode")) {
1670 if (!mode)
1671 continue;
1672 *mode = simple_strtoul(value,&rest,8);
1673 if (*rest)
1674 goto bad_val;
1675 } else if (!strcmp(this_char,"uid")) {
1676 if (!uid)
1677 continue;
1678 *uid = simple_strtoul(value,&rest,0);
1679 if (*rest)
1680 goto bad_val;
1681 } else if (!strcmp(this_char,"gid")) {
1682 if (!gid)
1683 continue;
1684 *gid = simple_strtoul(value,&rest,0);
1685 if (*rest)
1686 goto bad_val;
1687 } else {
1688 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1689 this_char);
1690 return 1;
1693 return 0;
1695 bad_val:
1696 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1697 value, this_char);
1698 return 1;
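/*
 * Example options accepted by the parser above (illustrative): a mount such
 * as "mount -t tmpfs -o size=50%,nr_inodes=8192,mode=1777 tmpfs /shm" limits
 * the instance to half of physical RAM (the '%' suffix scales the value by
 * totalram_pages), caps it at 8192 inodes, and sets the root mode to 01777.
 */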
1702 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1704 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1705 unsigned long max_blocks = sbinfo->max_blocks;
1706 unsigned long max_inodes = sbinfo->max_inodes;
1708 if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1709 return -EINVAL;
1710 return shmem_set_size(sbinfo, max_blocks, max_inodes);
1712 #endif
1714 static int shmem_fill_super(struct super_block *sb,
1715 void *data, int silent)
1717 struct inode *inode;
1718 struct dentry *root;
1719 unsigned long blocks, inodes;
1720 int mode = S_IRWXUGO | S_ISVTX;
1721 uid_t uid = current->fsuid;
1722 gid_t gid = current->fsgid;
1723 struct shmem_sb_info *sbinfo;
1724 int err = -ENOMEM;
1726 sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
1727 if (!sbinfo)
1728 return -ENOMEM;
1729 sb->s_fs_info = sbinfo;
1730 memset(sbinfo, 0, sizeof(struct shmem_sb_info));
1733  * By default we only allow half of the physical RAM per
1734  * tmpfs instance
1736 blocks = inodes = totalram_pages / 2;
1738 #ifdef CONFIG_TMPFS
1739 if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {
1740 err = -EINVAL;
1741 goto failed;
1743 #else
1744 sb->s_flags |= MS_NOUSER;
1745 #endif
1747 spin_lock_init(&sbinfo->stat_lock);
1748 sbinfo->max_blocks = blocks;
1749 sbinfo->free_blocks = blocks;
1750 sbinfo->max_inodes = inodes;
1751 sbinfo->free_inodes = inodes;
1752 sb->s_maxbytes = SHMEM_MAX_BYTES;
1753 sb->s_blocksize = PAGE_CACHE_SIZE;
1754 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1755 sb->s_magic = TMPFS_MAGIC;
1756 sb->s_op = &shmem_ops;
1757 inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1758 if (!inode)
1759 goto failed;
1760 inode->i_uid = uid;
1761 inode->i_gid = gid;
1762 root = d_alloc_root(inode);
1763 if (!root)
1764 goto failed_iput;
1765 sb->s_root = root;
1766 return 0;
1768 failed_iput:
1769 iput(inode);
1770 failed:
1771 kfree(sbinfo);
1772 sb->s_fs_info = NULL;
1773 return err;
1776 static void shmem_put_super(struct super_block *sb)
1778 kfree(sb->s_fs_info);
1779 sb->s_fs_info = NULL;
1782 static kmem_cache_t *shmem_inode_cachep;
1784 static struct inode *shmem_alloc_inode(struct super_block *sb)
1786 struct shmem_inode_info *p;
1787 p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
1788 if (!p)
1789 return NULL;
1790 return &p->vfs_inode;
1793 static void shmem_destroy_inode(struct inode *inode)
1795 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
1798 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1800 struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
1802 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1803 SLAB_CTOR_CONSTRUCTOR) {
1804 inode_init_once(&p->vfs_inode);
1808 static int init_inodecache(void)
1810 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
1811 sizeof(struct shmem_inode_info),
1812 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
1813 init_once, NULL);
1814 if (shmem_inode_cachep == NULL)
1815 return -ENOMEM;
1816 return 0;
1819 static void destroy_inodecache(void)
1821 if (kmem_cache_destroy(shmem_inode_cachep))
1822 printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
1825 static struct address_space_operations shmem_aops = {
1826 .writepage = shmem_writepage,
1827 .set_page_dirty = __set_page_dirty_nobuffers,
1828 #ifdef CONFIG_TMPFS
1829 .prepare_write = shmem_prepare_write,
1830 .commit_write = simple_commit_write,
1831 #endif
1834 static struct file_operations shmem_file_operations = {
1835 .mmap = shmem_mmap,
1836 #ifdef CONFIG_TMPFS
1837 .llseek = generic_file_llseek,
1838 .read = shmem_file_read,
1839 .write = shmem_file_write,
1840 .fsync = simple_sync_file,
1841 .sendfile = shmem_file_sendfile,
1842 #endif
1845 static struct inode_operations shmem_inode_operations = {
1846 .truncate = shmem_truncate,
1847 .setattr = shmem_notify_change,
1850 static struct inode_operations shmem_dir_inode_operations = {
1851 #ifdef CONFIG_TMPFS
1852 .create = shmem_create,
1853 .lookup = simple_lookup,
1854 .link = shmem_link,
1855 .unlink = shmem_unlink,
1856 .symlink = shmem_symlink,
1857 .mkdir = shmem_mkdir,
1858 .rmdir = shmem_rmdir,
1859 .mknod = shmem_mknod,
1860 .rename = shmem_rename,
1861 #endif
1864 static struct super_operations shmem_ops = {
1865 .alloc_inode = shmem_alloc_inode,
1866 .destroy_inode = shmem_destroy_inode,
1867 #ifdef CONFIG_TMPFS
1868 .statfs = shmem_statfs,
1869 .remount_fs = shmem_remount_fs,
1870 #endif
1871 .delete_inode = shmem_delete_inode,
1872 .drop_inode = generic_delete_inode,
1873 .put_super = shmem_put_super,
1876 static struct vm_operations_struct shmem_vm_ops = {
1877 .nopage = shmem_nopage,
1878 .populate = shmem_populate,
1881 static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
1882 int flags, const char *dev_name, void *data)
1884 return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
1887 static struct file_system_type tmpfs_fs_type = {
1888 .owner = THIS_MODULE,
1889 .name = "tmpfs",
1890 .get_sb = shmem_get_sb,
1891 .kill_sb = kill_litter_super,
1893 static struct vfsmount *shm_mnt;
1895 static int __init init_tmpfs(void)
1897 int error;
1899 error = init_inodecache();
1900 if (error)
1901 goto out3;
1903 error = register_filesystem(&tmpfs_fs_type);
1904 if (error) {
1905 printk(KERN_ERR "Could not register tmpfs\n");
1906 goto out2;
1908 #ifdef CONFIG_TMPFS
1909 devfs_mk_dir("shm");
1910 #endif
1911 shm_mnt = kern_mount(&tmpfs_fs_type);
1912 if (IS_ERR(shm_mnt)) {
1913 error = PTR_ERR(shm_mnt);
1914 printk(KERN_ERR "Could not kern_mount tmpfs\n");
1915 goto out1;
1918 /* The internal instance should not do size checking */
1919 shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
1920 return 0;
1922 out1:
1923 unregister_filesystem(&tmpfs_fs_type);
1924 out2:
1925 destroy_inodecache();
1926 out3:
1927 shm_mnt = ERR_PTR(error);
1928 return error;
1930 module_init(init_tmpfs)
1933 * shmem_file_setup - get an unlinked file living in tmpfs
1935  * @name: name for dentry (to be seen in /proc/<pid>/maps)
1936 * @size: size to be set for the file
1939 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
1941 int error;
1942 struct file *file;
1943 struct inode *inode;
1944 struct dentry *dentry, *root;
1945 struct qstr this;
1947 if (IS_ERR(shm_mnt))
1948 return (void *)shm_mnt;
1950 if (size > SHMEM_MAX_BYTES)
1951 return ERR_PTR(-EINVAL);
1953 if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size)))
1954 return ERR_PTR(-ENOMEM);
1956 error = -ENOMEM;
1957 this.name = name;
1958 this.len = strlen(name);
1959 this.hash = 0; /* will go */
1960 root = shm_mnt->mnt_root;
1961 dentry = d_alloc(root, &this);
1962 if (!dentry)
1963 goto put_memory;
1965 error = -ENFILE;
1966 file = get_empty_filp();
1967 if (!file)
1968 goto put_dentry;
1970 error = -ENOSPC;
1971 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1972 if (!inode)
1973 goto close_file;
1975 SHMEM_I(inode)->flags &= flags;
1976 d_instantiate(dentry, inode);
1977 inode->i_size = size;
1978 inode->i_nlink = 0; /* It is unlinked */
1979 file->f_vfsmnt = mntget(shm_mnt);
1980 file->f_dentry = dentry;
1981 file->f_mapping = inode->i_mapping;
1982 file->f_op = &shmem_file_operations;
1983 file->f_mode = FMODE_WRITE | FMODE_READ;
1984 return(file);
1986 close_file:
1987 put_filp(file);
1988 put_dentry:
1989 dput(dentry);
1990 put_memory:
1991 if (flags & VM_ACCOUNT)
1992 vm_unacct_memory(VM_ACCT(size));
1993 return ERR_PTR(error);
1997 * shmem_zero_setup - setup a shared anonymous mapping
1999 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
2001 int shmem_zero_setup(struct vm_area_struct *vma)
2003 struct file *file;
2004 loff_t size = vma->vm_end - vma->vm_start;
2006 file = shmem_file_setup("dev/zero", size, vma->vm_flags);
2007 if (IS_ERR(file))
2008 return PTR_ERR(file);
2010 if (vma->vm_file)
2011 fput(vma->vm_file);
2012 vma->vm_file = file;
2013 vma->vm_ops = &shmem_vm_ops;
2014 return 0;
2017 EXPORT_SYMBOL(shmem_file_setup);
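/*
 * Usage sketch (illustrative only, not part of this file): an in-kernel
 * caller can obtain an unlinked tmpfs-backed file much as shmem_zero_setup
 * does above.  The function and names below are hypothetical.
 */
static struct file *example_get_shmem_file(loff_t size)
{
	struct file *file;

	/* creates an unlinked, accounted tmpfs file of the requested size */
	file = shmem_file_setup("example", size, VM_ACCOUNT);
	if (IS_ERR(file))
		return file;	/* ERR_PTR propagated to the caller */
	/* ... map it or use file->f_op for I/O, then release with fput() ... */
	return file;
}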