/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
/*
 * Called with mm->page_table_lock held to protect against other
 * threads/the swapper from ripping pte's out from under us.
 */
static int filemap_sync_pte(pte_t *ptep, struct vm_area_struct *vma,
	unsigned long address, unsigned int flags)
{
	pte_t pte = *ptep;

	if (pte_present(pte) && pte_dirty(pte)) {
		struct page *page;
		unsigned long pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (!PageReserved(page) && ptep_test_and_clear_dirty(ptep)) {
				/* Move the dirty bit from the pte to the
				 * struct page, so writeback can find it. */
				flush_tlb_page(vma, address);
				set_page_dirty(page);
			}
		}
	}
	return 0;
}
static int filemap_sync_pte_range(pmd_t * pmd,
	unsigned long address, unsigned long end,
	struct vm_area_struct *vma, unsigned int flags)
{
	pte_t *pte;
	int error;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		return 0;
	}
	pte = pte_offset_map(pmd, address);
	if ((address & PMD_MASK) != (end & PMD_MASK))
		end = (address & PMD_MASK) + PMD_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte(pte, vma, address, flags);
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));

	pte_unmap(pte - 1);

	return error;
}
static inline int filemap_sync_pmd_range(pgd_t * pgd,
	unsigned long address, unsigned long end,
	struct vm_area_struct *vma, unsigned int flags)
{
	pmd_t * pmd;
	int error;

	if (pgd_none(*pgd))
		return 0;
	if (pgd_bad(*pgd)) {
		pgd_ERROR(*pgd);
		pgd_clear(pgd);
		return 0;
	}
	pmd = pmd_offset(pgd, address);
	if ((address & PGDIR_MASK) != (end & PGDIR_MASK))
		end = (address & PGDIR_MASK) + PGDIR_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte_range(pmd, address, end, vma, flags);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address && (address < end));
	return error;
}
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	/* Acquire the lock early; it may be possible to avoid dropping
	 * and reacquiring it repeatedly.
	 */
	spin_lock(&vma->vm_mm->page_table_lock);

	dir = pgd_offset(vma->vm_mm, address);
	flush_cache_range(vma, address, end);
	if (address >= end)
		BUG();
	do {
		error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));
	/* "address" going to zero means we wrapped past the top of the
	 * address space, so the walk must stop. */
	flush_tlb_range(vma, end - size, end);

	spin_unlock(&vma->vm_mm->page_table_lock);

	return error;
}
/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
 * marks the relevant pages dirty.  The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
 */
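/*
 * Illustrative sketch (not part of the original file): the userspace
 * pattern described above, assuming "addr"/"length" describe a shared
 * file mapping of "fd", and using the standard POSIX calls:
 *
 *	msync(addr, length, MS_ASYNC);	// just marks the pages dirty
 *	if (fsync(fd) == -1)		// starts writeout, waits for it,
 *		handle_error();		// and lets us check the result
 *
 * ("handle_error" is a hypothetical placeholder.)
 */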
static int msync_interval(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, int flags)
{
	int ret = 0;
	struct file * file = vma->vm_file;

	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
		return -EBUSY;

	if (file && (vma->vm_flags & VM_SHARED)) {
		ret = filemap_sync(vma, start, end-start, flags);

		if (!ret && (flags & MS_SYNC)) {
			struct inode *inode = file->f_dentry->d_inode;
			int err;

			down(&inode->i_sem);
			ret = filemap_fdatawrite(inode->i_mapping);
			if (file->f_op && file->f_op->fsync) {
				err = file->f_op->fsync(file,file->f_dentry,1);
				if (err && !ret)
					ret = err;
			}
			err = filemap_fdatawait(inode->i_mapping);
			if (err && !ret)
				ret = err;
			up(&inode->i_sem);
		}
	}
	return ret;
}
asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct * vma;
	int unmapped_error, error = -EINVAL;

	down_read(&current->mm->mmap_sem);
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		goto out;
	if (start & ~PAGE_MASK)
		goto out;
	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
		goto out;
	error = -ENOMEM;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
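	/* The line above rounds len up to a whole number of pages: with
	 * the common 4096-byte page, PAGE_MASK is ~0xfff, so for example
	 * len = 5000 becomes (5000 + 4095) & ~4095 = 8192 (two pages). */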
	end = start + len;
	if (end < start)
		goto out;
	error = 0;
	if (end == start)
		goto out;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -ENOMEM at the end.  (A usage sketch
	 * illustrating this follows the function.)
	 */
	vma = find_vma(current->mm, start);
	unmapped_error = 0;
	for (;;) {
		/* Still start < end. */
		error = -ENOMEM;
		if (!vma)
			goto out;
		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					goto out;
			}
			error = unmapped_error;
			goto out;
		}
		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			goto out;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
out:
	up_read(&current->mm->mmap_sem);
	return error;
}
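/*
 * Usage sketch (illustrative, not part of the original file): the
 * synchronous counterpart to the MS_ASYNC pattern shown earlier,
 * assuming "fd" is an open file and "len" a page-aligned length:
 *
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0);
 *	p[0] = 1;			// dirty the mapping
 *	msync(p, len, MS_SYNC);		// write back and wait; fails with
 *					// errno ENOMEM if the range covers
 *					// unmapped holes
 */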