2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/pagemap.h>
16 #include <linux/uio.h>
17 #include <linux/blkdev.h>
19 #include <linux/smp_lock.h>
20 #include <linux/gfs2_ioctl.h>
21 #include <asm/semaphore.h>
22 #include <asm/uaccess.h>
40 /* "bad" is for NFS support */
41 struct filldir_bad_entry
{
43 unsigned int fbe_length
;
45 struct gfs2_inum fbe_inum
;
46 unsigned int fbe_type
;
50 struct gfs2_sbd
*fdb_sbd
;
52 struct filldir_bad_entry
*fdb_entry
;
53 unsigned int fdb_entry_num
;
54 unsigned int fdb_entry_off
;
57 unsigned int fdb_name_size
;
58 unsigned int fdb_name_off
;
61 /* For regular, non-NFS */
63 struct gfs2_sbd
*fdr_sbd
;
66 filldir_t fdr_filldir
;
/*
 * do_rw_t - signature of the read/write worker functions that walk_vm()
 * and walk_vm_hard() invoke as operation(file, buf, size, offset, num_gh, ghs).
 *
 * NOTE(review): this listing has lost the parameter line that sits between
 * "file" and "size" (the user buffer, judging by the call sites).  Its exact
 * qualifiers cannot be determined from here -- restore it from the pristine
 * file rather than guessing.
 */
70 typedef ssize_t(*do_rw_t
) (struct file
*file
,
72 size_t size
, loff_t
*offset
,
73 unsigned int num_gh
, struct gfs2_holder
*ghs
);
76 * gfs2_llseek - seek to a location in a file
79 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
81 * SEEK_END requires the glock for the file because it references the
84 * Returns: The new offset, or errno
87 static loff_t
gfs2_llseek(struct file
*file
, loff_t offset
, int origin
)
89 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
90 struct gfs2_holder i_gh
;
93 atomic_inc(&ip
->i_sbd
->sd_ops_file
);
96 error
= gfs2_glock_nq_init(ip
->i_gl
, LM_ST_SHARED
, LM_FLAG_ANY
,
99 error
= remote_llseek(file
, offset
, origin
);
100 gfs2_glock_dq_uninit(&i_gh
);
103 error
= remote_llseek(file
, offset
, origin
);
108 static inline unsigned int vma2state(struct vm_area_struct
*vma
)
110 if ((vma
->vm_flags
& (VM_MAYWRITE
| VM_MAYSHARE
)) ==
111 (VM_MAYWRITE
| VM_MAYSHARE
))
112 return LM_ST_EXCLUSIVE
;
116 static ssize_t
walk_vm_hard(struct file
*file
, const char __user
*buf
, size_t size
,
117 loff_t
*offset
, do_rw_t operation
)
119 struct gfs2_holder
*ghs
;
120 unsigned int num_gh
= 0;
122 struct super_block
*sb
= file
->f_dentry
->d_inode
->i_sb
;
123 struct mm_struct
*mm
= current
->mm
;
124 struct vm_area_struct
*vma
;
125 unsigned long start
= (unsigned long)buf
;
126 unsigned long end
= start
+ size
;
127 int dumping
= (current
->flags
& PF_DUMPCORE
);
130 for (vma
= find_vma(mm
, start
); vma
; vma
= vma
->vm_next
) {
131 if (end
<= vma
->vm_start
)
134 vma
->vm_file
->f_dentry
->d_inode
->i_sb
== sb
) {
139 ghs
= kcalloc((num_gh
+ 1), sizeof(struct gfs2_holder
), GFP_KERNEL
);
142 up_read(&mm
->mmap_sem
);
146 for (vma
= find_vma(mm
, start
); vma
; vma
= vma
->vm_next
) {
147 if (end
<= vma
->vm_start
)
150 struct inode
*inode
= vma
->vm_file
->f_dentry
->d_inode
;
151 if (inode
->i_sb
== sb
)
152 gfs2_holder_init(get_v2ip(inode
)->i_gl
,
153 vma2state(vma
), 0, &ghs
[x
++]);
158 up_read(&mm
->mmap_sem
);
160 gfs2_assert(get_v2sdp(sb
), x
== num_gh
);
162 count
= operation(file
, buf
, size
, offset
, num_gh
, ghs
);
165 gfs2_holder_uninit(&ghs
[num_gh
]);
172 * walk_vm - Walk the vmas associated with a buffer for read or write.
173 * If any of them are gfs2, pass the gfs2 inode down to the read/write
174 * worker function so that locks can be acquired in the correct order.
175 * @file: The file to read/write from/to
176 * @buf: The buffer to copy to/from
177 * @size: The amount of data requested
178 * @offset: The current file offset
179 * @operation: The read or write worker function
181 * Outputs: Offset - updated according to number of bytes written
183 * Returns: The number of bytes written, errno on failure
186 static ssize_t
walk_vm(struct file
*file
, const char __user
*buf
, size_t size
,
187 loff_t
*offset
, do_rw_t operation
)
189 struct gfs2_holder gh
;
192 struct super_block
*sb
= file
->f_dentry
->d_inode
->i_sb
;
193 struct mm_struct
*mm
= current
->mm
;
194 struct vm_area_struct
*vma
;
195 unsigned long start
= (unsigned long)buf
;
196 unsigned long end
= start
+ size
;
197 int dumping
= (current
->flags
& PF_DUMPCORE
);
200 down_read(&mm
->mmap_sem
);
202 for (vma
= find_vma(mm
, start
); vma
; vma
= vma
->vm_next
) {
203 if (end
<= vma
->vm_start
)
206 vma
->vm_file
->f_dentry
->d_inode
->i_sb
== sb
)
211 up_read(&mm
->mmap_sem
);
214 return operation(file
, buf
, size
, offset
, 0, &gh
);
217 return walk_vm_hard(file
, buf
, size
, offset
, operation
);
220 static ssize_t
do_jdata_read(struct file
*file
, char __user
*buf
, size_t size
,
223 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
228 if (!access_ok(VERIFY_WRITE
, buf
, size
))
231 if (!(file
->f_flags
& O_LARGEFILE
)) {
232 if (*offset
>= MAX_NON_LFS
)
234 if (*offset
+ size
> MAX_NON_LFS
)
235 size
= MAX_NON_LFS
- *offset
;
238 count
= gfs2_jdata_read(ip
, buf
, *offset
, size
, gfs2_copy2user
);
247 * do_read_direct - Read bytes from a file
248 * @file: The file to read from
249 * @buf: The buffer to copy into
250 * @size: The amount of data requested
251 * @offset: The current file offset
252 * @num_gh: The number of other locks we need to do the read
253 * @ghs: the locks we need plus one for our lock
255 * Outputs: Offset - updated according to number of bytes read
257 * Returns: The number of bytes read, errno on failure
260 static ssize_t
do_read_direct(struct file
*file
, char __user
*buf
, size_t size
,
261 loff_t
*offset
, unsigned int num_gh
,
262 struct gfs2_holder
*ghs
)
264 struct inode
*inode
= file
->f_mapping
->host
;
265 struct gfs2_inode
*ip
= get_v2ip(inode
);
266 unsigned int state
= LM_ST_DEFERRED
;
272 for (x
= 0; x
< num_gh
; x
++)
273 if (ghs
[x
].gh_gl
== ip
->i_gl
) {
274 state
= LM_ST_SHARED
;
275 flags
|= GL_LOCAL_EXCL
;
279 gfs2_holder_init(ip
->i_gl
, state
, flags
, &ghs
[num_gh
]);
281 error
= gfs2_glock_nq_m(num_gh
+ 1, ghs
);
286 if (gfs2_is_jdata(ip
))
289 if (gfs2_is_stuffed(ip
)) {
290 size_t mask
= bdev_hardsect_size(inode
->i_sb
->s_bdev
) - 1;
292 if (((*offset
) & mask
) || (((unsigned long)buf
) & mask
))
295 count
= do_jdata_read(file
, buf
, size
& ~mask
, offset
);
297 count
= generic_file_read(file
, buf
, size
, offset
);
302 gfs2_glock_dq_m(num_gh
+ 1, ghs
);
305 gfs2_holder_uninit(&ghs
[num_gh
]);
307 return (count
) ? count
: error
;
311 * do_read_buf - Read bytes from a file
312 * @file: The file to read from
313 * @buf: The buffer to copy into
314 * @size: The amount of data requested
315 * @offset: The current file offset
316 * @num_gh: The number of other locks we need to do the read
317 * @ghs: the locks we need plus one for our lock
319 * Outputs: Offset - updated according to number of bytes read
321 * Returns: The number of bytes read, errno on failure
324 static ssize_t
do_read_buf(struct file
*file
, char __user
*buf
, size_t size
,
325 loff_t
*offset
, unsigned int num_gh
,
326 struct gfs2_holder
*ghs
)
328 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
332 gfs2_holder_init(ip
->i_gl
, LM_ST_SHARED
, GL_ATIME
, &ghs
[num_gh
]);
334 error
= gfs2_glock_nq_m_atime(num_gh
+ 1, ghs
);
338 if (gfs2_is_jdata(ip
))
339 count
= do_jdata_read(file
, buf
, size
, offset
);
341 count
= generic_file_read(file
, buf
, size
, offset
);
343 gfs2_glock_dq_m(num_gh
+ 1, ghs
);
346 gfs2_holder_uninit(&ghs
[num_gh
]);
348 return (count
) ? count
: error
;
352 * gfs2_read - Read bytes from a file
353 * @file: The file to read from
354 * @buf: The buffer to copy into
355 * @size: The amount of data requested
356 * @offset: The current file offset
358 * Outputs: Offset - updated according to number of bytes read
360 * Returns: The number of bytes read, errno on failure
363 static ssize_t
gfs2_read(struct file
*file
, char __user
*buf
, size_t size
,
366 atomic_inc(&get_v2sdp(file
->f_mapping
->host
->i_sb
)->sd_ops_file
);
368 if (file
->f_flags
& O_DIRECT
)
369 return walk_vm(file
, buf
, size
, offset
, do_read_direct
);
371 return walk_vm(file
, buf
, size
, offset
, do_read_buf
);
375 * grope_mapping - feel up a mapping that needs to be written
376 * @buf: the start of the memory to be written
377 * @size: the size of the memory to be written
379 * We do this after acquiring the locks on the mapping,
380 * but before starting the write transaction. We need to make
381 * sure that we don't cause recursive transactions if blocks
382 * need to be allocated to the file backing the mapping.
387 static int grope_mapping(const char __user
*buf
, size_t size
)
389 const char __user
*stop
= buf
+ size
;
393 if (copy_from_user(&c
, buf
, 1))
395 buf
+= PAGE_CACHE_SIZE
;
396 buf
= (const char __user
*)PAGE_ALIGN((unsigned long)buf
);
403 * do_write_direct_alloc - Write bytes to a file
404 * @file: The file to write to
405 * @buf: The buffer to copy from
406 * @size: The amount of data requested
407 * @offset: The current file offset
409 * Outputs: Offset - updated according to number of bytes written
411 * Returns: The number of bytes written, errno on failure
414 static ssize_t
do_write_direct_alloc(struct file
*file
, const char __user
*buf
, size_t size
,
417 struct inode
*inode
= file
->f_mapping
->host
;
418 struct gfs2_inode
*ip
= get_v2ip(inode
);
419 struct gfs2_sbd
*sdp
= ip
->i_sbd
;
420 struct gfs2_alloc
*al
= NULL
;
421 struct iovec local_iov
= { .iov_base
= buf
, .iov_len
= size
};
422 struct buffer_head
*dibh
;
423 unsigned int data_blocks
, ind_blocks
;
427 gfs2_write_calc_reserv(ip
, size
, &data_blocks
, &ind_blocks
);
429 al
= gfs2_alloc_get(ip
);
431 error
= gfs2_quota_lock(ip
, NO_QUOTA_CHANGE
, NO_QUOTA_CHANGE
);
435 error
= gfs2_quota_check(ip
, ip
->i_di
.di_uid
, ip
->i_di
.di_gid
);
439 al
->al_requested
= data_blocks
+ ind_blocks
;
441 error
= gfs2_inplace_reserve(ip
);
445 error
= gfs2_trans_begin(sdp
,
446 al
->al_rgd
->rd_ri
.ri_length
+ ind_blocks
+
447 RES_DINODE
+ RES_STATFS
+ RES_QUOTA
, 0);
451 if ((ip
->i_di
.di_mode
& (S_ISUID
| S_ISGID
)) && !capable(CAP_FSETID
)) {
452 error
= gfs2_meta_inode_buffer(ip
, &dibh
);
456 ip
->i_di
.di_mode
&= (ip
->i_di
.di_mode
& S_IXGRP
) ?
457 (~(S_ISUID
| S_ISGID
)) : (~S_ISUID
);
459 gfs2_trans_add_bh(ip
->i_gl
, dibh
);
460 gfs2_dinode_out(&ip
->i_di
, dibh
->b_data
);
464 if (gfs2_is_stuffed(ip
)) {
465 error
= gfs2_unstuff_dinode(ip
, gfs2_unstuffer_sync
, NULL
);
470 count
= generic_file_write_nolock(file
, &local_iov
, 1, offset
);
476 error
= gfs2_meta_inode_buffer(ip
, &dibh
);
480 if (ip
->i_di
.di_size
< inode
->i_size
)
481 ip
->i_di
.di_size
= inode
->i_size
;
482 ip
->i_di
.di_mtime
= ip
->i_di
.di_ctime
= get_seconds();
484 gfs2_trans_add_bh(ip
->i_gl
, dibh
);
485 gfs2_dinode_out(&ip
->i_di
, dibh
->b_data
);
490 if (file
->f_flags
& O_SYNC
)
491 gfs2_log_flush_glock(ip
->i_gl
);
493 gfs2_inplace_release(ip
);
494 gfs2_quota_unlock(ip
);
497 if (file
->f_mapping
->nrpages
) {
498 error
= filemap_fdatawrite(file
->f_mapping
);
500 error
= filemap_fdatawait(file
->f_mapping
);
511 gfs2_inplace_release(ip
);
514 gfs2_quota_unlock(ip
);
523 * do_write_direct - Write bytes to a file
524 * @file: The file to write to
525 * @buf: The buffer to copy from
526 * @size: The amount of data requested
527 * @offset: The current file offset
528 * @num_gh: The number of other locks we need to do the read
529 * @gh: the locks we need plus one for our lock
531 * Outputs: Offset - updated according to number of bytes written
533 * Returns: The number of bytes written, errno on failure
536 static ssize_t
do_write_direct(struct file
*file
, const char __user
*buf
, size_t size
,
537 loff_t
*offset
, unsigned int num_gh
,
538 struct gfs2_holder
*ghs
)
540 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
541 struct gfs2_sbd
*sdp
= ip
->i_sbd
;
542 struct gfs2_file
*fp
= get_v2fp(file
);
543 unsigned int state
= LM_ST_DEFERRED
;
550 if (test_bit(GFF_DID_DIRECT_ALLOC
, &fp
->f_flags
))
551 state
= LM_ST_EXCLUSIVE
;
553 for (x
= 0; x
< num_gh
; x
++)
554 if (ghs
[x
].gh_gl
== ip
->i_gl
) {
555 state
= LM_ST_EXCLUSIVE
;
560 gfs2_holder_init(ip
->i_gl
, state
, 0, &ghs
[num_gh
]);
562 error
= gfs2_glock_nq_m(num_gh
+ 1, ghs
);
567 if (gfs2_is_jdata(ip
))
571 error
= grope_mapping(buf
, size
);
576 if (file
->f_flags
& O_APPEND
)
577 *offset
= ip
->i_di
.di_size
;
579 if (!(file
->f_flags
& O_LARGEFILE
)) {
581 if (*offset
>= MAX_NON_LFS
)
583 if (*offset
+ size
> MAX_NON_LFS
)
584 size
= MAX_NON_LFS
- *offset
;
587 if (gfs2_is_stuffed(ip
) ||
588 *offset
+ size
> ip
->i_di
.di_size
||
589 ((ip
->i_di
.di_mode
& (S_ISUID
| S_ISGID
)) && !capable(CAP_FSETID
)))
592 error
= gfs2_write_alloc_required(ip
, *offset
, size
,
598 if (alloc_required
&& state
!= LM_ST_EXCLUSIVE
) {
599 gfs2_glock_dq_m(num_gh
+ 1, ghs
);
600 gfs2_holder_uninit(&ghs
[num_gh
]);
601 state
= LM_ST_EXCLUSIVE
;
605 if (alloc_required
) {
606 set_bit(GFF_DID_DIRECT_ALLOC
, &fp
->f_flags
);
608 /* split large writes into smaller atomic transactions */
610 s
= gfs2_tune_get(sdp
, gt_max_atomic_write
);
614 error
= do_write_direct_alloc(file
, buf
, s
, offset
);
623 struct iovec local_iov
= { .iov_base
= buf
, .iov_len
= size
};
624 struct gfs2_holder t_gh
;
626 clear_bit(GFF_DID_DIRECT_ALLOC
, &fp
->f_flags
);
628 error
= gfs2_glock_nq_init(sdp
->sd_trans_gl
, LM_ST_SHARED
,
629 GL_NEVER_RECURSE
, &t_gh
);
633 count
= generic_file_write_nolock(file
, &local_iov
, 1, offset
);
635 gfs2_glock_dq_uninit(&t_gh
);
641 gfs2_glock_dq_m(num_gh
+ 1, ghs
);
644 gfs2_holder_uninit(&ghs
[num_gh
]);
646 return (count
) ? count
: error
;
650 * do_do_write_buf - Write bytes to a file
651 * @file: The file to write to
652 * @buf: The buffer to copy from
653 * @size: The amount of data requested
654 * @offset: The current file offset
656 * Outputs: Offset - updated according to number of bytes written
658 * Returns: The number of bytes written, errno on failure
661 static ssize_t
do_do_write_buf(struct file
*file
, const char __user
*buf
, size_t size
,
664 struct inode
*inode
= file
->f_mapping
->host
;
665 struct gfs2_inode
*ip
= get_v2ip(inode
);
666 struct gfs2_sbd
*sdp
= ip
->i_sbd
;
667 struct gfs2_alloc
*al
= NULL
;
668 struct buffer_head
*dibh
;
669 unsigned int data_blocks
, ind_blocks
;
670 int alloc_required
, journaled
;
674 journaled
= gfs2_is_jdata(ip
);
676 gfs2_write_calc_reserv(ip
, size
, &data_blocks
, &ind_blocks
);
678 error
= gfs2_write_alloc_required(ip
, *offset
, size
, &alloc_required
);
682 if (alloc_required
) {
683 al
= gfs2_alloc_get(ip
);
685 error
= gfs2_quota_lock(ip
, NO_QUOTA_CHANGE
, NO_QUOTA_CHANGE
);
689 error
= gfs2_quota_check(ip
, ip
->i_di
.di_uid
, ip
->i_di
.di_gid
);
693 al
->al_requested
= data_blocks
+ ind_blocks
;
695 error
= gfs2_inplace_reserve(ip
);
699 error
= gfs2_trans_begin(sdp
,
700 al
->al_rgd
->rd_ri
.ri_length
+
702 ((journaled
) ? data_blocks
: 0) +
703 RES_DINODE
+ RES_STATFS
+ RES_QUOTA
,
708 error
= gfs2_trans_begin(sdp
,
709 ((journaled
) ? data_blocks
: 0) +
716 if ((ip
->i_di
.di_mode
& (S_ISUID
| S_ISGID
)) && !capable(CAP_FSETID
)) {
717 error
= gfs2_meta_inode_buffer(ip
, &dibh
);
721 ip
->i_di
.di_mode
&= (ip
->i_di
.di_mode
& S_IXGRP
) ?
722 (~(S_ISUID
| S_ISGID
)) : (~S_ISUID
);
724 gfs2_trans_add_bh(ip
->i_gl
, dibh
);
725 gfs2_dinode_out(&ip
->i_di
, dibh
->b_data
);
730 count
= gfs2_jdata_write(ip
, buf
, *offset
, size
,
731 gfs2_copy_from_user
);
739 struct iovec local_iov
= { .iov_base
= buf
, .iov_len
= size
};
741 count
= generic_file_write_nolock(file
, &local_iov
, 1, offset
);
747 error
= gfs2_meta_inode_buffer(ip
, &dibh
);
751 if (ip
->i_di
.di_size
< inode
->i_size
)
752 ip
->i_di
.di_size
= inode
->i_size
;
753 ip
->i_di
.di_mtime
= ip
->i_di
.di_ctime
= get_seconds();
755 gfs2_trans_add_bh(ip
->i_gl
, dibh
);
756 gfs2_dinode_out(&ip
->i_di
, dibh
->b_data
);
762 if (file
->f_flags
& O_SYNC
|| IS_SYNC(inode
)) {
763 gfs2_log_flush_glock(ip
->i_gl
);
764 error
= filemap_fdatawrite(file
->f_mapping
);
766 error
= filemap_fdatawait(file
->f_mapping
);
771 if (alloc_required
) {
772 gfs2_assert_warn(sdp
, count
!= size
||
774 gfs2_inplace_release(ip
);
775 gfs2_quota_unlock(ip
);
786 gfs2_inplace_release(ip
);
790 gfs2_quota_unlock(ip
);
800 * do_write_buf - Write bytes to a file
801 * @file: The file to write to
802 * @buf: The buffer to copy from
803 * @size: The amount of data requested
804 * @offset: The current file offset
805 * @num_gh: The number of other locks we need to do the read
806 * @gh: the locks we need plus one for our lock
808 * Outputs: Offset - updated according to number of bytes written
810 * Returns: The number of bytes written, errno on failure
813 static ssize_t
do_write_buf(struct file
*file
, const char __user
*buf
, size_t size
,
814 loff_t
*offset
, unsigned int num_gh
,
815 struct gfs2_holder
*ghs
)
817 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
818 struct gfs2_sbd
*sdp
= ip
->i_sbd
;
823 gfs2_holder_init(ip
->i_gl
, LM_ST_EXCLUSIVE
, 0, &ghs
[num_gh
]);
825 error
= gfs2_glock_nq_m(num_gh
+ 1, ghs
);
830 error
= grope_mapping(buf
, size
);
835 if (file
->f_flags
& O_APPEND
)
836 *offset
= ip
->i_di
.di_size
;
838 if (!(file
->f_flags
& O_LARGEFILE
)) {
840 if (*offset
>= MAX_NON_LFS
)
842 if (*offset
+ size
> MAX_NON_LFS
)
843 size
= MAX_NON_LFS
- *offset
;
846 /* split large writes into smaller atomic transactions */
848 s
= gfs2_tune_get(sdp
, gt_max_atomic_write
);
852 error
= do_do_write_buf(file
, buf
, s
, offset
);
864 gfs2_glock_dq_m(num_gh
+ 1, ghs
);
867 gfs2_holder_uninit(&ghs
[num_gh
]);
869 return (count
) ? count
: error
;
873 * gfs2_write - Write bytes to a file
874 * @file: The file to write to
875 * @buf: The buffer to copy from
876 * @size: The amount of data requested
877 * @offset: The current file offset
879 * Outputs: Offset - updated according to number of bytes written
881 * Returns: The number of bytes written, errno on failure
884 static ssize_t
gfs2_write(struct file
*file
, const char __user
*buf
,
885 size_t size
, loff_t
*offset
)
887 struct inode
*inode
= file
->f_mapping
->host
;
890 atomic_inc(&get_v2sdp(inode
->i_sb
)->sd_ops_file
);
894 if (!access_ok(VERIFY_READ
, buf
, size
))
897 mutex_lock(&inode
->i_mutex
);
898 if (file
->f_flags
& O_DIRECT
)
899 count
= walk_vm(file
, buf
, size
, offset
,
902 count
= walk_vm(file
, buf
, size
, offset
, do_write_buf
);
903 mutex_unlock(&inode
->i_mutex
);
909 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
910 * @opaque: opaque data used by the function
911 * @name: the name of the directory entry
912 * @length: the length of the name
913 * @offset: the entry's offset in the directory
914 * @inum: the inode number the entry points to
915 * @type: the type of inode the entry points to
917 * Returns: 0 on success, 1 if buffer full
920 static int filldir_reg_func(void *opaque
, const char *name
, unsigned int length
,
921 uint64_t offset
, struct gfs2_inum
*inum
,
924 struct filldir_reg
*fdr
= (struct filldir_reg
*)opaque
;
925 struct gfs2_sbd
*sdp
= fdr
->fdr_sbd
;
928 error
= fdr
->fdr_filldir(fdr
->fdr_opaque
, name
, length
, offset
,
929 inum
->no_formal_ino
, type
);
933 if (fdr
->fdr_prefetch
&& !(length
== 1 && *name
== '.')) {
934 gfs2_glock_prefetch_num(sdp
,
935 inum
->no_addr
, &gfs2_inode_glops
,
936 LM_ST_SHARED
, LM_FLAG_TRY
| LM_FLAG_ANY
);
937 gfs2_glock_prefetch_num(sdp
,
938 inum
->no_addr
, &gfs2_iopen_glops
,
939 LM_ST_SHARED
, LM_FLAG_TRY
);
946 * readdir_reg - Read directory entries from a directory
947 * @file: The directory to read from
948 * @dirent: Buffer for dirents
949 * @filldir: Function used to do the copying
954 static int readdir_reg(struct file
*file
, void *dirent
, filldir_t filldir
)
956 struct gfs2_inode
*dip
= get_v2ip(file
->f_mapping
->host
);
957 struct filldir_reg fdr
;
958 struct gfs2_holder d_gh
;
959 uint64_t offset
= file
->f_pos
;
962 fdr
.fdr_sbd
= dip
->i_sbd
;
963 fdr
.fdr_prefetch
= 1;
964 fdr
.fdr_filldir
= filldir
;
965 fdr
.fdr_opaque
= dirent
;
967 gfs2_holder_init(dip
->i_gl
, LM_ST_SHARED
, GL_ATIME
, &d_gh
);
968 error
= gfs2_glock_nq_atime(&d_gh
);
970 gfs2_holder_uninit(&d_gh
);
974 error
= gfs2_dir_read(dip
, &offset
, &fdr
, filldir_reg_func
);
976 gfs2_glock_dq_uninit(&d_gh
);
978 file
->f_pos
= offset
;
984 * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read()
985 * @opaque: opaque data used by the function
986 * @name: the name of the directory entry
987 * @length: the length of the name
988 * @offset: the entry's offset in the directory
989 * @inum: the inode number the entry points to
990 * @type: the type of inode the entry points to
992 * For supporting NFS.
994 * Returns: 0 on success, 1 if buffer full
997 static int filldir_bad_func(void *opaque
, const char *name
, unsigned int length
,
998 uint64_t offset
, struct gfs2_inum
*inum
,
1001 struct filldir_bad
*fdb
= (struct filldir_bad
*)opaque
;
1002 struct gfs2_sbd
*sdp
= fdb
->fdb_sbd
;
1003 struct filldir_bad_entry
*fbe
;
1005 if (fdb
->fdb_entry_off
== fdb
->fdb_entry_num
||
1006 fdb
->fdb_name_off
+ length
> fdb
->fdb_name_size
)
1009 fbe
= &fdb
->fdb_entry
[fdb
->fdb_entry_off
];
1010 fbe
->fbe_name
= fdb
->fdb_name
+ fdb
->fdb_name_off
;
1011 memcpy(fbe
->fbe_name
, name
, length
);
1012 fbe
->fbe_length
= length
;
1013 fbe
->fbe_offset
= offset
;
1014 fbe
->fbe_inum
= *inum
;
1015 fbe
->fbe_type
= type
;
1017 fdb
->fdb_entry_off
++;
1018 fdb
->fdb_name_off
+= length
;
1020 if (!(length
== 1 && *name
== '.')) {
1021 gfs2_glock_prefetch_num(sdp
,
1022 inum
->no_addr
, &gfs2_inode_glops
,
1023 LM_ST_SHARED
, LM_FLAG_TRY
| LM_FLAG_ANY
);
1024 gfs2_glock_prefetch_num(sdp
,
1025 inum
->no_addr
, &gfs2_iopen_glops
,
1026 LM_ST_SHARED
, LM_FLAG_TRY
);
1033 * readdir_bad - Read directory entries from a directory
1034 * @file: The directory to read from
1035 * @dirent: Buffer for dirents
1036 * @filldir: Function used to do the copying
1038 * For supporting NFS.
1043 static int readdir_bad(struct file
*file
, void *dirent
, filldir_t filldir
)
1045 struct gfs2_inode
*dip
= get_v2ip(file
->f_mapping
->host
);
1046 struct gfs2_sbd
*sdp
= dip
->i_sbd
;
1047 struct filldir_reg fdr
;
1048 unsigned int entries
, size
;
1049 struct filldir_bad
*fdb
;
1050 struct gfs2_holder d_gh
;
1051 uint64_t offset
= file
->f_pos
;
1053 struct filldir_bad_entry
*fbe
;
1056 entries
= gfs2_tune_get(sdp
, gt_entries_per_readdir
);
1057 size
= sizeof(struct filldir_bad
) +
1058 entries
* (sizeof(struct filldir_bad_entry
) + GFS2_FAST_NAME_SIZE
);
1060 fdb
= kzalloc(size
, GFP_KERNEL
);
1065 fdb
->fdb_entry
= (struct filldir_bad_entry
*)(fdb
+ 1);
1066 fdb
->fdb_entry_num
= entries
;
1067 fdb
->fdb_name
= ((char *)fdb
) + sizeof(struct filldir_bad
) +
1068 entries
* sizeof(struct filldir_bad_entry
);
1069 fdb
->fdb_name_size
= entries
* GFS2_FAST_NAME_SIZE
;
1071 gfs2_holder_init(dip
->i_gl
, LM_ST_SHARED
, GL_ATIME
, &d_gh
);
1072 error
= gfs2_glock_nq_atime(&d_gh
);
1074 gfs2_holder_uninit(&d_gh
);
1078 error
= gfs2_dir_read(dip
, &offset
, fdb
, filldir_bad_func
);
1080 gfs2_glock_dq_uninit(&d_gh
);
1083 fdr
.fdr_prefetch
= 0;
1084 fdr
.fdr_filldir
= filldir
;
1085 fdr
.fdr_opaque
= dirent
;
1087 for (x
= 0; x
< fdb
->fdb_entry_off
; x
++) {
1088 fbe
= &fdb
->fdb_entry
[x
];
1090 error
= filldir_reg_func(&fdr
,
1091 fbe
->fbe_name
, fbe
->fbe_length
,
1093 &fbe
->fbe_inum
, fbe
->fbe_type
);
1095 file
->f_pos
= fbe
->fbe_offset
;
1101 file
->f_pos
= offset
;
1110 * gfs2_readdir - Read directory entries from a directory
1111 * @file: The directory to read from
1112 * @dirent: Buffer for dirents
1113 * @filldir: Function used to do the copying
1118 static int gfs2_readdir(struct file
*file
, void *dirent
, filldir_t filldir
)
1122 atomic_inc(&get_v2sdp(file
->f_mapping
->host
->i_sb
)->sd_ops_file
);
1124 if (strcmp(current
->comm
, "nfsd") != 0)
1125 error
= readdir_reg(file
, dirent
, filldir
);
1127 error
= readdir_bad(file
, dirent
, filldir
);
1132 static int gfs2_ioctl_flags(struct gfs2_inode
*ip
, unsigned int cmd
, unsigned long arg
)
1134 unsigned int lmode
= (cmd
== GFS2_IOCTL_SETFLAGS
) ? LM_ST_EXCLUSIVE
: LM_ST_SHARED
;
1135 struct buffer_head
*dibh
;
1136 struct gfs2_holder i_gh
;
1138 __u32 flags
= 0, change
;
1140 if (cmd
== GFS2_IOCTL_SETFLAGS
) {
1141 error
= get_user(flags
, (__u32 __user
*)arg
);
1146 error
= gfs2_glock_nq_init(ip
->i_gl
, lmode
, 0, &i_gh
);
1150 if (cmd
== GFS2_IOCTL_SETFLAGS
) {
1151 change
= flags
^ ip
->i_di
.di_flags
;
1153 if (change
& (GFS2_DIF_IMMUTABLE
|GFS2_DIF_APPENDONLY
)) {
1154 if (!capable(CAP_LINUX_IMMUTABLE
))
1158 if (flags
& (GFS2_DIF_JDATA
|GFS2_DIF_DIRECTIO
)) {
1159 if (!S_ISREG(ip
->i_di
.di_mode
))
1161 /* FIXME: Would be nice not to require the following test */
1162 if ((flags
& GFS2_DIF_JDATA
) && ip
->i_di
.di_size
)
1165 if (flags
& (GFS2_DIF_INHERIT_JDATA
|GFS2_DIF_INHERIT_DIRECTIO
)) {
1166 if (!S_ISDIR(ip
->i_di
.di_mode
))
1170 error
= gfs2_trans_begin(ip
->i_sbd
, RES_DINODE
, 0);
1174 error
= gfs2_meta_inode_buffer(ip
, &dibh
);
1178 ip
->i_di
.di_flags
= flags
;
1180 gfs2_trans_add_bh(ip
->i_gl
, dibh
);
1181 gfs2_dinode_out(&ip
->i_di
, dibh
->b_data
);
1186 gfs2_trans_end(ip
->i_sbd
);
1188 flags
= ip
->i_di
.di_flags
;
1191 gfs2_glock_dq_uninit(&i_gh
);
1192 if (cmd
== GFS2_IOCTL_GETFLAGS
) {
1193 if (put_user(flags
, (__u32 __user
*)arg
))
1200 * gfs2_ioctl - do an ioctl on a file
1202 * @file: the file pointer
1203 * @cmd: the ioctl command
1204 * @arg: the argument
1209 static int gfs2_ioctl(struct inode
*inode
, struct file
*file
, unsigned int cmd
,
1212 struct gfs2_inode
*ip
= get_v2ip(inode
);
1214 atomic_inc(&ip
->i_sbd
->sd_ops_file
);
1217 case GFS2_IOCTL_IDENTIFY
: {
1218 unsigned int x
= GFS2_MAGIC
;
1219 if (copy_to_user((unsigned int __user
*)arg
, &x
, sizeof(unsigned int)))
1223 case GFS2_IOCTL_SETFLAGS
:
1224 case GFS2_IOCTL_GETFLAGS
:
1225 return gfs2_ioctl_flags(ip
, cmd
, arg
);
1235 * @file: The file to map
1236 * @vma: The VMA which described the mapping
1238 * Returns: 0 or error code
1241 static int gfs2_mmap(struct file
*file
, struct vm_area_struct
*vma
)
1243 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
1244 struct gfs2_holder i_gh
;
1247 atomic_inc(&ip
->i_sbd
->sd_ops_file
);
1249 gfs2_holder_init(ip
->i_gl
, LM_ST_SHARED
, GL_ATIME
, &i_gh
);
1250 error
= gfs2_glock_nq_atime(&i_gh
);
1252 gfs2_holder_uninit(&i_gh
);
1256 if (gfs2_is_jdata(ip
)) {
1257 if (vma
->vm_flags
& VM_MAYSHARE
)
1258 error
= -EOPNOTSUPP
;
1260 vma
->vm_ops
= &gfs2_vm_ops_private
;
1262 /* This is VM_MAYWRITE instead of VM_WRITE because a call
1263 to mprotect() can turn on VM_WRITE later. */
1265 if ((vma
->vm_flags
& (VM_MAYSHARE
| VM_MAYWRITE
)) ==
1266 (VM_MAYSHARE
| VM_MAYWRITE
))
1267 vma
->vm_ops
= &gfs2_vm_ops_sharewrite
;
1269 vma
->vm_ops
= &gfs2_vm_ops_private
;
1272 gfs2_glock_dq_uninit(&i_gh
);
1278 * gfs2_open - open a file
1279 * @inode: the inode to open
1280 * @file: the struct file for this opening
1285 static int gfs2_open(struct inode
*inode
, struct file
*file
)
1287 struct gfs2_inode
*ip
= get_v2ip(inode
);
1288 struct gfs2_holder i_gh
;
1289 struct gfs2_file
*fp
;
1292 atomic_inc(&ip
->i_sbd
->sd_ops_file
);
1294 fp
= kzalloc(sizeof(struct gfs2_file
), GFP_KERNEL
);
1298 init_MUTEX(&fp
->f_fl_mutex
);
1303 gfs2_assert_warn(ip
->i_sbd
, !get_v2fp(file
));
1306 if (S_ISREG(ip
->i_di
.di_mode
)) {
1307 error
= gfs2_glock_nq_init(ip
->i_gl
, LM_ST_SHARED
, LM_FLAG_ANY
,
1312 if (!(file
->f_flags
& O_LARGEFILE
) &&
1313 ip
->i_di
.di_size
> MAX_NON_LFS
) {
1318 /* Listen to the Direct I/O flag */
1320 if (ip
->i_di
.di_flags
& GFS2_DIF_DIRECTIO
)
1321 file
->f_flags
|= O_DIRECT
;
1323 /* Don't let the user open O_DIRECT on a jdata file */
1325 if ((file
->f_flags
& O_DIRECT
) && gfs2_is_jdata(ip
)) {
1330 gfs2_glock_dq_uninit(&i_gh
);
1336 gfs2_glock_dq_uninit(&i_gh
);
1339 set_v2fp(file
, NULL
);
1346 * gfs2_close - called to close a struct file
1347 * @inode: the inode the struct file belongs to
1348 * @file: the struct file being closed
1353 static int gfs2_close(struct inode
*inode
, struct file
*file
)
1355 struct gfs2_sbd
*sdp
= get_v2sdp(inode
->i_sb
);
1356 struct gfs2_file
*fp
;
1358 atomic_inc(&sdp
->sd_ops_file
);
1360 fp
= get_v2fp(file
);
1361 set_v2fp(file
, NULL
);
1363 if (gfs2_assert_warn(sdp
, fp
))
1372 * gfs2_fsync - sync the dirty data for a file (across the cluster)
1373 * @file: the file that points to the dentry (we ignore this)
1374 * @dentry: the dentry that points to the inode to sync
1379 static int gfs2_fsync(struct file
*file
, struct dentry
*dentry
, int datasync
)
1381 struct gfs2_inode
*ip
= get_v2ip(dentry
->d_inode
);
1383 atomic_inc(&ip
->i_sbd
->sd_ops_file
);
1384 gfs2_log_flush_glock(ip
->i_gl
);
1390 * gfs2_lock - acquire/release a posix lock on a file
1391 * @file: the file pointer
1392 * @cmd: either modify or retrieve lock state, possibly wait
1393 * @fl: type and range of lock
1398 static int gfs2_lock(struct file
*file
, int cmd
, struct file_lock
*fl
)
1400 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
1401 struct gfs2_sbd
*sdp
= ip
->i_sbd
;
1402 struct lm_lockname name
=
1403 { .ln_number
= ip
->i_num
.no_addr
,
1404 .ln_type
= LM_TYPE_PLOCK
};
1406 atomic_inc(&sdp
->sd_ops_file
);
1408 if (!(fl
->fl_flags
& FL_POSIX
))
1410 if ((ip
->i_di
.di_mode
& (S_ISGID
| S_IXGRP
)) == S_ISGID
)
1413 if (sdp
->sd_args
.ar_localflocks
) {
1414 if (IS_GETLK(cmd
)) {
1415 struct file_lock
*tmp
;
1417 tmp
= posix_test_lock(file
, fl
);
1418 fl
->fl_type
= F_UNLCK
;
1420 memcpy(fl
, tmp
, sizeof(struct file_lock
));
1426 error
= posix_lock_file_wait(file
, fl
);
1433 return gfs2_lm_plock_get(sdp
, &name
, file
, fl
);
1434 else if (fl
->fl_type
== F_UNLCK
)
1435 return gfs2_lm_punlock(sdp
, &name
, file
, fl
);
1437 return gfs2_lm_plock(sdp
, &name
, file
, cmd
, fl
);
1441 * gfs2_sendfile - Send bytes to a file or socket
1442 * @in_file: The file to read from
1443 * @out_file: The file to write to
1444 * @count: The amount of data
1445 * @offset: The beginning file offset
1447 * Outputs: offset - updated according to number of bytes read
1449 * Returns: The number of bytes sent, errno on failure
1452 static ssize_t
gfs2_sendfile(struct file
*in_file
, loff_t
*offset
, size_t count
,
1453 read_actor_t actor
, void *target
)
1455 struct gfs2_inode
*ip
= get_v2ip(in_file
->f_mapping
->host
);
1456 struct gfs2_holder gh
;
1459 atomic_inc(&ip
->i_sbd
->sd_ops_file
);
1461 gfs2_holder_init(ip
->i_gl
, LM_ST_SHARED
, GL_ATIME
, &gh
);
1463 retval
= gfs2_glock_nq_atime(&gh
);
1467 if (gfs2_is_jdata(ip
))
1468 retval
= -EOPNOTSUPP
;
1470 retval
= generic_file_sendfile(in_file
, offset
, count
, actor
,
1476 gfs2_holder_uninit(&gh
);
1481 static int do_flock(struct file
*file
, int cmd
, struct file_lock
*fl
)
1483 struct gfs2_file
*fp
= get_v2fp(file
);
1484 struct gfs2_holder
*fl_gh
= &fp
->f_fl_gh
;
1485 struct gfs2_inode
*ip
= fp
->f_inode
;
1486 struct gfs2_glock
*gl
;
1491 state
= (fl
->fl_type
== F_WRLCK
) ? LM_ST_EXCLUSIVE
: LM_ST_SHARED
;
1492 flags
= ((IS_SETLKW(cmd
)) ? 0 : LM_FLAG_TRY
) | GL_EXACT
| GL_NOCACHE
;
1494 down(&fp
->f_fl_mutex
);
1498 if (fl_gh
->gh_state
== state
)
1500 gfs2_glock_hold(gl
);
1501 flock_lock_file_wait(file
,
1502 &(struct file_lock
){.fl_type
= F_UNLCK
});
1503 gfs2_glock_dq_uninit(fl_gh
);
1505 error
= gfs2_glock_get(ip
->i_sbd
,
1506 ip
->i_num
.no_addr
, &gfs2_flock_glops
,
1512 gfs2_holder_init(gl
, state
, flags
, fl_gh
);
1515 error
= gfs2_glock_nq(fl_gh
);
1517 gfs2_holder_uninit(fl_gh
);
1518 if (error
== GLR_TRYFAILED
)
1521 error
= flock_lock_file_wait(file
, fl
);
1522 gfs2_assert_warn(ip
->i_sbd
, !error
);
1526 up(&fp
->f_fl_mutex
);
1531 static void do_unflock(struct file
*file
, struct file_lock
*fl
)
1533 struct gfs2_file
*fp
= get_v2fp(file
);
1534 struct gfs2_holder
*fl_gh
= &fp
->f_fl_gh
;
1536 down(&fp
->f_fl_mutex
);
1537 flock_lock_file_wait(file
, fl
);
1539 gfs2_glock_dq_uninit(fl_gh
);
1540 up(&fp
->f_fl_mutex
);
1544 * gfs2_flock - acquire/release a flock lock on a file
1545 * @file: the file pointer
1546 * @cmd: either modify or retrieve lock state, possibly wait
1547 * @fl: type and range of lock
1552 static int gfs2_flock(struct file
*file
, int cmd
, struct file_lock
*fl
)
1554 struct gfs2_inode
*ip
= get_v2ip(file
->f_mapping
->host
);
1555 struct gfs2_sbd
*sdp
= ip
->i_sbd
;
1557 atomic_inc(&ip
->i_sbd
->sd_ops_file
);
1559 if (!(fl
->fl_flags
& FL_FLOCK
))
1561 if ((ip
->i_di
.di_mode
& (S_ISGID
| S_IXGRP
)) == S_ISGID
)
1564 if (sdp
->sd_args
.ar_localflocks
)
1565 return flock_lock_file_wait(file
, fl
);
1567 if (fl
->fl_type
== F_UNLCK
) {
1568 do_unflock(file
, fl
);
1571 return do_flock(file
, cmd
, fl
);
1574 struct file_operations gfs2_file_fops
= {
1575 .llseek
= gfs2_llseek
,
1577 .write
= gfs2_write
,
1578 .ioctl
= gfs2_ioctl
,
1581 .release
= gfs2_close
,
1582 .fsync
= gfs2_fsync
,
1584 .sendfile
= gfs2_sendfile
,
1585 .flock
= gfs2_flock
,
1588 struct file_operations gfs2_dir_fops
= {
1589 .readdir
= gfs2_readdir
,
1590 .ioctl
= gfs2_ioctl
,
1592 .release
= gfs2_close
,
1593 .fsync
= gfs2_fsync
,
1595 .flock
= gfs2_flock
,