cifs: get rid of ->f_path.dentry->d_sb uses, add a new helper
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause an unnecessary access-denied error on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}
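
/*
 * Editor's illustrative sketch, not part of the original file: how the three
 * converters above map a typical open(2) request. Compiled out with #if 0;
 * example_flag_mapping() is a hypothetical name.
 */
#if 0
static void example_flag_mapping(void)
{
	unsigned int f = O_RDWR | O_CREAT | O_EXCL;

	/* access bits requested from the server */
	int access = cifs_convert_flags(f);	  /* GENERIC_READ | GENERIC_WRITE */
	/* SMB unix-extensions open flags */
	u32 pflags = cifs_posix_convert_flags(f); /* SMB_O_RDWR | SMB_O_CREAT | SMB_O_EXCL */
	/* NT create disposition */
	int disp = cifs_get_disposition(f);	  /* FILE_CREATE */
}
#endif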
int cifs_posix_open(char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
			     CIFS_MOUNT_MAP_SPECIAL_CHR);
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match for the disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists);
 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag,
 *	and the read/write flags match reasonably.  O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}
/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = cifs_file->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifsLockInfo *li, *tmp;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	spin_lock(&cifs_file_list_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file_list_lock);
		return;
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 cifs_file->dentry->d_inode);
		/*
		 * In strict cache mode we need to invalidate the mapping on the
		 * last close because it may cause an error when we open this
		 * file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}
	spin_unlock(&cifs_file_list_lock);

	cancel_work_sync(&cifs_file->oplock_break);

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	cifs_del_pending_open(&open);

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
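
/*
 * Editor's illustrative sketch, not part of the original file: the
 * cifsFileInfo reference-count life cycle implemented above. Compiled out
 * with #if 0; example_fileinfo_refcount() is a hypothetical name.
 */
#if 0
static void example_fileinfo_refcount(struct file *file)
{
	/* cifs_new_fileinfo() created the handle with count == 1 */
	struct cifsFileInfo *cfile = file->private_data;

	cifsFileInfo_get(cfile);	/* count: 1 -> 2, handle pinned */
	/* ... safe to use cfile; it cannot be closed underneath us ... */
	cifsFileInfo_put(cfile);	/* count: 2 -> 1 */
	/*
	 * The final put (count -> 0) unhooks the handle from the open-file
	 * lists, closes it on the server and frees cached byte-range locks.
	 */
}
#endif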
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
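
/*
 * Editor's condensed view of the open path above (illustrative only, not
 * part of the original file; error handling and lease-key bookkeeping
 * omitted): try the POSIX-extensions open first, fall back to the NT-style
 * open, then wrap the server fid in a cifsFileInfo.
 */
#if 0
rc = cifs_posix_open(full_path, &inode, inode->i_sb, mode, file->f_flags,
		     &oplock, &fid.netfid, xid);
if (rc)		/* server lacks (or broke) the SMB unix extensions */
	rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags,
			  &oplock, &fid, xid);
if (!rc)
	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
#endif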
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
int cifs_close(struct inode *inode, struct file *file)
{
	if (file->private_data != NULL) {
		cifsFileInfo_put(file->private_data);
		file->private_data = NULL;
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, struct cifsLockInfo **conf_lock,
			int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 cfile, conf_lock, rw_check);
		if (rc)
			break;
	}

	return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
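
/*
 * Editor's note (illustrative, not part of the original file): the caller
 * pattern for the 0/1 contract above, as used by cifs_getlk() further down.
 */
#if 0
rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
if (!rc)
	return rc;	/* flock already holds the answer from the local cache */
/* rc == 1: probe the server with a lock + unlock pair instead */
#endif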
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
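
/*
 * Editor's note (illustrative, not part of the original file): the blocking
 * protocol above, condensed. A waiter queues itself on the conflicting
 * lock's ->blist and sleeps until cifs_del_lock_waiters() removes its entry
 * with list_del_init() and wakes ->block_q; the open-coded wait condition
 * above is equivalent to list_empty() on the waiter's own node.
 */
#if 0
list_add_tail(&lock->blist, &conf_lock->blist);
up_write(&cinode->lock_sem);
rc = wait_event_interruptible(lock->block_q, list_empty(&lock->blist));
#endif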
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf) {
		free_xid(xid);
		return -EINVAL;
	}

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
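
/*
 * Editor's note (illustrative, not part of the original file): the batching
 * above packs as many LOCKING_ANDX_RANGE entries per request as fit in the
 * server's negotiated buffer:
 *
 *	max_num = (max_buf - sizeof(struct smb_hdr)) /
 *					sizeof(LOCKING_ANDX_RANGE);
 *
 * so one wire round trip is issued per max_num cached locks, plus a final
 * request for any remainder. The concrete figure depends on the server's
 * maxBuf and the header/range sizes, which are not spelled out here.
 */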
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
	for (lockp = &inode->i_flock; *lockp != NULL; \
	     lockp = &(*lockp)->fl_next)

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = cfile->dentry->d_inode;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock, **before;
	unsigned int count = 0, i = 0;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		if ((*before)->fl_flags & FL_POSIX)
			count++;
	}
	spin_unlock(&inode->i_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&inode->i_lock);
	cifs_for_each_lock(inode, before) {
		flock = *before;
		if ((flock->fl_flags & FL_POSIX) == 0)
			continue;
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&inode->i_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
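
/*
 * Editor's note (illustrative, not part of the original file): the function
 * above uses a two-pass scheme because kmalloc(GFP_KERNEL) may sleep and so
 * cannot run under inode->i_lock: pass 1 counts FL_POSIX locks under the
 * lock, the structures are allocated unlocked, then pass 2 re-takes the lock
 * and fills them in. Holding cinode->lock_sem guarantees the count cannot
 * grow in between.
 */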
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf)
		return -EINVAL;

	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = cfile->dentry->d_inode;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}
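
/*
 * Editor's illustrative usage sketch, not part of the original file:
 * cifs_update_eof() must run under inode->i_lock, exactly as cifs_write()
 * below does.
 */
#if 0
spin_lock(&inode->i_lock);
cifs_update_eof(cifsi, offset, bytes_written);
spin_unlock(&inode->i_lock);
#endif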
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(dentry->d_inode),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written,
						     iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&dentry->d_inode->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&dentry->d_inode->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&dentry->d_inode->i_lock);
		if (*offset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *offset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	free_xid(xid);
	return total_written;
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	   the VFS or MM) should not happen but we had reports of an oops (due
	   to it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
refind_writable:
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_file_list_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} else {
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find a usable FH with the same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get_locked(inv_file);
	}

	spin_unlock(&cifs_file_list_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			spin_lock(&cifs_file_list_lock);
			list_move_tail(&inv_file->flist,
				       &cifs_inode->openFileList);
			spin_unlock(&cifs_file_list_lock);
			cifsFileInfo_put(inv_file);
			spin_lock(&cifs_file_list_lock);
			++refind;
			goto refind_writable;
		}
	}

	return NULL;
}
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	unsigned int nr_pages;
	struct page **pages;
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	/*
	 * find_get_pages_tag seems to return a max of 256 on each
	 * iteration, so we must call it several times in order to
	 * fill the array or the wsize is effectively limited to
	 * 256 * PAGE_CACHE_SIZE.
	 */
	*found_pages = 0;
	pages = wdata->pages;
	do {
		nr_pages = find_get_pages_tag(mapping, index,
					      PAGECACHE_TAG_DIRTY, tofind,
					      pages);
		*found_pages += nr_pages;
		tofind -= nr_pages;
		pages += nr_pages;
	} while (nr_pages && tofind && *index <= end);

	return wdata;
}
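
/*
 * Editor's worked example (illustrative, not part of the original file):
 * with a 4 MiB wsize and 4 KiB pages, tofind is 1024, so the loop above
 * needs roughly four find_get_pages_tag() calls (at most 256 pages each)
 * to fill the page array.
 */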
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither mapping->tree_lock nor
		 * lock on the page itself: the page may be truncated or
		 * invalidated (changing page->mapping to NULL), or even
		 * swizzled back from swapper_space to tmpfs file
		 * mapping
		 */

		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		page_cache_release(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc = 0;
	struct TCP_Server_Info *server;
	unsigned int i;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_CACHE_SIZE;
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_CACHE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;

	if (wdata->cfile != NULL)
		cifsFileInfo_put(wdata->cfile);
	wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
	if (!wdata->cfile) {
		cifs_dbg(VFS, "No writable handles for inode\n");
		rc = -EBADF;
	} else {
		wdata->pid = wdata->cfile->pid;
		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
		rc = server->ops->async_writev(wdata, cifs_writedata_release);
	}

	for (i = 0; i < nr_pages; ++i)
		unlock_page(wdata->pages[i]);

	return rc;
}

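/*
 * Address space writepages method: repeatedly gathers runs of contiguous
 * dirty pages (bounded by the negotiated wsize and the available credits)
 * and sends each run to the server as a single async write.
 */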
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize, credits;
		pgoff_t next = 0, tofind, saved_index = index;

		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		wdata->credits = credits;

		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}

static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
	/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	free_xid(xid);
	return rc;
}

static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}

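/*
 * Called after the caller has copied data into the page grabbed by
 * cifs_write_begin(). If the page never became uptodate, the copied
 * range is pushed to the server with a synchronous cifs_write();
 * otherwise the page is simply marked dirty for later writeback.
 */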
static int cifs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}

int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}

int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct inode *inode = file->f_mapping->host;

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}

/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	int rc = 0;

	if (file->f_mode & FMODE_WRITE)
		rc = filemap_write_and_wait(inode->i_mapping);

	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);

	return rc;
}

static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
	int rc = 0;
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!pages[i]) {
			/*
			 * save number of pages we have already allocated and
			 * return with ENOMEM error
			 */
			num_pages = i;
			rc = -ENOMEM;
			break;
		}
	}

	if (rc) {
		for (i = 0; i < num_pages; i++)
			put_page(pages[i]);
	}
	return rc;
}

static inline
size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
	size_t num_pages;
	size_t clen;

	clen = min_t(const size_t, len, wsize);
	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);

	if (cur_len)
		*cur_len = clen;

	return num_pages;
}

static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}

static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = wdata->cfile->dentry->d_inode;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);

	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}

static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}

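/*
 * Slice the user data in @from into wsize-bounded chunks, copy each chunk
 * into freshly allocated pages and issue an async write for it. Each
 * successfully sent writedata is queued on @wdata_list so the caller can
 * wait for the replies.
 */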
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = tlink_tcon(open_file->tlink)->ses->server;
	memcpy(&saved_from, from, sizeof(struct iov_iter));

	do {
		unsigned int wsize, credits;

		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		nr_pages = get_numpages(wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		num_pages = nr_pages;
		rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
		if (rc) {
			for (i = 0; i < nr_pages; i++)
				put_page(wdata->pages[i]);
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		/*
		 * Bring nr_pages down to the number of pages we actually used,
		 * and free any pages that we didn't use.
		 */
		for ( ; nr_pages > num_pages; nr_pages--)
			put_page(wdata->pages[nr_pages - 1]);

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		wdata->credits = credits;

		if (!wdata->cfile->invalidHandle ||
		    !cifs_reopen_file(wdata->cfile, false))
			rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		if (rc) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				memcpy(from, &saved_from,
				       sizeof(struct iov_iter));
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}

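/*
 * Top level of the uncached write path: sends the whole iovec via
 * cifs_write_from_iter(), then collects the replies in order of
 * increasing offset, resending any chunk that failed with -EAGAIN.
 */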
static ssize_t
cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
{
	size_t len;
	ssize_t total_written = 0;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_writedata *wdata, *tmp;
	struct list_head wdata_list;
	struct iov_iter saved_from;
	int rc;

	len = iov_iter_count(from);
	rc = generic_write_checks(file, poffset, &len, 0);
	if (rc)
		return rc;

	if (!len)
		return 0;

	iov_iter_truncate(from, len);

	INIT_LIST_HEAD(&wdata_list);
	cifs_sb = CIFS_FILE_SB(file);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	memcpy(&saved_from, from, sizeof(struct iov_iter));

	rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
				  &wdata_list);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&wdata_list))
		rc = 0;

	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit or we get a fatal signal
	 * while waiting, then return without waiting for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
		if (!rc) {
			/* FIXME: freezable too? */
			rc = wait_for_completion_killable(&wdata->done);
			if (rc)
				rc = -EINTR;
			else if (wdata->result)
				rc = wdata->result;
			else
				total_written += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				memcpy(&tmp_from, &saved_from,
				       sizeof(struct iov_iter));
				iov_iter_advance(&tmp_from,
						 wdata->offset - *poffset);

				rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						open_file, cifs_sb, &tmp_list);

				list_splice(&tmp_list, &wdata_list);

				kref_put(&wdata->refcount,
					 cifs_uncached_writedata_release);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	if (total_written > 0)
		*poffset += total_written;

	cifs_stats_bytes_written(tcon, total_written);
	return total_written ? total_written : (ssize_t)rc;
}

ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t written;
	struct inode *inode;
	loff_t pos = iocb->ki_pos;

	inode = file_inode(iocb->ki_filp);

	/*
	 * BB - optimize the way when signing is disabled. We can drop this
	 * extra memory-to-memory copying and use iovec buffers for constructing
	 * write request.
	 */

	written = cifs_iovec_write(iocb->ki_filp, from, &pos);
	if (written > 0) {
		set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
		iocb->ki_pos = pos;
	}

	return written;
}

static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc = -EACCES;
	loff_t lock_pos = iocb->ki_pos;

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);
	mutex_lock(&inode->i_mutex);
	if (file->f_flags & O_APPEND)
		lock_pos = i_size_read(inode);
	if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP)) {
		rc = __generic_file_write_iter(iocb, from);
		mutex_unlock(&inode->i_mutex);

		if (rc > 0) {
			ssize_t err;

			err = generic_write_sync(file, iocb->ki_pos - rc, rc);
			if (err < 0)
				rc = err;
		}
	} else {
		mutex_unlock(&inode->i_mutex);
	}
	up_read(&cinode->lock_sem);
	return rc;
}

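/*
 * Entry point for writes in strict cache mode: write through the page
 * cache only when we hold an oplock that allows caching writes,
 * otherwise fall back to the uncached write path.
 */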
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from pos to pos+len-1 rather than flush all
	 * affected pages, because flushing may cause an error with mandatory
	 * locks on those pages but not on the region from pos to pos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (written > 0 && CIFS_CACHE_READ(cinode)) {
		/*
		 * Windows 7 server can delay breaking level2 oplock if a write
		 * request comes - break it on the client to prevent reading
		 * old data.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}

static struct cifs_readdata *
cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
{
	struct cifs_readdata *rdata;

	rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
			GFP_KERNEL);
	if (rdata != NULL) {
		kref_init(&rdata->refcount);
		INIT_LIST_HEAD(&rdata->list);
		init_completion(&rdata->done);
		INIT_WORK(&rdata->work, complete);
	}

	return rdata;
}

void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);

	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kfree(rdata);
}

static int
cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
{
	int rc = 0;
	struct page *page;
	unsigned int i;

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
		if (!page) {
			rc = -ENOMEM;
			break;
		}
		rdata->pages[i] = page;
	}

	if (rc) {
		for (i = 0; i < nr_pages; i++) {
			put_page(rdata->pages[i]);
			rdata->pages[i] = NULL;
		}
	}
	return rc;
}

static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
		rdata->pages[i] = NULL;
	}
	cifs_readdata_release(refcount);
}

/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:	the readdata response with list of pages holding data
 * @iter:	destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return remaining ? -EFAULT : 0;
}

static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}

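/*
 * Receive @len bytes from the socket into the pages of @rdata, zeroing
 * the tail of a partially filled page and releasing any pages that are
 * not needed for this response.
 */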
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	int result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_SIZE;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}

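/*
 * Split an uncached read of @len bytes at @offset into rsize-bounded
 * async read requests and queue each one on @rdata_list for the caller
 * to wait on.
 */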
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize, credits;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;

	server = tlink_tcon(open_file->tlink)->ses->server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	do {
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);
		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);

		/* allocate a readdata struct */
		rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
		if (!rdata) {
			add_credits_and_wake_if(server, credits, 0);
			rc = -ENOMEM;
			break;
		}

		rc = cifs_read_allocate_pages(rdata, npages);
		if (rc)
			goto error;

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->credits = credits;

		if (!rdata->cfile->invalidHandle ||
		    !cifs_reopen_file(rdata->cfile, true))
			rc = server->ops->async_readv(rdata);
error:
		if (rc) {
			add_credits_and_wake_if(server, rdata->credits, 0);
			kref_put(&rdata->refcount,
				 cifs_uncached_readdata_release);
			if (rc == -EAGAIN)
				continue;
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}

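/*
 * Top level of the uncached read path: issues the async reads, then
 * copies the replies into the destination iovec in order of increasing
 * offset, resending from the point of interruption on -EAGAIN.
 */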
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	ssize_t rc;
	size_t len;
	ssize_t total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *open_file;
	struct cifs_readdata *rdata, *tmp;
	struct list_head rdata_list;

	len = iov_iter_count(to);
	if (!len)
		return 0;

	INIT_LIST_HEAD(&rdata_list);
	cifs_sb = CIFS_FILE_SB(file);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&rdata_list))
		rc = 0;

	len = iov_iter_count(to);
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
		if (!rc) {
			/* FIXME: freezable sleep too? */
			rc = wait_for_completion_killable(&rdata->done);
			if (rc)
				rc = -EINTR;
			else if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
						continue;
					}
				}

				rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list);

				list_splice(&tmp_list, &rdata_list);

				kref_put(&rdata->refcount,
					 cifs_uncached_readdata_release);
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	total_read = len - iov_iter_count(to);

	cifs_stats_bytes_read(tcon, total_read);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}

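/*
 * Entry point for reads in strict cache mode: decide whether the page
 * cache may be used (level II oplock held and no conflicting brlocks)
 * or whether we must go to the server for the data.
 */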
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * We can also fail when reading pages if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}

static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For Windows ME and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid,
						    &io_parms, &bytes_read,
						    &cur_offset, &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}

	free_xid(xid);
	return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	lock_page(page);
	return VM_FAULT_LOCKED;
}

static struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			/* don't leak the xid on the early error return */
			free_xid(xid);
			return rc;
		}
	}

	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();
	rc = cifs_revalidate_file(file);
	if (rc) {
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
		free_xid(xid);
		return rc;
	}
	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);

		page_cache_release(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}

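/*
 * Like cifs_uncached_read_into_pages(), but for the readpages path:
 * pages beyond the server's EOF are zero-filled and marked uptodate
 * instead of being read from the socket.
 */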
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_CACHE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_CACHE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_CACHE_SIZE;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			len -= PAGE_CACHE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len,
			       '\0', PAGE_CACHE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_CACHE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}

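/*
 * Pull a run of consecutively indexed pages off the tail of @page_list,
 * add them to the page cache and move them to @tmplist, stopping at an
 * index discontinuity or once @rsize worth of pages has been collected.
 */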
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;

	INIT_LIST_HEAD(tmplist);

	page = list_entry(page_list->prev, struct page, lru);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__set_page_locked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, GFP_KERNEL);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__clear_page_locked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	*bytes = PAGE_CACHE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_CACHE_SIZE > rsize)
			break;

		__set_page_locked(page);
		if (add_to_page_cache_locked(page, mapping, page->index,
					     GFP_KERNEL)) {
			__clear_page_locked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_CACHE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}

static int cifs_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		unsigned credits;

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_CACHE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_CACHE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->credits = credits;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		if (!rdata->cfile->invalidHandle ||
		    !cifs_reopen_file(rdata->cfile, true))
			rc = server->ops->async_readv(rdata);
		if (rc) {
			add_credits_and_wake_if(server, rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	return rc;
}

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	file_inode(file)->i_atime =
		current_fs_time(file_inode(file)->i_sb);

	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}

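/* Return 1 if any open file on this inode was opened with write access. */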
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_file_list_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_file_list_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_file_list_lock);
	return 0;
}

/* We do not want to update the file size from server for inodes
   open for write - to avoid races with writepage extending
   the file - in the future we could consider allowing
   refreshing the inode only on increases in the file size
   but this is tricky to do without racing with writebehind
   page caching in the current Linux kernel design */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since no page cache to corrupt on directio
			we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

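/*
 * Grab and prepare the page that cifs_write_end() will copy into,
 * reading it in first unless the write covers the whole page or one of
 * the cached-read optimizations below applies.
 */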
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		page_cache_release(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}

static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0 && length == PAGE_CACHE_SIZE)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

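/*
 * Worker run when the server breaks our oplock: downgrade the cached
 * state, flush and possibly invalidate cached data, push any cached
 * byte-range locks and acknowledge the break to the server.
 */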
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode,
		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode)) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * Releasing a stale oplock after a recent reconnect of the smb
	 * session using a now incorrect file handle is not a data integrity
	 * issue, but do not bother sending an oplock release if the session
	 * to the server is still disconnected, since the oplock has already
	 * been released by the server.
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
	       loff_t pos)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}

const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};