x86/mm: Fix some kernel-doc warnings
[linux-2.6.git] / fs / cifs / file.c
blob513adbc211d7029d1c8b1d6670a1a37d9029cac6
1 /*
2 * fs/cifs/file.c
4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
49 return GENERIC_READ;
50 else if ((flags & O_ACCMODE) == O_WRONLY)
51 return GENERIC_WRITE;
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61 FILE_READ_DATA);
64 static u32 cifs_posix_convert_flags(unsigned int flags)
66 u32 posix_flags = 0;
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
75 if (flags & O_CREAT)
76 posix_flags |= SMB_O_CREAT;
77 if (flags & O_EXCL)
78 posix_flags |= SMB_O_EXCL;
79 if (flags & O_TRUNC)
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
82 if (flags & O_DSYNC)
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
88 if (flags & O_DIRECT)
89 posix_flags |= SMB_O_DIRECT;
91 return posix_flags;
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97 return FILE_CREATE;
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
101 return FILE_OPEN_IF;
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
104 else
105 return FILE_OPEN;
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, int xid)
112 int rc;
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
124 return -ENOMEM;
126 tlink = cifs_sb_tlink(cifs_sb);
127 if (IS_ERR(tlink)) {
128 rc = PTR_ERR(tlink);
129 goto posix_open_ret;
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
142 if (rc)
143 goto posix_open_ret;
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
148 if (!pinode)
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
157 if (!*pinode) {
158 rc = -ENOMEM;
159 goto posix_open_ret;
161 } else {
162 cifs_fattr_to_inode(*pinode, &fattr);
165 posix_open_ret:
166 kfree(presp_data);
167 return rc;
170 static int
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, int xid)
175 int rc;
176 int desiredAccess;
177 int disposition;
178 int create_options = CREATE_NOT_DIR;
179 FILE_ALL_INFO *buf;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
212 if (!buf)
213 return -ENOMEM;
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
223 else
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
229 if (rc)
230 goto out;
232 if (tcon->unix_ext)
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
234 xid);
235 else
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
237 xid, pnetfid);
239 out:
240 kfree(buf);
241 return rc;
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
255 return pCifsFile;
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
267 INIT_LIST_HEAD(&pCifsFile->llist);
269 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
271 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
274 else
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
276 spin_unlock(&cifs_file_list_lock);
278 cifs_set_oplock_level(pCifsInode, oplock);
279 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
281 file->private_data = pCifsFile;
282 return pCifsFile;
285 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
288 * Release a reference on the file private data. This may involve closing
289 * the filehandle out on the server. Must be called without holding
290 * cifs_file_list_lock.
292 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
294 struct inode *inode = cifs_file->dentry->d_inode;
295 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
296 struct cifsInodeInfo *cifsi = CIFS_I(inode);
297 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
298 struct cifsLockInfo *li, *tmp;
300 spin_lock(&cifs_file_list_lock);
301 if (--cifs_file->count > 0) {
302 spin_unlock(&cifs_file_list_lock);
303 return;
306 /* remove it from the lists */
307 list_del(&cifs_file->flist);
308 list_del(&cifs_file->tlist);
310 if (list_empty(&cifsi->openFileList)) {
311 cFYI(1, "closing last open instance for inode %p",
312 cifs_file->dentry->d_inode);
314 /* in strict cache mode we need invalidate mapping on the last
315 close because it may cause a error when we open this file
316 again and get at least level II oplock */
317 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
318 CIFS_I(inode)->invalid_mapping = true;
320 cifs_set_oplock_level(cifsi, 0);
322 spin_unlock(&cifs_file_list_lock);
324 cancel_work_sync(&cifs_file->oplock_break);
326 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
327 int xid, rc;
329 xid = GetXid();
330 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
331 FreeXid(xid);
334 /* Delete any outstanding lock records. We'll lose them when the file
335 * is closed anyway.
337 mutex_lock(&cifsi->lock_mutex);
338 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
339 list_del(&li->llist);
340 cifs_del_lock_waiters(li);
341 kfree(li);
343 mutex_unlock(&cifsi->lock_mutex);
345 cifs_put_tlink(cifs_file->tlink);
346 dput(cifs_file->dentry);
347 kfree(cifs_file);
350 int cifs_open(struct inode *inode, struct file *file)
352 int rc = -EACCES;
353 int xid;
354 __u32 oplock;
355 struct cifs_sb_info *cifs_sb;
356 struct cifs_tcon *tcon;
357 struct tcon_link *tlink;
358 struct cifsFileInfo *pCifsFile = NULL;
359 char *full_path = NULL;
360 bool posix_open_ok = false;
361 __u16 netfid;
363 xid = GetXid();
365 cifs_sb = CIFS_SB(inode->i_sb);
366 tlink = cifs_sb_tlink(cifs_sb);
367 if (IS_ERR(tlink)) {
368 FreeXid(xid);
369 return PTR_ERR(tlink);
371 tcon = tlink_tcon(tlink);
373 full_path = build_path_from_dentry(file->f_path.dentry);
374 if (full_path == NULL) {
375 rc = -ENOMEM;
376 goto out;
379 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
380 inode, file->f_flags, full_path);
382 if (tcon->ses->server->oplocks)
383 oplock = REQ_OPLOCK;
384 else
385 oplock = 0;
387 if (!tcon->broken_posix_open && tcon->unix_ext &&
388 (tcon->ses->capabilities & CAP_UNIX) &&
389 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
390 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
391 /* can not refresh inode info since size could be stale */
392 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
393 cifs_sb->mnt_file_mode /* ignored */,
394 file->f_flags, &oplock, &netfid, xid);
395 if (rc == 0) {
396 cFYI(1, "posix open succeeded");
397 posix_open_ok = true;
398 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
399 if (tcon->ses->serverNOS)
400 cERROR(1, "server %s of type %s returned"
401 " unexpected error on SMB posix open"
402 ", disabling posix open support."
403 " Check if server update available.",
404 tcon->ses->serverName,
405 tcon->ses->serverNOS);
406 tcon->broken_posix_open = true;
407 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
408 (rc != -EOPNOTSUPP)) /* path not found or net err */
409 goto out;
410 /* else fallthrough to retry open the old way on network i/o
411 or DFS errors */
414 if (!posix_open_ok) {
415 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
416 file->f_flags, &oplock, &netfid, xid);
417 if (rc)
418 goto out;
421 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
422 if (pCifsFile == NULL) {
423 CIFSSMBClose(xid, tcon, netfid);
424 rc = -ENOMEM;
425 goto out;
428 cifs_fscache_set_inode_cookie(inode, file);
430 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
431 /* time to set mode which we can not set earlier due to
432 problems creating new read-only files */
433 struct cifs_unix_set_info_args args = {
434 .mode = inode->i_mode,
435 .uid = NO_CHANGE_64,
436 .gid = NO_CHANGE_64,
437 .ctime = NO_CHANGE_64,
438 .atime = NO_CHANGE_64,
439 .mtime = NO_CHANGE_64,
440 .device = 0,
442 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
443 pCifsFile->pid);
446 out:
447 kfree(full_path);
448 FreeXid(xid);
449 cifs_put_tlink(tlink);
450 return rc;
/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 *
 * Currently a placeholder: no locks are re-sent and 0 is always returned.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	/* BB list all locks open on this file and relock */
	return 0;
}
464 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
466 int rc = -EACCES;
467 int xid;
468 __u32 oplock;
469 struct cifs_sb_info *cifs_sb;
470 struct cifs_tcon *tcon;
471 struct cifsInodeInfo *pCifsInode;
472 struct inode *inode;
473 char *full_path = NULL;
474 int desiredAccess;
475 int disposition = FILE_OPEN;
476 int create_options = CREATE_NOT_DIR;
477 __u16 netfid;
479 xid = GetXid();
480 mutex_lock(&pCifsFile->fh_mutex);
481 if (!pCifsFile->invalidHandle) {
482 mutex_unlock(&pCifsFile->fh_mutex);
483 rc = 0;
484 FreeXid(xid);
485 return rc;
488 inode = pCifsFile->dentry->d_inode;
489 cifs_sb = CIFS_SB(inode->i_sb);
490 tcon = tlink_tcon(pCifsFile->tlink);
492 /* can not grab rename sem here because various ops, including
493 those that already have the rename sem can end up causing writepage
494 to get called and if the server was down that means we end up here,
495 and we can never tell if the caller already has the rename_sem */
496 full_path = build_path_from_dentry(pCifsFile->dentry);
497 if (full_path == NULL) {
498 rc = -ENOMEM;
499 mutex_unlock(&pCifsFile->fh_mutex);
500 FreeXid(xid);
501 return rc;
504 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
505 inode, pCifsFile->f_flags, full_path);
507 if (tcon->ses->server->oplocks)
508 oplock = REQ_OPLOCK;
509 else
510 oplock = 0;
512 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
513 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
514 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
517 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
518 * original open. Must mask them off for a reopen.
520 unsigned int oflags = pCifsFile->f_flags &
521 ~(O_CREAT | O_EXCL | O_TRUNC);
523 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
524 cifs_sb->mnt_file_mode /* ignored */,
525 oflags, &oplock, &netfid, xid);
526 if (rc == 0) {
527 cFYI(1, "posix reopen succeeded");
528 goto reopen_success;
530 /* fallthrough to retry open the old way on errors, especially
531 in the reconnect path it is important to retry hard */
534 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
536 if (backup_cred(cifs_sb))
537 create_options |= CREATE_OPEN_BACKUP_INTENT;
539 /* Can not refresh inode by passing in file_info buf to be returned
540 by SMBOpen and then calling get_inode_info with returned buf
541 since file might have write behind data that needs to be flushed
542 and server version of file size can be stale. If we knew for sure
543 that inode was not dirty locally we could do this */
545 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
546 create_options, &netfid, &oplock, NULL,
547 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
548 CIFS_MOUNT_MAP_SPECIAL_CHR);
549 if (rc) {
550 mutex_unlock(&pCifsFile->fh_mutex);
551 cFYI(1, "cifs_open returned 0x%x", rc);
552 cFYI(1, "oplock: %d", oplock);
553 goto reopen_error_exit;
556 reopen_success:
557 pCifsFile->netfid = netfid;
558 pCifsFile->invalidHandle = false;
559 mutex_unlock(&pCifsFile->fh_mutex);
560 pCifsInode = CIFS_I(inode);
562 if (can_flush) {
563 rc = filemap_write_and_wait(inode->i_mapping);
564 mapping_set_error(inode->i_mapping, rc);
566 if (tcon->unix_ext)
567 rc = cifs_get_inode_info_unix(&inode,
568 full_path, inode->i_sb, xid);
569 else
570 rc = cifs_get_inode_info(&inode,
571 full_path, NULL, inode->i_sb,
572 xid, NULL);
573 } /* else we are writing out data to server already
574 and could deadlock if we tried to flush data, and
575 since we do not know if we have data that would
576 invalidate the current end of file on the server
577 we can not go to the server to get the new inod
578 info */
580 cifs_set_oplock_level(pCifsInode, oplock);
582 cifs_relock_file(pCifsFile);
584 reopen_error_exit:
585 kfree(full_path);
586 FreeXid(xid);
587 return rc;
590 int cifs_close(struct inode *inode, struct file *file)
592 if (file->private_data != NULL) {
593 cifsFileInfo_put(file->private_data);
594 file->private_data = NULL;
597 /* return code from the ->release op is always ignored */
598 return 0;
601 int cifs_closedir(struct inode *inode, struct file *file)
603 int rc = 0;
604 int xid;
605 struct cifsFileInfo *pCFileStruct = file->private_data;
606 char *ptmp;
608 cFYI(1, "Closedir inode = 0x%p", inode);
610 xid = GetXid();
612 if (pCFileStruct) {
613 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
615 cFYI(1, "Freeing private data in close dir");
616 spin_lock(&cifs_file_list_lock);
617 if (!pCFileStruct->srch_inf.endOfSearch &&
618 !pCFileStruct->invalidHandle) {
619 pCFileStruct->invalidHandle = true;
620 spin_unlock(&cifs_file_list_lock);
621 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
622 cFYI(1, "Closing uncompleted readdir with rc %d",
623 rc);
624 /* not much we can do if it fails anyway, ignore rc */
625 rc = 0;
626 } else
627 spin_unlock(&cifs_file_list_lock);
628 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
629 if (ptmp) {
630 cFYI(1, "closedir free smb buf in srch struct");
631 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
632 if (pCFileStruct->srch_inf.smallBuf)
633 cifs_small_buf_release(ptmp);
634 else
635 cifs_buf_release(ptmp);
637 cifs_put_tlink(pCFileStruct->tlink);
638 kfree(file->private_data);
639 file->private_data = NULL;
641 /* BB can we lock the filestruct while this is going on? */
642 FreeXid(xid);
643 return rc;
646 static struct cifsLockInfo *
647 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
649 struct cifsLockInfo *lock =
650 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
651 if (!lock)
652 return lock;
653 lock->offset = offset;
654 lock->length = length;
655 lock->type = type;
656 lock->pid = current->tgid;
657 INIT_LIST_HEAD(&lock->blist);
658 init_waitqueue_head(&lock->block_q);
659 return lock;
662 static void
663 cifs_del_lock_waiters(struct cifsLockInfo *lock)
665 struct cifsLockInfo *li, *tmp;
666 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
667 list_del_init(&li->blist);
668 wake_up(&li->block_q);
672 static bool
673 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
674 __u64 length, __u8 type, struct cifsFileInfo *cur,
675 struct cifsLockInfo **conf_lock)
677 struct cifsLockInfo *li;
678 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
680 list_for_each_entry(li, &cfile->llist, llist) {
681 if (offset + length <= li->offset ||
682 offset >= li->offset + li->length)
683 continue;
684 else if ((type & server->vals->shared_lock_type) &&
685 ((server->ops->compare_fids(cur, cfile) &&
686 current->tgid == li->pid) || type == li->type))
687 continue;
688 else {
689 *conf_lock = li;
690 return true;
693 return false;
696 static bool
697 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
698 __u8 type, struct cifsLockInfo **conf_lock)
700 bool rc = false;
701 struct cifsFileInfo *fid, *tmp;
702 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
704 spin_lock(&cifs_file_list_lock);
705 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
706 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
707 cfile, conf_lock);
708 if (rc)
709 break;
711 spin_unlock(&cifs_file_list_lock);
713 return rc;
717 * Check if there is another lock that prevents us to set the lock (mandatory
718 * style). If such a lock exists, update the flock structure with its
719 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
720 * or leave it the same if we can't. Returns 0 if we don't need to request to
721 * the server or 1 otherwise.
723 static int
724 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
725 __u8 type, struct file_lock *flock)
727 int rc = 0;
728 struct cifsLockInfo *conf_lock;
729 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
730 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
731 bool exist;
733 mutex_lock(&cinode->lock_mutex);
735 exist = cifs_find_lock_conflict(cfile, offset, length, type,
736 &conf_lock);
737 if (exist) {
738 flock->fl_start = conf_lock->offset;
739 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
740 flock->fl_pid = conf_lock->pid;
741 if (conf_lock->type & server->vals->shared_lock_type)
742 flock->fl_type = F_RDLCK;
743 else
744 flock->fl_type = F_WRLCK;
745 } else if (!cinode->can_cache_brlcks)
746 rc = 1;
747 else
748 flock->fl_type = F_UNLCK;
750 mutex_unlock(&cinode->lock_mutex);
751 return rc;
754 static void
755 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
757 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
758 mutex_lock(&cinode->lock_mutex);
759 list_add_tail(&lock->llist, &cfile->llist);
760 mutex_unlock(&cinode->lock_mutex);
764 * Set the byte-range lock (mandatory style). Returns:
765 * 1) 0, if we set the lock and don't need to request to the server;
766 * 2) 1, if no locks prevent us but we need to request to the server;
767 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
769 static int
770 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
771 bool wait)
773 struct cifsLockInfo *conf_lock;
774 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
775 bool exist;
776 int rc = 0;
778 try_again:
779 exist = false;
780 mutex_lock(&cinode->lock_mutex);
782 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
783 lock->type, &conf_lock);
784 if (!exist && cinode->can_cache_brlcks) {
785 list_add_tail(&lock->llist, &cfile->llist);
786 mutex_unlock(&cinode->lock_mutex);
787 return rc;
790 if (!exist)
791 rc = 1;
792 else if (!wait)
793 rc = -EACCES;
794 else {
795 list_add_tail(&lock->blist, &conf_lock->blist);
796 mutex_unlock(&cinode->lock_mutex);
797 rc = wait_event_interruptible(lock->block_q,
798 (lock->blist.prev == &lock->blist) &&
799 (lock->blist.next == &lock->blist));
800 if (!rc)
801 goto try_again;
802 mutex_lock(&cinode->lock_mutex);
803 list_del_init(&lock->blist);
806 mutex_unlock(&cinode->lock_mutex);
807 return rc;
811 * Check if there is another lock that prevents us to set the lock (posix
812 * style). If such a lock exists, update the flock structure with its
813 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
814 * or leave it the same if we can't. Returns 0 if we don't need to request to
815 * the server or 1 otherwise.
817 static int
818 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
820 int rc = 0;
821 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
822 unsigned char saved_type = flock->fl_type;
824 if ((flock->fl_flags & FL_POSIX) == 0)
825 return 1;
827 mutex_lock(&cinode->lock_mutex);
828 posix_test_lock(file, flock);
830 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
831 flock->fl_type = saved_type;
832 rc = 1;
835 mutex_unlock(&cinode->lock_mutex);
836 return rc;
840 * Set the byte-range lock (posix style). Returns:
841 * 1) 0, if we set the lock and don't need to request to the server;
842 * 2) 1, if we need to request to the server;
843 * 3) <0, if the error occurs while setting the lock.
845 static int
846 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
848 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
849 int rc = 1;
851 if ((flock->fl_flags & FL_POSIX) == 0)
852 return rc;
854 try_again:
855 mutex_lock(&cinode->lock_mutex);
856 if (!cinode->can_cache_brlcks) {
857 mutex_unlock(&cinode->lock_mutex);
858 return rc;
861 rc = posix_lock_file(file, flock, NULL);
862 mutex_unlock(&cinode->lock_mutex);
863 if (rc == FILE_LOCK_DEFERRED) {
864 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
865 if (!rc)
866 goto try_again;
867 locks_delete_block(flock);
869 return rc;
872 static int
873 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
875 int xid, rc = 0, stored_rc;
876 struct cifsLockInfo *li, *tmp;
877 struct cifs_tcon *tcon;
878 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
879 unsigned int num, max_num, max_buf;
880 LOCKING_ANDX_RANGE *buf, *cur;
881 int types[] = {LOCKING_ANDX_LARGE_FILES,
882 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
883 int i;
885 xid = GetXid();
886 tcon = tlink_tcon(cfile->tlink);
888 mutex_lock(&cinode->lock_mutex);
889 if (!cinode->can_cache_brlcks) {
890 mutex_unlock(&cinode->lock_mutex);
891 FreeXid(xid);
892 return rc;
896 * Accessing maxBuf is racy with cifs_reconnect - need to store value
897 * and check it for zero before using.
899 max_buf = tcon->ses->server->maxBuf;
900 if (!max_buf) {
901 mutex_unlock(&cinode->lock_mutex);
902 FreeXid(xid);
903 return -EINVAL;
906 max_num = (max_buf - sizeof(struct smb_hdr)) /
907 sizeof(LOCKING_ANDX_RANGE);
908 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
909 if (!buf) {
910 mutex_unlock(&cinode->lock_mutex);
911 FreeXid(xid);
912 return rc;
915 for (i = 0; i < 2; i++) {
916 cur = buf;
917 num = 0;
918 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
919 if (li->type != types[i])
920 continue;
921 cur->Pid = cpu_to_le16(li->pid);
922 cur->LengthLow = cpu_to_le32((u32)li->length);
923 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
924 cur->OffsetLow = cpu_to_le32((u32)li->offset);
925 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
926 if (++num == max_num) {
927 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
928 (__u8)li->type, 0, num,
929 buf);
930 if (stored_rc)
931 rc = stored_rc;
932 cur = buf;
933 num = 0;
934 } else
935 cur++;
938 if (num) {
939 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
940 (__u8)types[i], 0, num, buf);
941 if (stored_rc)
942 rc = stored_rc;
946 cinode->can_cache_brlcks = false;
947 mutex_unlock(&cinode->lock_mutex);
949 kfree(buf);
950 FreeXid(xid);
951 return rc;
/*
 * Copied from fs/locks.c with a name change.
 *
 * Iterate over the lock list hanging off inode->i_flock. @lockp is a
 * pointer-to-pointer cursor: it first addresses the i_flock head and then
 * the fl_next field of each entry, and the loop ends when it addresses a
 * NULL pointer. Arguments are parenthesised so that arbitrary expressions
 * can be passed safely.
 */
#define cifs_for_each_lock(inode, lockp) \
	for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; \
	     (lockp) = &(*(lockp))->fl_next)
959 struct lock_to_push {
960 struct list_head llist;
961 __u64 offset;
962 __u64 length;
963 __u32 pid;
964 __u16 netfid;
965 __u8 type;
968 static int
969 cifs_push_posix_locks(struct cifsFileInfo *cfile)
971 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
972 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
973 struct file_lock *flock, **before;
974 unsigned int count = 0, i = 0;
975 int rc = 0, xid, type;
976 struct list_head locks_to_send, *el;
977 struct lock_to_push *lck, *tmp;
978 __u64 length;
980 xid = GetXid();
982 mutex_lock(&cinode->lock_mutex);
983 if (!cinode->can_cache_brlcks) {
984 mutex_unlock(&cinode->lock_mutex);
985 FreeXid(xid);
986 return rc;
989 lock_flocks();
990 cifs_for_each_lock(cfile->dentry->d_inode, before) {
991 if ((*before)->fl_flags & FL_POSIX)
992 count++;
994 unlock_flocks();
996 INIT_LIST_HEAD(&locks_to_send);
999 * Allocating count locks is enough because no FL_POSIX locks can be
1000 * added to the list while we are holding cinode->lock_mutex that
1001 * protects locking operations of this inode.
1003 for (; i < count; i++) {
1004 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1005 if (!lck) {
1006 rc = -ENOMEM;
1007 goto err_out;
1009 list_add_tail(&lck->llist, &locks_to_send);
1012 el = locks_to_send.next;
1013 lock_flocks();
1014 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1015 flock = *before;
1016 if ((flock->fl_flags & FL_POSIX) == 0)
1017 continue;
1018 if (el == &locks_to_send) {
1020 * The list ended. We don't have enough allocated
1021 * structures - something is really wrong.
1023 cERROR(1, "Can't push all brlocks!");
1024 break;
1026 length = 1 + flock->fl_end - flock->fl_start;
1027 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1028 type = CIFS_RDLCK;
1029 else
1030 type = CIFS_WRLCK;
1031 lck = list_entry(el, struct lock_to_push, llist);
1032 lck->pid = flock->fl_pid;
1033 lck->netfid = cfile->netfid;
1034 lck->length = length;
1035 lck->type = type;
1036 lck->offset = flock->fl_start;
1037 el = el->next;
1039 unlock_flocks();
1041 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1042 struct file_lock tmp_lock;
1043 int stored_rc;
1045 tmp_lock.fl_start = lck->offset;
1046 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1047 0, lck->length, &tmp_lock,
1048 lck->type, 0);
1049 if (stored_rc)
1050 rc = stored_rc;
1051 list_del(&lck->llist);
1052 kfree(lck);
1055 out:
1056 cinode->can_cache_brlcks = false;
1057 mutex_unlock(&cinode->lock_mutex);
1059 FreeXid(xid);
1060 return rc;
1061 err_out:
1062 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1063 list_del(&lck->llist);
1064 kfree(lck);
1066 goto out;
1069 static int
1070 cifs_push_locks(struct cifsFileInfo *cfile)
1072 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1073 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1075 if ((tcon->ses->capabilities & CAP_UNIX) &&
1076 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1077 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1078 return cifs_push_posix_locks(cfile);
1080 return cifs_push_mandatory_locks(cfile);
1083 static void
1084 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1085 bool *wait_flag, struct TCP_Server_Info *server)
1087 if (flock->fl_flags & FL_POSIX)
1088 cFYI(1, "Posix");
1089 if (flock->fl_flags & FL_FLOCK)
1090 cFYI(1, "Flock");
1091 if (flock->fl_flags & FL_SLEEP) {
1092 cFYI(1, "Blocking lock");
1093 *wait_flag = true;
1095 if (flock->fl_flags & FL_ACCESS)
1096 cFYI(1, "Process suspended by mandatory locking - "
1097 "not implemented yet");
1098 if (flock->fl_flags & FL_LEASE)
1099 cFYI(1, "Lease on file - not implemented yet");
1100 if (flock->fl_flags &
1101 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1102 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1104 *type = server->vals->large_lock_type;
1105 if (flock->fl_type == F_WRLCK) {
1106 cFYI(1, "F_WRLCK ");
1107 *type |= server->vals->exclusive_lock_type;
1108 *lock = 1;
1109 } else if (flock->fl_type == F_UNLCK) {
1110 cFYI(1, "F_UNLCK");
1111 *type |= server->vals->unlock_lock_type;
1112 *unlock = 1;
1113 /* Check if unlock includes more than one lock range */
1114 } else if (flock->fl_type == F_RDLCK) {
1115 cFYI(1, "F_RDLCK");
1116 *type |= server->vals->shared_lock_type;
1117 *lock = 1;
1118 } else if (flock->fl_type == F_EXLCK) {
1119 cFYI(1, "F_EXLCK");
1120 *type |= server->vals->exclusive_lock_type;
1121 *lock = 1;
1122 } else if (flock->fl_type == F_SHLCK) {
1123 cFYI(1, "F_SHLCK");
1124 *type |= server->vals->shared_lock_type;
1125 *lock = 1;
1126 } else
1127 cFYI(1, "Unknown type of lock");
1130 static int
1131 cifs_mandatory_lock(int xid, struct cifsFileInfo *cfile, __u64 offset,
1132 __u64 length, __u32 type, int lock, int unlock, bool wait)
1134 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
1135 current->tgid, length, offset, unlock, lock,
1136 (__u8)type, wait, 0);
1139 static int
1140 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1141 bool wait_flag, bool posix_lck, int xid)
1143 int rc = 0;
1144 __u64 length = 1 + flock->fl_end - flock->fl_start;
1145 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1146 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1147 struct TCP_Server_Info *server = tcon->ses->server;
1148 __u16 netfid = cfile->netfid;
1150 if (posix_lck) {
1151 int posix_lock_type;
1153 rc = cifs_posix_lock_test(file, flock);
1154 if (!rc)
1155 return rc;
1157 if (type & server->vals->shared_lock_type)
1158 posix_lock_type = CIFS_RDLCK;
1159 else
1160 posix_lock_type = CIFS_WRLCK;
1161 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1162 1 /* get */, length, flock,
1163 posix_lock_type, wait_flag);
1164 return rc;
1167 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1168 if (!rc)
1169 return rc;
1171 /* BB we could chain these into one lock request BB */
1172 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
1173 1, 0, false);
1174 if (rc == 0) {
1175 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1176 type, 0, 1, false);
1177 flock->fl_type = F_UNLCK;
1178 if (rc != 0)
1179 cERROR(1, "Error unlocking previously locked "
1180 "range %d during test of lock", rc);
1181 return 0;
1184 if (type & server->vals->shared_lock_type) {
1185 flock->fl_type = F_WRLCK;
1186 return 0;
1189 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1190 type | server->vals->shared_lock_type, 1, 0,
1191 false);
1192 if (rc == 0) {
1193 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1194 type | server->vals->shared_lock_type,
1195 0, 1, false);
1196 flock->fl_type = F_RDLCK;
1197 if (rc != 0)
1198 cERROR(1, "Error unlocking previously locked "
1199 "range %d during test of lock", rc);
1200 } else
1201 flock->fl_type = F_WRLCK;
1203 return 0;
1206 static void
1207 cifs_move_llist(struct list_head *source, struct list_head *dest)
1209 struct list_head *li, *tmp;
1210 list_for_each_safe(li, tmp, source)
1211 list_move(li, dest);
1214 static void
1215 cifs_free_llist(struct list_head *llist)
1217 struct cifsLockInfo *li, *tmp;
1218 list_for_each_entry_safe(li, tmp, llist, llist) {
1219 cifs_del_lock_waiters(li);
1220 list_del(&li->llist);
1221 kfree(li);
1225 static int
1226 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1228 int rc = 0, stored_rc;
1229 int types[] = {LOCKING_ANDX_LARGE_FILES,
1230 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1231 unsigned int i;
1232 unsigned int max_num, num, max_buf;
1233 LOCKING_ANDX_RANGE *buf, *cur;
1234 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1235 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1236 struct cifsLockInfo *li, *tmp;
1237 __u64 length = 1 + flock->fl_end - flock->fl_start;
1238 struct list_head tmp_llist;
1240 INIT_LIST_HEAD(&tmp_llist);
1243 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1244 * and check it for zero before using.
1246 max_buf = tcon->ses->server->maxBuf;
1247 if (!max_buf)
1248 return -EINVAL;
1250 max_num = (max_buf - sizeof(struct smb_hdr)) /
1251 sizeof(LOCKING_ANDX_RANGE);
1252 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1253 if (!buf)
1254 return -ENOMEM;
1256 mutex_lock(&cinode->lock_mutex);
1257 for (i = 0; i < 2; i++) {
1258 cur = buf;
1259 num = 0;
1260 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
1261 if (flock->fl_start > li->offset ||
1262 (flock->fl_start + length) <
1263 (li->offset + li->length))
1264 continue;
1265 if (current->tgid != li->pid)
1266 continue;
1267 if (types[i] != li->type)
1268 continue;
1269 if (cinode->can_cache_brlcks) {
1271 * We can cache brlock requests - simply remove
1272 * a lock from the file's list.
1274 list_del(&li->llist);
1275 cifs_del_lock_waiters(li);
1276 kfree(li);
1277 continue;
1279 cur->Pid = cpu_to_le16(li->pid);
1280 cur->LengthLow = cpu_to_le32((u32)li->length);
1281 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1282 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1283 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1285 * We need to save a lock here to let us add it again to
1286 * the file's list if the unlock range request fails on
1287 * the server.
1289 list_move(&li->llist, &tmp_llist);
1290 if (++num == max_num) {
1291 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1292 li->type, num, 0, buf);
1293 if (stored_rc) {
1295 * We failed on the unlock range
1296 * request - add all locks from the tmp
1297 * list to the head of the file's list.
1299 cifs_move_llist(&tmp_llist,
1300 &cfile->llist);
1301 rc = stored_rc;
1302 } else
1304 * The unlock range request succeed -
1305 * free the tmp list.
1307 cifs_free_llist(&tmp_llist);
1308 cur = buf;
1309 num = 0;
1310 } else
1311 cur++;
1313 if (num) {
1314 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1315 types[i], num, 0, buf);
1316 if (stored_rc) {
1317 cifs_move_llist(&tmp_llist, &cfile->llist);
1318 rc = stored_rc;
1319 } else
1320 cifs_free_llist(&tmp_llist);
1324 mutex_unlock(&cinode->lock_mutex);
1325 kfree(buf);
1326 return rc;
1329 static int
1330 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1331 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
1333 int rc = 0;
1334 __u64 length = 1 + flock->fl_end - flock->fl_start;
1335 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1336 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1337 struct TCP_Server_Info *server = tcon->ses->server;
1338 __u16 netfid = cfile->netfid;
1340 if (posix_lck) {
1341 int posix_lock_type;
1343 rc = cifs_posix_lock_set(file, flock);
1344 if (!rc || rc < 0)
1345 return rc;
1347 if (type & server->vals->shared_lock_type)
1348 posix_lock_type = CIFS_RDLCK;
1349 else
1350 posix_lock_type = CIFS_WRLCK;
1352 if (unlock == 1)
1353 posix_lock_type = CIFS_UNLCK;
1355 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1356 0 /* set */, length, flock,
1357 posix_lock_type, wait_flag);
1358 goto out;
1361 if (lock) {
1362 struct cifsLockInfo *lock;
1364 lock = cifs_lock_init(flock->fl_start, length, type);
1365 if (!lock)
1366 return -ENOMEM;
1368 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1369 if (rc < 0)
1370 kfree(lock);
1371 if (rc <= 0)
1372 goto out;
1374 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1375 type, 1, 0, wait_flag);
1376 if (rc) {
1377 kfree(lock);
1378 goto out;
1381 cifs_lock_add(cfile, lock);
1382 } else if (unlock)
1383 rc = cifs_unlock_range(cfile, flock, xid);
1385 out:
1386 if (flock->fl_flags & FL_POSIX)
1387 posix_lock_file_wait(file, flock);
1388 return rc;
1391 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1393 int rc, xid;
1394 int lock = 0, unlock = 0;
1395 bool wait_flag = false;
1396 bool posix_lck = false;
1397 struct cifs_sb_info *cifs_sb;
1398 struct cifs_tcon *tcon;
1399 struct cifsInodeInfo *cinode;
1400 struct cifsFileInfo *cfile;
1401 __u16 netfid;
1402 __u32 type;
1404 rc = -EACCES;
1405 xid = GetXid();
1407 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1408 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1409 flock->fl_start, flock->fl_end);
1411 cfile = (struct cifsFileInfo *)file->private_data;
1412 tcon = tlink_tcon(cfile->tlink);
1414 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1415 tcon->ses->server);
1417 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1418 netfid = cfile->netfid;
1419 cinode = CIFS_I(file->f_path.dentry->d_inode);
1421 if ((tcon->ses->capabilities & CAP_UNIX) &&
1422 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1423 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1424 posix_lck = true;
1426 * BB add code here to normalize offset and length to account for
1427 * negative length which we can not accept over the wire.
1429 if (IS_GETLK(cmd)) {
1430 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1431 FreeXid(xid);
1432 return rc;
1435 if (!lock && !unlock) {
1437 * if no lock or unlock then nothing to do since we do not
1438 * know what it is
1440 FreeXid(xid);
1441 return -EOPNOTSUPP;
1444 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1445 xid);
1446 FreeXid(xid);
1447 return rc;
1451 * update the file size (if needed) after a write. Should be called with
1452 * the inode->i_lock held
1454 void
1455 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1456 unsigned int bytes_written)
1458 loff_t end_of_write = offset + bytes_written;
1460 if (end_of_write > cifsi->server_eof)
1461 cifsi->server_eof = end_of_write;
1464 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1465 const char *write_data, size_t write_size,
1466 loff_t *poffset)
1468 int rc = 0;
1469 unsigned int bytes_written = 0;
1470 unsigned int total_written;
1471 struct cifs_sb_info *cifs_sb;
1472 struct cifs_tcon *pTcon;
1473 int xid;
1474 struct dentry *dentry = open_file->dentry;
1475 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1476 struct cifs_io_parms io_parms;
1478 cifs_sb = CIFS_SB(dentry->d_sb);
1480 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1481 *poffset, dentry->d_name.name);
1483 pTcon = tlink_tcon(open_file->tlink);
1485 xid = GetXid();
1487 for (total_written = 0; write_size > total_written;
1488 total_written += bytes_written) {
1489 rc = -EAGAIN;
1490 while (rc == -EAGAIN) {
1491 struct kvec iov[2];
1492 unsigned int len;
1494 if (open_file->invalidHandle) {
1495 /* we could deadlock if we called
1496 filemap_fdatawait from here so tell
1497 reopen_file not to flush data to
1498 server now */
1499 rc = cifs_reopen_file(open_file, false);
1500 if (rc != 0)
1501 break;
1504 len = min((size_t)cifs_sb->wsize,
1505 write_size - total_written);
1506 /* iov[0] is reserved for smb header */
1507 iov[1].iov_base = (char *)write_data + total_written;
1508 iov[1].iov_len = len;
1509 io_parms.netfid = open_file->netfid;
1510 io_parms.pid = pid;
1511 io_parms.tcon = pTcon;
1512 io_parms.offset = *poffset;
1513 io_parms.length = len;
1514 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1515 1, 0);
1517 if (rc || (bytes_written == 0)) {
1518 if (total_written)
1519 break;
1520 else {
1521 FreeXid(xid);
1522 return rc;
1524 } else {
1525 spin_lock(&dentry->d_inode->i_lock);
1526 cifs_update_eof(cifsi, *poffset, bytes_written);
1527 spin_unlock(&dentry->d_inode->i_lock);
1528 *poffset += bytes_written;
1532 cifs_stats_bytes_written(pTcon, total_written);
1534 if (total_written > 0) {
1535 spin_lock(&dentry->d_inode->i_lock);
1536 if (*poffset > dentry->d_inode->i_size)
1537 i_size_write(dentry->d_inode, *poffset);
1538 spin_unlock(&dentry->d_inode->i_lock);
1540 mark_inode_dirty_sync(dentry->d_inode);
1541 FreeXid(xid);
1542 return total_written;
1545 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1546 bool fsuid_only)
1548 struct cifsFileInfo *open_file = NULL;
1549 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1551 /* only filter by fsuid on multiuser mounts */
1552 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1553 fsuid_only = false;
1555 spin_lock(&cifs_file_list_lock);
1556 /* we could simply get the first_list_entry since write-only entries
1557 are always at the end of the list but since the first entry might
1558 have a close pending, we go through the whole list */
1559 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1560 if (fsuid_only && open_file->uid != current_fsuid())
1561 continue;
1562 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1563 if (!open_file->invalidHandle) {
1564 /* found a good file */
1565 /* lock it so it will not be closed on us */
1566 cifsFileInfo_get(open_file);
1567 spin_unlock(&cifs_file_list_lock);
1568 return open_file;
1569 } /* else might as well continue, and look for
1570 another, or simply have the caller reopen it
1571 again rather than trying to fix this handle */
1572 } else /* write only file */
1573 break; /* write only files are last so must be done */
1575 spin_unlock(&cifs_file_list_lock);
1576 return NULL;
1579 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1580 bool fsuid_only)
1582 struct cifsFileInfo *open_file, *inv_file = NULL;
1583 struct cifs_sb_info *cifs_sb;
1584 bool any_available = false;
1585 int rc;
1586 unsigned int refind = 0;
1588 /* Having a null inode here (because mapping->host was set to zero by
1589 the VFS or MM) should not happen but we had reports of on oops (due to
1590 it being zero) during stress testcases so we need to check for it */
1592 if (cifs_inode == NULL) {
1593 cERROR(1, "Null inode passed to cifs_writeable_file");
1594 dump_stack();
1595 return NULL;
1598 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1600 /* only filter by fsuid on multiuser mounts */
1601 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1602 fsuid_only = false;
1604 spin_lock(&cifs_file_list_lock);
1605 refind_writable:
1606 if (refind > MAX_REOPEN_ATT) {
1607 spin_unlock(&cifs_file_list_lock);
1608 return NULL;
1610 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1611 if (!any_available && open_file->pid != current->tgid)
1612 continue;
1613 if (fsuid_only && open_file->uid != current_fsuid())
1614 continue;
1615 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1616 if (!open_file->invalidHandle) {
1617 /* found a good writable file */
1618 cifsFileInfo_get(open_file);
1619 spin_unlock(&cifs_file_list_lock);
1620 return open_file;
1621 } else {
1622 if (!inv_file)
1623 inv_file = open_file;
1627 /* couldn't find useable FH with same pid, try any available */
1628 if (!any_available) {
1629 any_available = true;
1630 goto refind_writable;
1633 if (inv_file) {
1634 any_available = false;
1635 cifsFileInfo_get(inv_file);
1638 spin_unlock(&cifs_file_list_lock);
1640 if (inv_file) {
1641 rc = cifs_reopen_file(inv_file, false);
1642 if (!rc)
1643 return inv_file;
1644 else {
1645 spin_lock(&cifs_file_list_lock);
1646 list_move_tail(&inv_file->flist,
1647 &cifs_inode->openFileList);
1648 spin_unlock(&cifs_file_list_lock);
1649 cifsFileInfo_put(inv_file);
1650 spin_lock(&cifs_file_list_lock);
1651 ++refind;
1652 goto refind_writable;
1656 return NULL;
1659 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1661 struct address_space *mapping = page->mapping;
1662 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1663 char *write_data;
1664 int rc = -EFAULT;
1665 int bytes_written = 0;
1666 struct inode *inode;
1667 struct cifsFileInfo *open_file;
1669 if (!mapping || !mapping->host)
1670 return -EFAULT;
1672 inode = page->mapping->host;
1674 offset += (loff_t)from;
1675 write_data = kmap(page);
1676 write_data += from;
1678 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1679 kunmap(page);
1680 return -EIO;
1683 /* racing with truncate? */
1684 if (offset > mapping->host->i_size) {
1685 kunmap(page);
1686 return 0; /* don't care */
1689 /* check to make sure that we are not extending the file */
1690 if (mapping->host->i_size - offset < (loff_t)to)
1691 to = (unsigned)(mapping->host->i_size - offset);
1693 open_file = find_writable_file(CIFS_I(mapping->host), false);
1694 if (open_file) {
1695 bytes_written = cifs_write(open_file, open_file->pid,
1696 write_data, to - from, &offset);
1697 cifsFileInfo_put(open_file);
1698 /* Does mm or vfs already set times? */
1699 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1700 if ((bytes_written > 0) && (offset))
1701 rc = 0;
1702 else if (bytes_written < 0)
1703 rc = bytes_written;
1704 } else {
1705 cFYI(1, "No writeable filehandles for inode");
1706 rc = -EIO;
1709 kunmap(page);
1710 return rc;
1714 * Marshal up the iov array, reserving the first one for the header. Also,
1715 * set wdata->bytes.
1717 static void
1718 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1720 int i;
1721 struct inode *inode = wdata->cfile->dentry->d_inode;
1722 loff_t size = i_size_read(inode);
1724 /* marshal up the pages into iov array */
1725 wdata->bytes = 0;
1726 for (i = 0; i < wdata->nr_pages; i++) {
1727 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1728 (loff_t)PAGE_CACHE_SIZE);
1729 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1730 wdata->bytes += iov[i + 1].iov_len;
1734 static int cifs_writepages(struct address_space *mapping,
1735 struct writeback_control *wbc)
1737 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1738 bool done = false, scanned = false, range_whole = false;
1739 pgoff_t end, index;
1740 struct cifs_writedata *wdata;
1741 struct page *page;
1742 int rc = 0;
1745 * If wsize is smaller than the page cache size, default to writing
1746 * one page at a time via cifs_writepage
1748 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1749 return generic_writepages(mapping, wbc);
1751 if (wbc->range_cyclic) {
1752 index = mapping->writeback_index; /* Start from prev offset */
1753 end = -1;
1754 } else {
1755 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1756 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1757 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1758 range_whole = true;
1759 scanned = true;
1761 retry:
1762 while (!done && index <= end) {
1763 unsigned int i, nr_pages, found_pages;
1764 pgoff_t next = 0, tofind;
1765 struct page **pages;
1767 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1768 end - index) + 1;
1770 wdata = cifs_writedata_alloc((unsigned int)tofind,
1771 cifs_writev_complete);
1772 if (!wdata) {
1773 rc = -ENOMEM;
1774 break;
1778 * find_get_pages_tag seems to return a max of 256 on each
1779 * iteration, so we must call it several times in order to
1780 * fill the array or the wsize is effectively limited to
1781 * 256 * PAGE_CACHE_SIZE.
1783 found_pages = 0;
1784 pages = wdata->pages;
1785 do {
1786 nr_pages = find_get_pages_tag(mapping, &index,
1787 PAGECACHE_TAG_DIRTY,
1788 tofind, pages);
1789 found_pages += nr_pages;
1790 tofind -= nr_pages;
1791 pages += nr_pages;
1792 } while (nr_pages && tofind && index <= end);
1794 if (found_pages == 0) {
1795 kref_put(&wdata->refcount, cifs_writedata_release);
1796 break;
1799 nr_pages = 0;
1800 for (i = 0; i < found_pages; i++) {
1801 page = wdata->pages[i];
1803 * At this point we hold neither mapping->tree_lock nor
1804 * lock on the page itself: the page may be truncated or
1805 * invalidated (changing page->mapping to NULL), or even
1806 * swizzled back from swapper_space to tmpfs file
1807 * mapping
1810 if (nr_pages == 0)
1811 lock_page(page);
1812 else if (!trylock_page(page))
1813 break;
1815 if (unlikely(page->mapping != mapping)) {
1816 unlock_page(page);
1817 break;
1820 if (!wbc->range_cyclic && page->index > end) {
1821 done = true;
1822 unlock_page(page);
1823 break;
1826 if (next && (page->index != next)) {
1827 /* Not next consecutive page */
1828 unlock_page(page);
1829 break;
1832 if (wbc->sync_mode != WB_SYNC_NONE)
1833 wait_on_page_writeback(page);
1835 if (PageWriteback(page) ||
1836 !clear_page_dirty_for_io(page)) {
1837 unlock_page(page);
1838 break;
1842 * This actually clears the dirty bit in the radix tree.
1843 * See cifs_writepage() for more commentary.
1845 set_page_writeback(page);
1847 if (page_offset(page) >= mapping->host->i_size) {
1848 done = true;
1849 unlock_page(page);
1850 end_page_writeback(page);
1851 break;
1854 wdata->pages[i] = page;
1855 next = page->index + 1;
1856 ++nr_pages;
1859 /* reset index to refind any pages skipped */
1860 if (nr_pages == 0)
1861 index = wdata->pages[0]->index + 1;
1863 /* put any pages we aren't going to use */
1864 for (i = nr_pages; i < found_pages; i++) {
1865 page_cache_release(wdata->pages[i]);
1866 wdata->pages[i] = NULL;
1869 /* nothing to write? */
1870 if (nr_pages == 0) {
1871 kref_put(&wdata->refcount, cifs_writedata_release);
1872 continue;
1875 wdata->sync_mode = wbc->sync_mode;
1876 wdata->nr_pages = nr_pages;
1877 wdata->offset = page_offset(wdata->pages[0]);
1878 wdata->marshal_iov = cifs_writepages_marshal_iov;
1880 do {
1881 if (wdata->cfile != NULL)
1882 cifsFileInfo_put(wdata->cfile);
1883 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1884 false);
1885 if (!wdata->cfile) {
1886 cERROR(1, "No writable handles for inode");
1887 rc = -EBADF;
1888 break;
1890 wdata->pid = wdata->cfile->pid;
1891 rc = cifs_async_writev(wdata);
1892 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1894 for (i = 0; i < nr_pages; ++i)
1895 unlock_page(wdata->pages[i]);
1897 /* send failure -- clean up the mess */
1898 if (rc != 0) {
1899 for (i = 0; i < nr_pages; ++i) {
1900 if (rc == -EAGAIN)
1901 redirty_page_for_writepage(wbc,
1902 wdata->pages[i]);
1903 else
1904 SetPageError(wdata->pages[i]);
1905 end_page_writeback(wdata->pages[i]);
1906 page_cache_release(wdata->pages[i]);
1908 if (rc != -EAGAIN)
1909 mapping_set_error(mapping, rc);
1911 kref_put(&wdata->refcount, cifs_writedata_release);
1913 wbc->nr_to_write -= nr_pages;
1914 if (wbc->nr_to_write <= 0)
1915 done = true;
1917 index = next;
1920 if (!scanned && !done) {
1922 * We hit the last page and there is more work to be done: wrap
1923 * back to the start of the file
1925 scanned = true;
1926 index = 0;
1927 goto retry;
1930 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1931 mapping->writeback_index = index;
1933 return rc;
1936 static int
1937 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1939 int rc;
1940 int xid;
1942 xid = GetXid();
1943 /* BB add check for wbc flags */
1944 page_cache_get(page);
1945 if (!PageUptodate(page))
1946 cFYI(1, "ppw - page not up to date");
1949 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1951 * A writepage() implementation always needs to do either this,
1952 * or re-dirty the page with "redirty_page_for_writepage()" in
1953 * the case of a failure.
1955 * Just unlocking the page will cause the radix tree tag-bits
1956 * to fail to update with the state of the page correctly.
1958 set_page_writeback(page);
1959 retry_write:
1960 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1961 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1962 goto retry_write;
1963 else if (rc == -EAGAIN)
1964 redirty_page_for_writepage(wbc, page);
1965 else if (rc != 0)
1966 SetPageError(page);
1967 else
1968 SetPageUptodate(page);
1969 end_page_writeback(page);
1970 page_cache_release(page);
1971 FreeXid(xid);
1972 return rc;
/*
 * ->writepage(): write the page via cifs_writepage_locked(), then unlock
 * it.  Returns the result of the write.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	const int rc = cifs_writepage_locked(page, wbc);

	unlock_page(page);
	return rc;
}
1982 static int cifs_write_end(struct file *file, struct address_space *mapping,
1983 loff_t pos, unsigned len, unsigned copied,
1984 struct page *page, void *fsdata)
1986 int rc;
1987 struct inode *inode = mapping->host;
1988 struct cifsFileInfo *cfile = file->private_data;
1989 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1990 __u32 pid;
1992 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1993 pid = cfile->pid;
1994 else
1995 pid = current->tgid;
1997 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1998 page, pos, copied);
2000 if (PageChecked(page)) {
2001 if (copied == len)
2002 SetPageUptodate(page);
2003 ClearPageChecked(page);
2004 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2005 SetPageUptodate(page);
2007 if (!PageUptodate(page)) {
2008 char *page_data;
2009 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2010 int xid;
2012 xid = GetXid();
2013 /* this is probably better than directly calling
2014 partialpage_write since in this function the file handle is
2015 known which we might as well leverage */
2016 /* BB check if anything else missing out of ppw
2017 such as updating last write time */
2018 page_data = kmap(page);
2019 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2020 /* if (rc < 0) should we set writebehind rc? */
2021 kunmap(page);
2023 FreeXid(xid);
2024 } else {
2025 rc = copied;
2026 pos += copied;
2027 set_page_dirty(page);
2030 if (rc > 0) {
2031 spin_lock(&inode->i_lock);
2032 if (pos > inode->i_size)
2033 i_size_write(inode, pos);
2034 spin_unlock(&inode->i_lock);
2037 unlock_page(page);
2038 page_cache_release(page);
2040 return rc;
2043 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2044 int datasync)
2046 int xid;
2047 int rc = 0;
2048 struct cifs_tcon *tcon;
2049 struct cifsFileInfo *smbfile = file->private_data;
2050 struct inode *inode = file->f_path.dentry->d_inode;
2051 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2053 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2054 if (rc)
2055 return rc;
2056 mutex_lock(&inode->i_mutex);
2058 xid = GetXid();
2060 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2061 file->f_path.dentry->d_name.name, datasync);
2063 if (!CIFS_I(inode)->clientCanCacheRead) {
2064 rc = cifs_invalidate_mapping(inode);
2065 if (rc) {
2066 cFYI(1, "rc: %d during invalidate phase", rc);
2067 rc = 0; /* don't care about it in fsync */
2071 tcon = tlink_tcon(smbfile->tlink);
2072 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2073 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2075 FreeXid(xid);
2076 mutex_unlock(&inode->i_mutex);
2077 return rc;
2080 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2082 int xid;
2083 int rc = 0;
2084 struct cifs_tcon *tcon;
2085 struct cifsFileInfo *smbfile = file->private_data;
2086 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2087 struct inode *inode = file->f_mapping->host;
2089 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2090 if (rc)
2091 return rc;
2092 mutex_lock(&inode->i_mutex);
2094 xid = GetXid();
2096 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2097 file->f_path.dentry->d_name.name, datasync);
2099 tcon = tlink_tcon(smbfile->tlink);
2100 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2101 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2103 FreeXid(xid);
2104 mutex_unlock(&inode->i_mutex);
2105 return rc;
2109 * As file closes, flush all cached write data for this inode checking
2110 * for write behind errors.
2112 int cifs_flush(struct file *file, fl_owner_t id)
2114 struct inode *inode = file->f_path.dentry->d_inode;
2115 int rc = 0;
2117 if (file->f_mode & FMODE_WRITE)
2118 rc = filemap_write_and_wait(inode->i_mapping);
2120 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2122 return rc;
2125 static int
2126 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2128 int rc = 0;
2129 unsigned long i;
2131 for (i = 0; i < num_pages; i++) {
2132 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2133 if (!pages[i]) {
2135 * save number of pages we have already allocated and
2136 * return with ENOMEM error
2138 num_pages = i;
2139 rc = -ENOMEM;
2140 break;
2144 if (rc) {
2145 for (i = 0; i < num_pages; i++)
2146 put_page(pages[i]);
2148 return rc;
2151 static inline
2152 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2154 size_t num_pages;
2155 size_t clen;
2157 clen = min_t(const size_t, len, wsize);
2158 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2160 if (cur_len)
2161 *cur_len = clen;
2163 return num_pages;
2166 static void
2167 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2169 int i;
2170 size_t bytes = wdata->bytes;
2172 /* marshal up the pages into iov array */
2173 for (i = 0; i < wdata->nr_pages; i++) {
2174 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2175 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2176 bytes -= iov[i + 1].iov_len;
2180 static void
2181 cifs_uncached_writev_complete(struct work_struct *work)
2183 int i;
2184 struct cifs_writedata *wdata = container_of(work,
2185 struct cifs_writedata, work);
2186 struct inode *inode = wdata->cfile->dentry->d_inode;
2187 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2189 spin_lock(&inode->i_lock);
2190 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2191 if (cifsi->server_eof > inode->i_size)
2192 i_size_write(inode, cifsi->server_eof);
2193 spin_unlock(&inode->i_lock);
2195 complete(&wdata->done);
2197 if (wdata->result != -EAGAIN) {
2198 for (i = 0; i < wdata->nr_pages; i++)
2199 put_page(wdata->pages[i]);
2202 kref_put(&wdata->refcount, cifs_writedata_release);
2205 /* attempt to send write to server, retry on any -EAGAIN errors */
2206 static int
2207 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2209 int rc;
2211 do {
2212 if (wdata->cfile->invalidHandle) {
2213 rc = cifs_reopen_file(wdata->cfile, false);
2214 if (rc != 0)
2215 continue;
2217 rc = cifs_async_writev(wdata);
2218 } while (rc == -EAGAIN);
2220 return rc;
2223 static ssize_t
2224 cifs_iovec_write(struct file *file, const struct iovec *iov,
2225 unsigned long nr_segs, loff_t *poffset)
2227 unsigned long nr_pages, i;
2228 size_t copied, len, cur_len;
2229 ssize_t total_written = 0;
2230 loff_t offset;
2231 struct iov_iter it;
2232 struct cifsFileInfo *open_file;
2233 struct cifs_tcon *tcon;
2234 struct cifs_sb_info *cifs_sb;
2235 struct cifs_writedata *wdata, *tmp;
2236 struct list_head wdata_list;
2237 int rc;
2238 pid_t pid;
2240 len = iov_length(iov, nr_segs);
2241 if (!len)
2242 return 0;
2244 rc = generic_write_checks(file, poffset, &len, 0);
2245 if (rc)
2246 return rc;
2248 INIT_LIST_HEAD(&wdata_list);
2249 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2250 open_file = file->private_data;
2251 tcon = tlink_tcon(open_file->tlink);
2252 offset = *poffset;
2254 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2255 pid = open_file->pid;
2256 else
2257 pid = current->tgid;
2259 iov_iter_init(&it, iov, nr_segs, len, 0);
2260 do {
2261 size_t save_len;
2263 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2264 wdata = cifs_writedata_alloc(nr_pages,
2265 cifs_uncached_writev_complete);
2266 if (!wdata) {
2267 rc = -ENOMEM;
2268 break;
2271 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2272 if (rc) {
2273 kfree(wdata);
2274 break;
2277 save_len = cur_len;
2278 for (i = 0; i < nr_pages; i++) {
2279 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2280 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2281 0, copied);
2282 cur_len -= copied;
2283 iov_iter_advance(&it, copied);
2285 cur_len = save_len - cur_len;
2287 wdata->sync_mode = WB_SYNC_ALL;
2288 wdata->nr_pages = nr_pages;
2289 wdata->offset = (__u64)offset;
2290 wdata->cfile = cifsFileInfo_get(open_file);
2291 wdata->pid = pid;
2292 wdata->bytes = cur_len;
2293 wdata->marshal_iov = cifs_uncached_marshal_iov;
2294 rc = cifs_uncached_retry_writev(wdata);
2295 if (rc) {
2296 kref_put(&wdata->refcount, cifs_writedata_release);
2297 break;
2300 list_add_tail(&wdata->list, &wdata_list);
2301 offset += cur_len;
2302 len -= cur_len;
2303 } while (len > 0);
2306 * If at least one write was successfully sent, then discard any rc
2307 * value from the later writes. If the other write succeeds, then
2308 * we'll end up returning whatever was written. If it fails, then
2309 * we'll get a new rc value from that.
2311 if (!list_empty(&wdata_list))
2312 rc = 0;
2315 * Wait for and collect replies for any successful sends in order of
2316 * increasing offset. Once an error is hit or we get a fatal signal
2317 * while waiting, then return without waiting for any more replies.
2319 restart_loop:
2320 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2321 if (!rc) {
2322 /* FIXME: freezable too? */
2323 rc = wait_for_completion_killable(&wdata->done);
2324 if (rc)
2325 rc = -EINTR;
2326 else if (wdata->result)
2327 rc = wdata->result;
2328 else
2329 total_written += wdata->bytes;
2331 /* resend call if it's a retryable error */
2332 if (rc == -EAGAIN) {
2333 rc = cifs_uncached_retry_writev(wdata);
2334 goto restart_loop;
2337 list_del_init(&wdata->list);
2338 kref_put(&wdata->refcount, cifs_writedata_release);
2341 if (total_written > 0)
2342 *poffset += total_written;
2344 cifs_stats_bytes_written(tcon, total_written);
2345 return total_written ? total_written : (ssize_t)rc;
2348 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2349 unsigned long nr_segs, loff_t pos)
2351 ssize_t written;
2352 struct inode *inode;
2354 inode = iocb->ki_filp->f_path.dentry->d_inode;
2357 * BB - optimize the way when signing is disabled. We can drop this
2358 * extra memory-to-memory copying and use iovec buffers for constructing
2359 * write request.
2362 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2363 if (written > 0) {
2364 CIFS_I(inode)->invalid_mapping = true;
2365 iocb->ki_pos = pos;
2368 return written;
2371 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2372 unsigned long nr_segs, loff_t pos)
2374 struct inode *inode;
2376 inode = iocb->ki_filp->f_path.dentry->d_inode;
2378 if (CIFS_I(inode)->clientCanCacheAll)
2379 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2382 * In strict cache mode we need to write the data to the server exactly
2383 * from the pos to pos+len-1 rather than flush all affected pages
2384 * because it may cause a error with mandatory locks on these pages but
2385 * not on the region from pos to ppos+len-1.
2388 return cifs_user_writev(iocb, iov, nr_segs, pos);
2391 static struct cifs_readdata *
2392 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2394 struct cifs_readdata *rdata;
2396 rdata = kzalloc(sizeof(*rdata) +
2397 sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2398 if (rdata != NULL) {
2399 kref_init(&rdata->refcount);
2400 INIT_LIST_HEAD(&rdata->list);
2401 init_completion(&rdata->done);
2402 INIT_WORK(&rdata->work, complete);
2403 INIT_LIST_HEAD(&rdata->pages);
2405 return rdata;
2408 void
2409 cifs_readdata_release(struct kref *refcount)
2411 struct cifs_readdata *rdata = container_of(refcount,
2412 struct cifs_readdata, refcount);
2414 if (rdata->cfile)
2415 cifsFileInfo_put(rdata->cfile);
2417 kfree(rdata);
2420 static int
2421 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2423 int rc = 0;
2424 struct page *page, *tpage;
2425 unsigned int i;
2427 for (i = 0; i < npages; i++) {
2428 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2429 if (!page) {
2430 rc = -ENOMEM;
2431 break;
2433 list_add(&page->lru, list);
2436 if (rc) {
2437 list_for_each_entry_safe(page, tpage, list, lru) {
2438 list_del(&page->lru);
2439 put_page(page);
2442 return rc;
2445 static void
2446 cifs_uncached_readdata_release(struct kref *refcount)
2448 struct page *page, *tpage;
2449 struct cifs_readdata *rdata = container_of(refcount,
2450 struct cifs_readdata, refcount);
2452 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2453 list_del(&page->lru);
2454 put_page(page);
2456 cifs_readdata_release(refcount);
2459 static int
2460 cifs_retry_async_readv(struct cifs_readdata *rdata)
2462 int rc;
2464 do {
2465 if (rdata->cfile->invalidHandle) {
2466 rc = cifs_reopen_file(rdata->cfile, true);
2467 if (rc != 0)
2468 continue;
2470 rc = cifs_async_readv(rdata);
2471 } while (rc == -EAGAIN);
2473 return rc;
2477 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2478 * @rdata: the readdata response with list of pages holding data
2479 * @iov: vector in which we should copy the data
2480 * @nr_segs: number of segments in vector
2481 * @offset: offset into file of the first iovec
2482 * @copied: used to return the amount of data copied to the iov
2484 * This function copies data from a list of pages in a readdata response into
2485 * an array of iovecs. It will first calculate where the data should go
2486 * based on the info in the readdata and then copy the data into that spot.
2488 static ssize_t
2489 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2490 unsigned long nr_segs, loff_t offset, ssize_t *copied)
2492 int rc = 0;
2493 struct iov_iter ii;
2494 size_t pos = rdata->offset - offset;
2495 struct page *page, *tpage;
2496 ssize_t remaining = rdata->bytes;
2497 unsigned char *pdata;
2499 /* set up iov_iter and advance to the correct offset */
2500 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2501 iov_iter_advance(&ii, pos);
2503 *copied = 0;
2504 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2505 ssize_t copy;
2507 /* copy a whole page or whatever's left */
2508 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2510 /* ...but limit it to whatever space is left in the iov */
2511 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2513 /* go while there's data to be copied and no errors */
2514 if (copy && !rc) {
2515 pdata = kmap(page);
2516 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2517 (int)copy);
2518 kunmap(page);
2519 if (!rc) {
2520 *copied += copy;
2521 remaining -= copy;
2522 iov_iter_advance(&ii, copy);
2526 list_del(&page->lru);
2527 put_page(page);
2530 return rc;
2533 static void
2534 cifs_uncached_readv_complete(struct work_struct *work)
2536 struct cifs_readdata *rdata = container_of(work,
2537 struct cifs_readdata, work);
2539 /* if the result is non-zero then the pages weren't kmapped */
2540 if (rdata->result == 0) {
2541 struct page *page;
2543 list_for_each_entry(page, &rdata->pages, lru)
2544 kunmap(page);
2547 complete(&rdata->done);
2548 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2551 static int
2552 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2553 unsigned int remaining)
2555 int len = 0;
2556 struct page *page, *tpage;
2558 rdata->nr_iov = 1;
2559 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2560 if (remaining >= PAGE_SIZE) {
2561 /* enough data to fill the page */
2562 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2563 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2564 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2565 rdata->nr_iov, page->index,
2566 rdata->iov[rdata->nr_iov].iov_base,
2567 rdata->iov[rdata->nr_iov].iov_len);
2568 ++rdata->nr_iov;
2569 len += PAGE_SIZE;
2570 remaining -= PAGE_SIZE;
2571 } else if (remaining > 0) {
2572 /* enough for partial page, fill and zero the rest */
2573 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2574 rdata->iov[rdata->nr_iov].iov_len = remaining;
2575 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2576 rdata->nr_iov, page->index,
2577 rdata->iov[rdata->nr_iov].iov_base,
2578 rdata->iov[rdata->nr_iov].iov_len);
2579 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2580 '\0', PAGE_SIZE - remaining);
2581 ++rdata->nr_iov;
2582 len += remaining;
2583 remaining = 0;
2584 } else {
2585 /* no need to hold page hostage */
2586 list_del(&page->lru);
2587 put_page(page);
2591 return len;
2594 static ssize_t
2595 cifs_iovec_read(struct file *file, const struct iovec *iov,
2596 unsigned long nr_segs, loff_t *poffset)
2598 ssize_t rc;
2599 size_t len, cur_len;
2600 ssize_t total_read = 0;
2601 loff_t offset = *poffset;
2602 unsigned int npages;
2603 struct cifs_sb_info *cifs_sb;
2604 struct cifs_tcon *tcon;
2605 struct cifsFileInfo *open_file;
2606 struct cifs_readdata *rdata, *tmp;
2607 struct list_head rdata_list;
2608 pid_t pid;
2610 if (!nr_segs)
2611 return 0;
2613 len = iov_length(iov, nr_segs);
2614 if (!len)
2615 return 0;
2617 INIT_LIST_HEAD(&rdata_list);
2618 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2619 open_file = file->private_data;
2620 tcon = tlink_tcon(open_file->tlink);
2622 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2623 pid = open_file->pid;
2624 else
2625 pid = current->tgid;
2627 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2628 cFYI(1, "attempting read on write only file instance");
2630 do {
2631 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2632 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2634 /* allocate a readdata struct */
2635 rdata = cifs_readdata_alloc(npages,
2636 cifs_uncached_readv_complete);
2637 if (!rdata) {
2638 rc = -ENOMEM;
2639 goto error;
2642 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2643 if (rc)
2644 goto error;
2646 rdata->cfile = cifsFileInfo_get(open_file);
2647 rdata->offset = offset;
2648 rdata->bytes = cur_len;
2649 rdata->pid = pid;
2650 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2652 rc = cifs_retry_async_readv(rdata);
2653 error:
2654 if (rc) {
2655 kref_put(&rdata->refcount,
2656 cifs_uncached_readdata_release);
2657 break;
2660 list_add_tail(&rdata->list, &rdata_list);
2661 offset += cur_len;
2662 len -= cur_len;
2663 } while (len > 0);
2665 /* if at least one read request send succeeded, then reset rc */
2666 if (!list_empty(&rdata_list))
2667 rc = 0;
2669 /* the loop below should proceed in the order of increasing offsets */
2670 restart_loop:
2671 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2672 if (!rc) {
2673 ssize_t copied;
2675 /* FIXME: freezable sleep too? */
2676 rc = wait_for_completion_killable(&rdata->done);
2677 if (rc)
2678 rc = -EINTR;
2679 else if (rdata->result)
2680 rc = rdata->result;
2681 else {
2682 rc = cifs_readdata_to_iov(rdata, iov,
2683 nr_segs, *poffset,
2684 &copied);
2685 total_read += copied;
2688 /* resend call if it's a retryable error */
2689 if (rc == -EAGAIN) {
2690 rc = cifs_retry_async_readv(rdata);
2691 goto restart_loop;
2694 list_del_init(&rdata->list);
2695 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2698 cifs_stats_bytes_read(tcon, total_read);
2699 *poffset += total_read;
2701 return total_read ? total_read : rc;
2704 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2705 unsigned long nr_segs, loff_t pos)
2707 ssize_t read;
2709 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2710 if (read > 0)
2711 iocb->ki_pos = pos;
2713 return read;
2716 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2717 unsigned long nr_segs, loff_t pos)
2719 struct inode *inode;
2721 inode = iocb->ki_filp->f_path.dentry->d_inode;
2723 if (CIFS_I(inode)->clientCanCacheRead)
2724 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2727 * In strict cache mode we need to read from the server all the time
2728 * if we don't have level II oplock because the server can delay mtime
2729 * change - so we can't make a decision about inode invalidating.
2730 * And we can also fail with pagereading if there are mandatory locks
2731 * on pages affected by this read but not on the region from pos to
2732 * pos+len-1.
2735 return cifs_user_readv(iocb, iov, nr_segs, pos);
2738 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2739 loff_t *poffset)
2741 int rc = -EACCES;
2742 unsigned int bytes_read = 0;
2743 unsigned int total_read;
2744 unsigned int current_read_size;
2745 unsigned int rsize;
2746 struct cifs_sb_info *cifs_sb;
2747 struct cifs_tcon *pTcon;
2748 int xid;
2749 char *current_offset;
2750 struct cifsFileInfo *open_file;
2751 struct cifs_io_parms io_parms;
2752 int buf_type = CIFS_NO_BUFFER;
2753 __u32 pid;
2755 xid = GetXid();
2756 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2758 /* FIXME: set up handlers for larger reads and/or convert to async */
2759 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2761 if (file->private_data == NULL) {
2762 rc = -EBADF;
2763 FreeXid(xid);
2764 return rc;
2766 open_file = file->private_data;
2767 pTcon = tlink_tcon(open_file->tlink);
2769 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2770 pid = open_file->pid;
2771 else
2772 pid = current->tgid;
2774 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2775 cFYI(1, "attempting read on write only file instance");
2777 for (total_read = 0, current_offset = read_data;
2778 read_size > total_read;
2779 total_read += bytes_read, current_offset += bytes_read) {
2780 current_read_size = min_t(uint, read_size - total_read, rsize);
2782 /* For windows me and 9x we do not want to request more
2783 than it negotiated since it will refuse the read then */
2784 if ((pTcon->ses) &&
2785 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
2786 current_read_size = min_t(uint, current_read_size,
2787 CIFSMaxBufSize);
2789 rc = -EAGAIN;
2790 while (rc == -EAGAIN) {
2791 if (open_file->invalidHandle) {
2792 rc = cifs_reopen_file(open_file, true);
2793 if (rc != 0)
2794 break;
2796 io_parms.netfid = open_file->netfid;
2797 io_parms.pid = pid;
2798 io_parms.tcon = pTcon;
2799 io_parms.offset = *poffset;
2800 io_parms.length = current_read_size;
2801 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2802 &current_offset, &buf_type);
2804 if (rc || (bytes_read == 0)) {
2805 if (total_read) {
2806 break;
2807 } else {
2808 FreeXid(xid);
2809 return rc;
2811 } else {
2812 cifs_stats_bytes_read(pTcon, total_read);
2813 *poffset += bytes_read;
2816 FreeXid(xid);
2817 return total_read;
2821 * If the page is mmap'ed into a process' page tables, then we need to make
2822 * sure that it doesn't change while being written back.
2824 static int
2825 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2827 struct page *page = vmf->page;
2829 lock_page(page);
2830 return VM_FAULT_LOCKED;
2833 static struct vm_operations_struct cifs_file_vm_ops = {
2834 .fault = filemap_fault,
2835 .page_mkwrite = cifs_page_mkwrite,
2838 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2840 int rc, xid;
2841 struct inode *inode = file->f_path.dentry->d_inode;
2843 xid = GetXid();
2845 if (!CIFS_I(inode)->clientCanCacheRead) {
2846 rc = cifs_invalidate_mapping(inode);
2847 if (rc)
2848 return rc;
2851 rc = generic_file_mmap(file, vma);
2852 if (rc == 0)
2853 vma->vm_ops = &cifs_file_vm_ops;
2854 FreeXid(xid);
2855 return rc;
2858 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2860 int rc, xid;
2862 xid = GetXid();
2863 rc = cifs_revalidate_file(file);
2864 if (rc) {
2865 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2866 FreeXid(xid);
2867 return rc;
2869 rc = generic_file_mmap(file, vma);
2870 if (rc == 0)
2871 vma->vm_ops = &cifs_file_vm_ops;
2872 FreeXid(xid);
2873 return rc;
2876 static void
2877 cifs_readv_complete(struct work_struct *work)
2879 struct cifs_readdata *rdata = container_of(work,
2880 struct cifs_readdata, work);
2881 struct page *page, *tpage;
2883 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2884 list_del(&page->lru);
2885 lru_cache_add_file(page);
2887 if (rdata->result == 0) {
2888 kunmap(page);
2889 flush_dcache_page(page);
2890 SetPageUptodate(page);
2893 unlock_page(page);
2895 if (rdata->result == 0)
2896 cifs_readpage_to_fscache(rdata->mapping->host, page);
2898 page_cache_release(page);
2900 kref_put(&rdata->refcount, cifs_readdata_release);
2903 static int
2904 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2906 int len = 0;
2907 struct page *page, *tpage;
2908 u64 eof;
2909 pgoff_t eof_index;
2911 /* determine the eof that the server (probably) has */
2912 eof = CIFS_I(rdata->mapping->host)->server_eof;
2913 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2914 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2916 rdata->nr_iov = 1;
2917 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2918 if (remaining >= PAGE_CACHE_SIZE) {
2919 /* enough data to fill the page */
2920 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2921 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2922 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2923 rdata->nr_iov, page->index,
2924 rdata->iov[rdata->nr_iov].iov_base,
2925 rdata->iov[rdata->nr_iov].iov_len);
2926 ++rdata->nr_iov;
2927 len += PAGE_CACHE_SIZE;
2928 remaining -= PAGE_CACHE_SIZE;
2929 } else if (remaining > 0) {
2930 /* enough for partial page, fill and zero the rest */
2931 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2932 rdata->iov[rdata->nr_iov].iov_len = remaining;
2933 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2934 rdata->nr_iov, page->index,
2935 rdata->iov[rdata->nr_iov].iov_base,
2936 rdata->iov[rdata->nr_iov].iov_len);
2937 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2938 '\0', PAGE_CACHE_SIZE - remaining);
2939 ++rdata->nr_iov;
2940 len += remaining;
2941 remaining = 0;
2942 } else if (page->index > eof_index) {
2944 * The VFS will not try to do readahead past the
2945 * i_size, but it's possible that we have outstanding
2946 * writes with gaps in the middle and the i_size hasn't
2947 * caught up yet. Populate those with zeroed out pages
2948 * to prevent the VFS from repeatedly attempting to
2949 * fill them until the writes are flushed.
2951 zero_user(page, 0, PAGE_CACHE_SIZE);
2952 list_del(&page->lru);
2953 lru_cache_add_file(page);
2954 flush_dcache_page(page);
2955 SetPageUptodate(page);
2956 unlock_page(page);
2957 page_cache_release(page);
2958 } else {
2959 /* no need to hold page hostage */
2960 list_del(&page->lru);
2961 lru_cache_add_file(page);
2962 unlock_page(page);
2963 page_cache_release(page);
2967 return len;
2970 static int cifs_readpages(struct file *file, struct address_space *mapping,
2971 struct list_head *page_list, unsigned num_pages)
2973 int rc;
2974 struct list_head tmplist;
2975 struct cifsFileInfo *open_file = file->private_data;
2976 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2977 unsigned int rsize = cifs_sb->rsize;
2978 pid_t pid;
2981 * Give up immediately if rsize is too small to read an entire page.
2982 * The VFS will fall back to readpage. We should never reach this
2983 * point however since we set ra_pages to 0 when the rsize is smaller
2984 * than a cache page.
2986 if (unlikely(rsize < PAGE_CACHE_SIZE))
2987 return 0;
2990 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2991 * immediately if the cookie is negative
2993 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
2994 &num_pages);
2995 if (rc == 0)
2996 return rc;
2998 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2999 pid = open_file->pid;
3000 else
3001 pid = current->tgid;
3003 rc = 0;
3004 INIT_LIST_HEAD(&tmplist);
3006 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3007 mapping, num_pages);
3010 * Start with the page at end of list and move it to private
3011 * list. Do the same with any following pages until we hit
3012 * the rsize limit, hit an index discontinuity, or run out of
3013 * pages. Issue the async read and then start the loop again
3014 * until the list is empty.
3016 * Note that list order is important. The page_list is in
3017 * the order of declining indexes. When we put the pages in
3018 * the rdata->pages, then we want them in increasing order.
3020 while (!list_empty(page_list)) {
3021 unsigned int bytes = PAGE_CACHE_SIZE;
3022 unsigned int expected_index;
3023 unsigned int nr_pages = 1;
3024 loff_t offset;
3025 struct page *page, *tpage;
3026 struct cifs_readdata *rdata;
3028 page = list_entry(page_list->prev, struct page, lru);
3031 * Lock the page and put it in the cache. Since no one else
3032 * should have access to this page, we're safe to simply set
3033 * PG_locked without checking it first.
3035 __set_page_locked(page);
3036 rc = add_to_page_cache_locked(page, mapping,
3037 page->index, GFP_KERNEL);
3039 /* give up if we can't stick it in the cache */
3040 if (rc) {
3041 __clear_page_locked(page);
3042 break;
3045 /* move first page to the tmplist */
3046 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3047 list_move_tail(&page->lru, &tmplist);
3049 /* now try and add more pages onto the request */
3050 expected_index = page->index + 1;
3051 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3052 /* discontinuity ? */
3053 if (page->index != expected_index)
3054 break;
3056 /* would this page push the read over the rsize? */
3057 if (bytes + PAGE_CACHE_SIZE > rsize)
3058 break;
3060 __set_page_locked(page);
3061 if (add_to_page_cache_locked(page, mapping,
3062 page->index, GFP_KERNEL)) {
3063 __clear_page_locked(page);
3064 break;
3066 list_move_tail(&page->lru, &tmplist);
3067 bytes += PAGE_CACHE_SIZE;
3068 expected_index++;
3069 nr_pages++;
3072 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3073 if (!rdata) {
3074 /* best to give up if we're out of mem */
3075 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3076 list_del(&page->lru);
3077 lru_cache_add_file(page);
3078 unlock_page(page);
3079 page_cache_release(page);
3081 rc = -ENOMEM;
3082 break;
3085 spin_lock(&cifs_file_list_lock);
3086 spin_unlock(&cifs_file_list_lock);
3087 rdata->cfile = cifsFileInfo_get(open_file);
3088 rdata->mapping = mapping;
3089 rdata->offset = offset;
3090 rdata->bytes = bytes;
3091 rdata->pid = pid;
3092 rdata->marshal_iov = cifs_readpages_marshal_iov;
3093 list_splice_init(&tmplist, &rdata->pages);
3095 rc = cifs_retry_async_readv(rdata);
3096 if (rc != 0) {
3097 list_for_each_entry_safe(page, tpage, &rdata->pages,
3098 lru) {
3099 list_del(&page->lru);
3100 lru_cache_add_file(page);
3101 unlock_page(page);
3102 page_cache_release(page);
3104 kref_put(&rdata->refcount, cifs_readdata_release);
3105 break;
3108 kref_put(&rdata->refcount, cifs_readdata_release);
3111 return rc;
3114 static int cifs_readpage_worker(struct file *file, struct page *page,
3115 loff_t *poffset)
3117 char *read_data;
3118 int rc;
3120 /* Is the page cached? */
3121 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3122 if (rc == 0)
3123 goto read_complete;
3125 page_cache_get(page);
3126 read_data = kmap(page);
3127 /* for reads over a certain size could initiate async read ahead */
3129 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3131 if (rc < 0)
3132 goto io_error;
3133 else
3134 cFYI(1, "Bytes read %d", rc);
3136 file->f_path.dentry->d_inode->i_atime =
3137 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3139 if (PAGE_CACHE_SIZE > rc)
3140 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3142 flush_dcache_page(page);
3143 SetPageUptodate(page);
3145 /* send this page to the cache */
3146 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3148 rc = 0;
3150 io_error:
3151 kunmap(page);
3152 page_cache_release(page);
3154 read_complete:
3155 return rc;
3158 static int cifs_readpage(struct file *file, struct page *page)
3160 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3161 int rc = -EACCES;
3162 int xid;
3164 xid = GetXid();
3166 if (file->private_data == NULL) {
3167 rc = -EBADF;
3168 FreeXid(xid);
3169 return rc;
3172 cFYI(1, "readpage %p at offset %d 0x%x\n",
3173 page, (int)offset, (int)offset);
3175 rc = cifs_readpage_worker(file, page, &offset);
3177 unlock_page(page);
3179 FreeXid(xid);
3180 return rc;
3183 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3185 struct cifsFileInfo *open_file;
3187 spin_lock(&cifs_file_list_lock);
3188 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3189 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3190 spin_unlock(&cifs_file_list_lock);
3191 return 1;
3194 spin_unlock(&cifs_file_list_lock);
3195 return 0;
3198 /* We do not want to update the file size from server for inodes
3199 open for write - to avoid races with writepage extending
3200 the file - in the future we could consider allowing
3201 refreshing the inode only on increases in the file size
3202 but this is tricky to do without racing with writebehind
3203 page caching in the current Linux kernel design */
3204 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3206 if (!cifsInode)
3207 return true;
3209 if (is_inode_writable(cifsInode)) {
3210 /* This inode is open for write at least once */
3211 struct cifs_sb_info *cifs_sb;
3213 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3214 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3215 /* since no page cache to corrupt on directio
3216 we can change size safely */
3217 return true;
3220 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3221 return true;
3223 return false;
3224 } else
3225 return true;
3228 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3229 loff_t pos, unsigned len, unsigned flags,
3230 struct page **pagep, void **fsdata)
3232 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3233 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3234 loff_t page_start = pos & PAGE_MASK;
3235 loff_t i_size;
3236 struct page *page;
3237 int rc = 0;
3239 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3241 page = grab_cache_page_write_begin(mapping, index, flags);
3242 if (!page) {
3243 rc = -ENOMEM;
3244 goto out;
3247 if (PageUptodate(page))
3248 goto out;
3251 * If we write a full page it will be up to date, no need to read from
3252 * the server. If the write is short, we'll end up doing a sync write
3253 * instead.
3255 if (len == PAGE_CACHE_SIZE)
3256 goto out;
3259 * optimize away the read when we have an oplock, and we're not
3260 * expecting to use any of the data we'd be reading in. That
3261 * is, when the page lies beyond the EOF, or straddles the EOF
3262 * and the write will cover all of the existing data.
3264 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3265 i_size = i_size_read(mapping->host);
3266 if (page_start >= i_size ||
3267 (offset == 0 && (pos + len) >= i_size)) {
3268 zero_user_segments(page, 0, offset,
3269 offset + len,
3270 PAGE_CACHE_SIZE);
3272 * PageChecked means that the parts of the page
3273 * to which we're not writing are considered up
3274 * to date. Once the data is copied to the
3275 * page, it can be set uptodate.
3277 SetPageChecked(page);
3278 goto out;
3282 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3284 * might as well read a page, it is fast enough. If we get
3285 * an error, we don't need to return it. cifs_write_end will
3286 * do a sync write instead since PG_uptodate isn't set.
3288 cifs_readpage_worker(file, page, &page_start);
3289 } else {
3290 /* we could try using another file handle if there is one -
3291 but how would we lock it to prevent close of that handle
3292 racing with this read? In any case
3293 this will be written out by write_end so is fine */
3295 out:
3296 *pagep = page;
3297 return rc;
3300 static int cifs_release_page(struct page *page, gfp_t gfp)
3302 if (PagePrivate(page))
3303 return 0;
3305 return cifs_fscache_release_page(page, gfp);
3308 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3310 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3312 if (offset == 0)
3313 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3316 static int cifs_launder_page(struct page *page)
3318 int rc = 0;
3319 loff_t range_start = page_offset(page);
3320 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3321 struct writeback_control wbc = {
3322 .sync_mode = WB_SYNC_ALL,
3323 .nr_to_write = 0,
3324 .range_start = range_start,
3325 .range_end = range_end,
3328 cFYI(1, "Launder page: %p", page);
3330 if (clear_page_dirty_for_io(page))
3331 rc = cifs_writepage_locked(page, &wbc);
3333 cifs_fscache_invalidate_page(page, page->mapping->host);
3334 return rc;
3337 void cifs_oplock_break(struct work_struct *work)
3339 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3340 oplock_break);
3341 struct inode *inode = cfile->dentry->d_inode;
3342 struct cifsInodeInfo *cinode = CIFS_I(inode);
3343 int rc = 0;
3345 if (inode && S_ISREG(inode->i_mode)) {
3346 if (cinode->clientCanCacheRead)
3347 break_lease(inode, O_RDONLY);
3348 else
3349 break_lease(inode, O_WRONLY);
3350 rc = filemap_fdatawrite(inode->i_mapping);
3351 if (cinode->clientCanCacheRead == 0) {
3352 rc = filemap_fdatawait(inode->i_mapping);
3353 mapping_set_error(inode->i_mapping, rc);
3354 invalidate_remote_inode(inode);
3356 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3359 rc = cifs_push_locks(cfile);
3360 if (rc)
3361 cERROR(1, "Push locks rc = %d", rc);
3364 * releasing stale oplock after recent reconnect of smb session using
3365 * a now incorrect file handle is not a data integrity issue but do
3366 * not bother sending an oplock release if session to server still is
3367 * disconnected since oplock already released by the server
3369 if (!cfile->oplock_break_cancelled) {
3370 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
3371 current->tgid, 0, 0, 0, 0,
3372 LOCKING_ANDX_OPLOCK_RELEASE, false,
3373 cinode->clientCanCacheRead ? 1 : 0);
3374 cFYI(1, "Oplock release rc = %d", rc);
3378 const struct address_space_operations cifs_addr_ops = {
3379 .readpage = cifs_readpage,
3380 .readpages = cifs_readpages,
3381 .writepage = cifs_writepage,
3382 .writepages = cifs_writepages,
3383 .write_begin = cifs_write_begin,
3384 .write_end = cifs_write_end,
3385 .set_page_dirty = __set_page_dirty_nobuffers,
3386 .releasepage = cifs_release_page,
3387 .invalidatepage = cifs_invalidate_page,
3388 .launder_page = cifs_launder_page,
3392 * cifs_readpages requires the server to support a buffer large enough to
3393 * contain the header plus one complete page of data. Otherwise, we need
3394 * to leave cifs_readpages out of the address space operations.
3396 const struct address_space_operations cifs_addr_ops_smallbuf = {
3397 .readpage = cifs_readpage,
3398 .writepage = cifs_writepage,
3399 .writepages = cifs_writepages,
3400 .write_begin = cifs_write_begin,
3401 .write_end = cifs_write_end,
3402 .set_page_dirty = __set_page_dirty_nobuffers,
3403 .releasepage = cifs_release_page,
3404 .invalidatepage = cifs_invalidate_page,
3405 .launder_page = cifs_launder_page,