ARM: 7178/1: fault.c: Port OOM changes into do_page_fault
[linux-2.6/btrfs-unstable.git] / fs / cifs / file.c
blobcf0b1539b321acf1cdd69e4db0f590d8f83e9293
1 /*
2 * fs/cifs/file.c
4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
49 return GENERIC_READ;
50 else if ((flags & O_ACCMODE) == O_WRONLY)
51 return GENERIC_WRITE;
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61 FILE_READ_DATA);
64 static u32 cifs_posix_convert_flags(unsigned int flags)
66 u32 posix_flags = 0;
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
75 if (flags & O_CREAT)
76 posix_flags |= SMB_O_CREAT;
77 if (flags & O_EXCL)
78 posix_flags |= SMB_O_EXCL;
79 if (flags & O_TRUNC)
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
82 if (flags & O_DSYNC)
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
88 if (flags & O_DIRECT)
89 posix_flags |= SMB_O_DIRECT;
91 return posix_flags;
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97 return FILE_CREATE;
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
101 return FILE_OPEN_IF;
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
104 else
105 return FILE_OPEN;
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, int xid)
112 int rc;
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
124 return -ENOMEM;
126 tlink = cifs_sb_tlink(cifs_sb);
127 if (IS_ERR(tlink)) {
128 rc = PTR_ERR(tlink);
129 goto posix_open_ret;
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
142 if (rc)
143 goto posix_open_ret;
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
148 if (!pinode)
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
157 if (!*pinode) {
158 rc = -ENOMEM;
159 goto posix_open_ret;
161 } else {
162 cifs_fattr_to_inode(*pinode, &fattr);
165 posix_open_ret:
166 kfree(presp_data);
167 return rc;
170 static int
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, int xid)
175 int rc;
176 int desiredAccess;
177 int disposition;
178 int create_options = CREATE_NOT_DIR;
179 FILE_ALL_INFO *buf;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
212 if (!buf)
213 return -ENOMEM;
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
223 else
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
229 if (rc)
230 goto out;
232 if (tcon->unix_ext)
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
234 xid);
235 else
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
237 xid, pnetfid);
239 out:
240 kfree(buf);
241 return rc;
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
255 return pCifsFile;
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
268 spin_lock(&cifs_file_list_lock);
269 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
270 /* if readable file instance put first in list*/
271 if (file->f_mode & FMODE_READ)
272 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
273 else
274 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
275 spin_unlock(&cifs_file_list_lock);
277 cifs_set_oplock_level(pCifsInode, oplock);
278 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
280 file->private_data = pCifsFile;
281 return pCifsFile;
284 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
287 * Release a reference on the file private data. This may involve closing
288 * the filehandle out on the server. Must be called without holding
289 * cifs_file_list_lock.
291 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
293 struct inode *inode = cifs_file->dentry->d_inode;
294 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
295 struct cifsInodeInfo *cifsi = CIFS_I(inode);
296 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
297 struct cifsLockInfo *li, *tmp;
299 spin_lock(&cifs_file_list_lock);
300 if (--cifs_file->count > 0) {
301 spin_unlock(&cifs_file_list_lock);
302 return;
305 /* remove it from the lists */
306 list_del(&cifs_file->flist);
307 list_del(&cifs_file->tlist);
309 if (list_empty(&cifsi->openFileList)) {
310 cFYI(1, "closing last open instance for inode %p",
311 cifs_file->dentry->d_inode);
313 /* in strict cache mode we need invalidate mapping on the last
314 close because it may cause a error when we open this file
315 again and get at least level II oplock */
316 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
317 CIFS_I(inode)->invalid_mapping = true;
319 cifs_set_oplock_level(cifsi, 0);
321 spin_unlock(&cifs_file_list_lock);
323 cancel_work_sync(&cifs_file->oplock_break);
325 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
326 int xid, rc;
328 xid = GetXid();
329 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
330 FreeXid(xid);
333 /* Delete any outstanding lock records. We'll lose them when the file
334 * is closed anyway.
336 mutex_lock(&cifsi->lock_mutex);
337 list_for_each_entry_safe(li, tmp, &cifsi->llist, llist) {
338 if (li->netfid != cifs_file->netfid)
339 continue;
340 list_del(&li->llist);
341 cifs_del_lock_waiters(li);
342 kfree(li);
344 mutex_unlock(&cifsi->lock_mutex);
346 cifs_put_tlink(cifs_file->tlink);
347 dput(cifs_file->dentry);
348 kfree(cifs_file);
351 int cifs_open(struct inode *inode, struct file *file)
353 int rc = -EACCES;
354 int xid;
355 __u32 oplock;
356 struct cifs_sb_info *cifs_sb;
357 struct cifs_tcon *tcon;
358 struct tcon_link *tlink;
359 struct cifsFileInfo *pCifsFile = NULL;
360 char *full_path = NULL;
361 bool posix_open_ok = false;
362 __u16 netfid;
364 xid = GetXid();
366 cifs_sb = CIFS_SB(inode->i_sb);
367 tlink = cifs_sb_tlink(cifs_sb);
368 if (IS_ERR(tlink)) {
369 FreeXid(xid);
370 return PTR_ERR(tlink);
372 tcon = tlink_tcon(tlink);
374 full_path = build_path_from_dentry(file->f_path.dentry);
375 if (full_path == NULL) {
376 rc = -ENOMEM;
377 goto out;
380 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
381 inode, file->f_flags, full_path);
383 if (enable_oplocks)
384 oplock = REQ_OPLOCK;
385 else
386 oplock = 0;
388 if (!tcon->broken_posix_open && tcon->unix_ext &&
389 (tcon->ses->capabilities & CAP_UNIX) &&
390 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
391 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
392 /* can not refresh inode info since size could be stale */
393 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
394 cifs_sb->mnt_file_mode /* ignored */,
395 file->f_flags, &oplock, &netfid, xid);
396 if (rc == 0) {
397 cFYI(1, "posix open succeeded");
398 posix_open_ok = true;
399 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
400 if (tcon->ses->serverNOS)
401 cERROR(1, "server %s of type %s returned"
402 " unexpected error on SMB posix open"
403 ", disabling posix open support."
404 " Check if server update available.",
405 tcon->ses->serverName,
406 tcon->ses->serverNOS);
407 tcon->broken_posix_open = true;
408 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
409 (rc != -EOPNOTSUPP)) /* path not found or net err */
410 goto out;
411 /* else fallthrough to retry open the old way on network i/o
412 or DFS errors */
415 if (!posix_open_ok) {
416 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
417 file->f_flags, &oplock, &netfid, xid);
418 if (rc)
419 goto out;
422 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
423 if (pCifsFile == NULL) {
424 CIFSSMBClose(xid, tcon, netfid);
425 rc = -ENOMEM;
426 goto out;
429 cifs_fscache_set_inode_cookie(inode, file);
431 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
432 /* time to set mode which we can not set earlier due to
433 problems creating new read-only files */
434 struct cifs_unix_set_info_args args = {
435 .mode = inode->i_mode,
436 .uid = NO_CHANGE_64,
437 .gid = NO_CHANGE_64,
438 .ctime = NO_CHANGE_64,
439 .atime = NO_CHANGE_64,
440 .mtime = NO_CHANGE_64,
441 .device = 0,
443 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
444 pCifsFile->pid);
447 out:
448 kfree(full_path);
449 FreeXid(xid);
450 cifs_put_tlink(tlink);
451 return rc;
/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.  Currently a placeholder that always reports
 * success.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	/* BB list all locks open on this file and relock */
	return 0;
}
465 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
467 int rc = -EACCES;
468 int xid;
469 __u32 oplock;
470 struct cifs_sb_info *cifs_sb;
471 struct cifs_tcon *tcon;
472 struct cifsInodeInfo *pCifsInode;
473 struct inode *inode;
474 char *full_path = NULL;
475 int desiredAccess;
476 int disposition = FILE_OPEN;
477 int create_options = CREATE_NOT_DIR;
478 __u16 netfid;
480 xid = GetXid();
481 mutex_lock(&pCifsFile->fh_mutex);
482 if (!pCifsFile->invalidHandle) {
483 mutex_unlock(&pCifsFile->fh_mutex);
484 rc = 0;
485 FreeXid(xid);
486 return rc;
489 inode = pCifsFile->dentry->d_inode;
490 cifs_sb = CIFS_SB(inode->i_sb);
491 tcon = tlink_tcon(pCifsFile->tlink);
493 /* can not grab rename sem here because various ops, including
494 those that already have the rename sem can end up causing writepage
495 to get called and if the server was down that means we end up here,
496 and we can never tell if the caller already has the rename_sem */
497 full_path = build_path_from_dentry(pCifsFile->dentry);
498 if (full_path == NULL) {
499 rc = -ENOMEM;
500 mutex_unlock(&pCifsFile->fh_mutex);
501 FreeXid(xid);
502 return rc;
505 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
506 inode, pCifsFile->f_flags, full_path);
508 if (enable_oplocks)
509 oplock = REQ_OPLOCK;
510 else
511 oplock = 0;
513 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
514 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
515 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
518 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
519 * original open. Must mask them off for a reopen.
521 unsigned int oflags = pCifsFile->f_flags &
522 ~(O_CREAT | O_EXCL | O_TRUNC);
524 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
525 cifs_sb->mnt_file_mode /* ignored */,
526 oflags, &oplock, &netfid, xid);
527 if (rc == 0) {
528 cFYI(1, "posix reopen succeeded");
529 goto reopen_success;
531 /* fallthrough to retry open the old way on errors, especially
532 in the reconnect path it is important to retry hard */
535 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
537 if (backup_cred(cifs_sb))
538 create_options |= CREATE_OPEN_BACKUP_INTENT;
540 /* Can not refresh inode by passing in file_info buf to be returned
541 by SMBOpen and then calling get_inode_info with returned buf
542 since file might have write behind data that needs to be flushed
543 and server version of file size can be stale. If we knew for sure
544 that inode was not dirty locally we could do this */
546 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
547 create_options, &netfid, &oplock, NULL,
548 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
549 CIFS_MOUNT_MAP_SPECIAL_CHR);
550 if (rc) {
551 mutex_unlock(&pCifsFile->fh_mutex);
552 cFYI(1, "cifs_open returned 0x%x", rc);
553 cFYI(1, "oplock: %d", oplock);
554 goto reopen_error_exit;
557 reopen_success:
558 pCifsFile->netfid = netfid;
559 pCifsFile->invalidHandle = false;
560 mutex_unlock(&pCifsFile->fh_mutex);
561 pCifsInode = CIFS_I(inode);
563 if (can_flush) {
564 rc = filemap_write_and_wait(inode->i_mapping);
565 mapping_set_error(inode->i_mapping, rc);
567 if (tcon->unix_ext)
568 rc = cifs_get_inode_info_unix(&inode,
569 full_path, inode->i_sb, xid);
570 else
571 rc = cifs_get_inode_info(&inode,
572 full_path, NULL, inode->i_sb,
573 xid, NULL);
574 } /* else we are writing out data to server already
575 and could deadlock if we tried to flush data, and
576 since we do not know if we have data that would
577 invalidate the current end of file on the server
578 we can not go to the server to get the new inod
579 info */
581 cifs_set_oplock_level(pCifsInode, oplock);
583 cifs_relock_file(pCifsFile);
585 reopen_error_exit:
586 kfree(full_path);
587 FreeXid(xid);
588 return rc;
591 int cifs_close(struct inode *inode, struct file *file)
593 if (file->private_data != NULL) {
594 cifsFileInfo_put(file->private_data);
595 file->private_data = NULL;
598 /* return code from the ->release op is always ignored */
599 return 0;
602 int cifs_closedir(struct inode *inode, struct file *file)
604 int rc = 0;
605 int xid;
606 struct cifsFileInfo *pCFileStruct = file->private_data;
607 char *ptmp;
609 cFYI(1, "Closedir inode = 0x%p", inode);
611 xid = GetXid();
613 if (pCFileStruct) {
614 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
616 cFYI(1, "Freeing private data in close dir");
617 spin_lock(&cifs_file_list_lock);
618 if (!pCFileStruct->srch_inf.endOfSearch &&
619 !pCFileStruct->invalidHandle) {
620 pCFileStruct->invalidHandle = true;
621 spin_unlock(&cifs_file_list_lock);
622 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
623 cFYI(1, "Closing uncompleted readdir with rc %d",
624 rc);
625 /* not much we can do if it fails anyway, ignore rc */
626 rc = 0;
627 } else
628 spin_unlock(&cifs_file_list_lock);
629 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
630 if (ptmp) {
631 cFYI(1, "closedir free smb buf in srch struct");
632 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
633 if (pCFileStruct->srch_inf.smallBuf)
634 cifs_small_buf_release(ptmp);
635 else
636 cifs_buf_release(ptmp);
638 cifs_put_tlink(pCFileStruct->tlink);
639 kfree(file->private_data);
640 file->private_data = NULL;
642 /* BB can we lock the filestruct while this is going on? */
643 FreeXid(xid);
644 return rc;
647 static struct cifsLockInfo *
648 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 netfid)
650 struct cifsLockInfo *lock =
651 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
652 if (!lock)
653 return lock;
654 lock->offset = offset;
655 lock->length = length;
656 lock->type = type;
657 lock->netfid = netfid;
658 lock->pid = current->tgid;
659 INIT_LIST_HEAD(&lock->blist);
660 init_waitqueue_head(&lock->block_q);
661 return lock;
664 static void
665 cifs_del_lock_waiters(struct cifsLockInfo *lock)
667 struct cifsLockInfo *li, *tmp;
668 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
669 list_del_init(&li->blist);
670 wake_up(&li->block_q);
674 static bool
675 __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
676 __u64 length, __u8 type, __u16 netfid,
677 struct cifsLockInfo **conf_lock)
679 struct cifsLockInfo *li, *tmp;
681 list_for_each_entry_safe(li, tmp, &cinode->llist, llist) {
682 if (offset + length <= li->offset ||
683 offset >= li->offset + li->length)
684 continue;
685 else if ((type & LOCKING_ANDX_SHARED_LOCK) &&
686 ((netfid == li->netfid && current->tgid == li->pid) ||
687 type == li->type))
688 continue;
689 else {
690 *conf_lock = li;
691 return true;
694 return false;
697 static bool
698 cifs_find_lock_conflict(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock,
699 struct cifsLockInfo **conf_lock)
701 return __cifs_find_lock_conflict(cinode, lock->offset, lock->length,
702 lock->type, lock->netfid, conf_lock);
705 static int
706 cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length,
707 __u8 type, __u16 netfid, struct file_lock *flock)
709 int rc = 0;
710 struct cifsLockInfo *conf_lock;
711 bool exist;
713 mutex_lock(&cinode->lock_mutex);
715 exist = __cifs_find_lock_conflict(cinode, offset, length, type, netfid,
716 &conf_lock);
717 if (exist) {
718 flock->fl_start = conf_lock->offset;
719 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
720 flock->fl_pid = conf_lock->pid;
721 if (conf_lock->type & LOCKING_ANDX_SHARED_LOCK)
722 flock->fl_type = F_RDLCK;
723 else
724 flock->fl_type = F_WRLCK;
725 } else if (!cinode->can_cache_brlcks)
726 rc = 1;
727 else
728 flock->fl_type = F_UNLCK;
730 mutex_unlock(&cinode->lock_mutex);
731 return rc;
734 static void
735 cifs_lock_add(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock)
737 mutex_lock(&cinode->lock_mutex);
738 list_add_tail(&lock->llist, &cinode->llist);
739 mutex_unlock(&cinode->lock_mutex);
742 static int
743 cifs_lock_add_if(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock,
744 bool wait)
746 struct cifsLockInfo *conf_lock;
747 bool exist;
748 int rc = 0;
750 try_again:
751 exist = false;
752 mutex_lock(&cinode->lock_mutex);
754 exist = cifs_find_lock_conflict(cinode, lock, &conf_lock);
755 if (!exist && cinode->can_cache_brlcks) {
756 list_add_tail(&lock->llist, &cinode->llist);
757 mutex_unlock(&cinode->lock_mutex);
758 return rc;
761 if (!exist)
762 rc = 1;
763 else if (!wait)
764 rc = -EACCES;
765 else {
766 list_add_tail(&lock->blist, &conf_lock->blist);
767 mutex_unlock(&cinode->lock_mutex);
768 rc = wait_event_interruptible(lock->block_q,
769 (lock->blist.prev == &lock->blist) &&
770 (lock->blist.next == &lock->blist));
771 if (!rc)
772 goto try_again;
773 mutex_lock(&cinode->lock_mutex);
774 list_del_init(&lock->blist);
777 mutex_unlock(&cinode->lock_mutex);
778 return rc;
781 static int
782 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
784 int rc = 0;
785 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
786 unsigned char saved_type = flock->fl_type;
788 if ((flock->fl_flags & FL_POSIX) == 0)
789 return 1;
791 mutex_lock(&cinode->lock_mutex);
792 posix_test_lock(file, flock);
794 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
795 flock->fl_type = saved_type;
796 rc = 1;
799 mutex_unlock(&cinode->lock_mutex);
800 return rc;
803 static int
804 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
806 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
807 int rc = 1;
809 if ((flock->fl_flags & FL_POSIX) == 0)
810 return rc;
812 mutex_lock(&cinode->lock_mutex);
813 if (!cinode->can_cache_brlcks) {
814 mutex_unlock(&cinode->lock_mutex);
815 return rc;
817 rc = posix_lock_file_wait(file, flock);
818 mutex_unlock(&cinode->lock_mutex);
819 return rc;
822 static int
823 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
825 int xid, rc = 0, stored_rc;
826 struct cifsLockInfo *li, *tmp;
827 struct cifs_tcon *tcon;
828 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
829 unsigned int num, max_num;
830 LOCKING_ANDX_RANGE *buf, *cur;
831 int types[] = {LOCKING_ANDX_LARGE_FILES,
832 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
833 int i;
835 xid = GetXid();
836 tcon = tlink_tcon(cfile->tlink);
838 mutex_lock(&cinode->lock_mutex);
839 if (!cinode->can_cache_brlcks) {
840 mutex_unlock(&cinode->lock_mutex);
841 FreeXid(xid);
842 return rc;
845 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
846 sizeof(LOCKING_ANDX_RANGE);
847 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
848 if (!buf) {
849 mutex_unlock(&cinode->lock_mutex);
850 FreeXid(xid);
851 return rc;
854 for (i = 0; i < 2; i++) {
855 cur = buf;
856 num = 0;
857 list_for_each_entry_safe(li, tmp, &cinode->llist, llist) {
858 if (li->type != types[i])
859 continue;
860 cur->Pid = cpu_to_le16(li->pid);
861 cur->LengthLow = cpu_to_le32((u32)li->length);
862 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
863 cur->OffsetLow = cpu_to_le32((u32)li->offset);
864 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
865 if (++num == max_num) {
866 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
867 li->type, 0, num, buf);
868 if (stored_rc)
869 rc = stored_rc;
870 cur = buf;
871 num = 0;
872 } else
873 cur++;
876 if (num) {
877 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
878 types[i], 0, num, buf);
879 if (stored_rc)
880 rc = stored_rc;
884 cinode->can_cache_brlcks = false;
885 mutex_unlock(&cinode->lock_mutex);
887 kfree(buf);
888 FreeXid(xid);
889 return rc;
892 /* copied from fs/locks.c with a name change */
893 #define cifs_for_each_lock(inode, lockp) \
894 for (lockp = &inode->i_flock; *lockp != NULL; \
895 lockp = &(*lockp)->fl_next)
897 static int
898 cifs_push_posix_locks(struct cifsFileInfo *cfile)
900 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
901 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
902 struct file_lock *flock, **before;
903 struct cifsLockInfo *lck, *tmp;
904 int rc = 0, xid, type;
905 __u64 length;
906 struct list_head locks_to_send;
908 xid = GetXid();
910 mutex_lock(&cinode->lock_mutex);
911 if (!cinode->can_cache_brlcks) {
912 mutex_unlock(&cinode->lock_mutex);
913 FreeXid(xid);
914 return rc;
917 INIT_LIST_HEAD(&locks_to_send);
919 lock_flocks();
920 cifs_for_each_lock(cfile->dentry->d_inode, before) {
921 flock = *before;
922 length = 1 + flock->fl_end - flock->fl_start;
923 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
924 type = CIFS_RDLCK;
925 else
926 type = CIFS_WRLCK;
928 lck = cifs_lock_init(flock->fl_start, length, type,
929 cfile->netfid);
930 if (!lck) {
931 rc = -ENOMEM;
932 goto send_locks;
934 lck->pid = flock->fl_pid;
936 list_add_tail(&lck->llist, &locks_to_send);
939 send_locks:
940 unlock_flocks();
942 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
943 struct file_lock tmp_lock;
944 int stored_rc;
946 tmp_lock.fl_start = lck->offset;
947 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
948 0, lck->length, &tmp_lock,
949 lck->type, 0);
950 if (stored_rc)
951 rc = stored_rc;
952 list_del(&lck->llist);
953 kfree(lck);
956 cinode->can_cache_brlcks = false;
957 mutex_unlock(&cinode->lock_mutex);
959 FreeXid(xid);
960 return rc;
963 static int
964 cifs_push_locks(struct cifsFileInfo *cfile)
966 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
967 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
969 if ((tcon->ses->capabilities & CAP_UNIX) &&
970 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
971 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
972 return cifs_push_posix_locks(cfile);
974 return cifs_push_mandatory_locks(cfile);
977 static void
978 cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock,
979 bool *wait_flag)
981 if (flock->fl_flags & FL_POSIX)
982 cFYI(1, "Posix");
983 if (flock->fl_flags & FL_FLOCK)
984 cFYI(1, "Flock");
985 if (flock->fl_flags & FL_SLEEP) {
986 cFYI(1, "Blocking lock");
987 *wait_flag = true;
989 if (flock->fl_flags & FL_ACCESS)
990 cFYI(1, "Process suspended by mandatory locking - "
991 "not implemented yet");
992 if (flock->fl_flags & FL_LEASE)
993 cFYI(1, "Lease on file - not implemented yet");
994 if (flock->fl_flags &
995 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
996 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
998 *type = LOCKING_ANDX_LARGE_FILES;
999 if (flock->fl_type == F_WRLCK) {
1000 cFYI(1, "F_WRLCK ");
1001 *lock = 1;
1002 } else if (flock->fl_type == F_UNLCK) {
1003 cFYI(1, "F_UNLCK");
1004 *unlock = 1;
1005 /* Check if unlock includes more than one lock range */
1006 } else if (flock->fl_type == F_RDLCK) {
1007 cFYI(1, "F_RDLCK");
1008 *type |= LOCKING_ANDX_SHARED_LOCK;
1009 *lock = 1;
1010 } else if (flock->fl_type == F_EXLCK) {
1011 cFYI(1, "F_EXLCK");
1012 *lock = 1;
1013 } else if (flock->fl_type == F_SHLCK) {
1014 cFYI(1, "F_SHLCK");
1015 *type |= LOCKING_ANDX_SHARED_LOCK;
1016 *lock = 1;
1017 } else
1018 cFYI(1, "Unknown type of lock");
1021 static int
1022 cifs_getlk(struct file *file, struct file_lock *flock, __u8 type,
1023 bool wait_flag, bool posix_lck, int xid)
1025 int rc = 0;
1026 __u64 length = 1 + flock->fl_end - flock->fl_start;
1027 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1028 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1029 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1030 __u16 netfid = cfile->netfid;
1032 if (posix_lck) {
1033 int posix_lock_type;
1035 rc = cifs_posix_lock_test(file, flock);
1036 if (!rc)
1037 return rc;
1039 if (type & LOCKING_ANDX_SHARED_LOCK)
1040 posix_lock_type = CIFS_RDLCK;
1041 else
1042 posix_lock_type = CIFS_WRLCK;
1043 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1044 1 /* get */, length, flock,
1045 posix_lock_type, wait_flag);
1046 return rc;
1049 rc = cifs_lock_test(cinode, flock->fl_start, length, type, netfid,
1050 flock);
1051 if (!rc)
1052 return rc;
1054 /* BB we could chain these into one lock request BB */
1055 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
1056 flock->fl_start, 0, 1, type, 0, 0);
1057 if (rc == 0) {
1058 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
1059 length, flock->fl_start, 1, 0,
1060 type, 0, 0);
1061 flock->fl_type = F_UNLCK;
1062 if (rc != 0)
1063 cERROR(1, "Error unlocking previously locked "
1064 "range %d during test of lock", rc);
1065 return 0;
1068 if (type & LOCKING_ANDX_SHARED_LOCK) {
1069 flock->fl_type = F_WRLCK;
1070 return 0;
1073 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
1074 flock->fl_start, 0, 1,
1075 type | LOCKING_ANDX_SHARED_LOCK, 0, 0);
1076 if (rc == 0) {
1077 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
1078 length, flock->fl_start, 1, 0,
1079 type | LOCKING_ANDX_SHARED_LOCK,
1080 0, 0);
1081 flock->fl_type = F_RDLCK;
1082 if (rc != 0)
1083 cERROR(1, "Error unlocking previously locked "
1084 "range %d during test of lock", rc);
1085 } else
1086 flock->fl_type = F_WRLCK;
1088 return 0;
1091 static void
1092 cifs_move_llist(struct list_head *source, struct list_head *dest)
1094 struct list_head *li, *tmp;
1095 list_for_each_safe(li, tmp, source)
1096 list_move(li, dest);
1099 static void
1100 cifs_free_llist(struct list_head *llist)
1102 struct cifsLockInfo *li, *tmp;
1103 list_for_each_entry_safe(li, tmp, llist, llist) {
1104 cifs_del_lock_waiters(li);
1105 list_del(&li->llist);
1106 kfree(li);
1110 static int
1111 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1113 int rc = 0, stored_rc;
1114 int types[] = {LOCKING_ANDX_LARGE_FILES,
1115 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1116 unsigned int i;
1117 unsigned int max_num, num;
1118 LOCKING_ANDX_RANGE *buf, *cur;
1119 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1120 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1121 struct cifsLockInfo *li, *tmp;
1122 __u64 length = 1 + flock->fl_end - flock->fl_start;
1123 struct list_head tmp_llist;
1125 INIT_LIST_HEAD(&tmp_llist);
1127 max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) /
1128 sizeof(LOCKING_ANDX_RANGE);
1129 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1130 if (!buf)
1131 return -ENOMEM;
1133 mutex_lock(&cinode->lock_mutex);
1134 for (i = 0; i < 2; i++) {
1135 cur = buf;
1136 num = 0;
1137 list_for_each_entry_safe(li, tmp, &cinode->llist, llist) {
1138 if (flock->fl_start > li->offset ||
1139 (flock->fl_start + length) <
1140 (li->offset + li->length))
1141 continue;
1142 if (current->tgid != li->pid)
1143 continue;
1144 if (cfile->netfid != li->netfid)
1145 continue;
1146 if (types[i] != li->type)
1147 continue;
1148 if (!cinode->can_cache_brlcks) {
1149 cur->Pid = cpu_to_le16(li->pid);
1150 cur->LengthLow = cpu_to_le32((u32)li->length);
1151 cur->LengthHigh =
1152 cpu_to_le32((u32)(li->length>>32));
1153 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1154 cur->OffsetHigh =
1155 cpu_to_le32((u32)(li->offset>>32));
1157 * We need to save a lock here to let us add
1158 * it again to the inode list if the unlock
1159 * range request fails on the server.
1161 list_move(&li->llist, &tmp_llist);
1162 if (++num == max_num) {
1163 stored_rc = cifs_lockv(xid, tcon,
1164 cfile->netfid,
1165 li->type, num,
1166 0, buf);
1167 if (stored_rc) {
1169 * We failed on the unlock range
1170 * request - add all locks from
1171 * the tmp list to the head of
1172 * the inode list.
1174 cifs_move_llist(&tmp_llist,
1175 &cinode->llist);
1176 rc = stored_rc;
1177 } else
1179 * The unlock range request
1180 * succeed - free the tmp list.
1182 cifs_free_llist(&tmp_llist);
1183 cur = buf;
1184 num = 0;
1185 } else
1186 cur++;
1187 } else {
1189 * We can cache brlock requests - simply remove
1190 * a lock from the inode list.
1192 list_del(&li->llist);
1193 cifs_del_lock_waiters(li);
1194 kfree(li);
1197 if (num) {
1198 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1199 types[i], num, 0, buf);
1200 if (stored_rc) {
1201 cifs_move_llist(&tmp_llist, &cinode->llist);
1202 rc = stored_rc;
1203 } else
1204 cifs_free_llist(&tmp_llist);
1208 mutex_unlock(&cinode->lock_mutex);
1209 kfree(buf);
1210 return rc;
1213 static int
1214 cifs_setlk(struct file *file, struct file_lock *flock, __u8 type,
1215 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
1217 int rc = 0;
1218 __u64 length = 1 + flock->fl_end - flock->fl_start;
1219 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1220 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1221 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
1222 __u16 netfid = cfile->netfid;
1224 if (posix_lck) {
1225 int posix_lock_type;
1227 rc = cifs_posix_lock_set(file, flock);
1228 if (!rc || rc < 0)
1229 return rc;
1231 if (type & LOCKING_ANDX_SHARED_LOCK)
1232 posix_lock_type = CIFS_RDLCK;
1233 else
1234 posix_lock_type = CIFS_WRLCK;
1236 if (unlock == 1)
1237 posix_lock_type = CIFS_UNLCK;
1239 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1240 0 /* set */, length, flock,
1241 posix_lock_type, wait_flag);
1242 goto out;
1245 if (lock) {
1246 struct cifsLockInfo *lock;
1248 lock = cifs_lock_init(flock->fl_start, length, type, netfid);
1249 if (!lock)
1250 return -ENOMEM;
1252 rc = cifs_lock_add_if(cinode, lock, wait_flag);
1253 if (rc < 0)
1254 kfree(lock);
1255 if (rc <= 0)
1256 goto out;
1258 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
1259 flock->fl_start, 0, 1, type, wait_flag, 0);
1260 if (rc) {
1261 kfree(lock);
1262 goto out;
1265 cifs_lock_add(cinode, lock);
1266 } else if (unlock)
1267 rc = cifs_unlock_range(cfile, flock, xid);
1269 out:
1270 if (flock->fl_flags & FL_POSIX)
1271 posix_lock_file_wait(file, flock);
1272 return rc;
1275 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1277 int rc, xid;
1278 int lock = 0, unlock = 0;
1279 bool wait_flag = false;
1280 bool posix_lck = false;
1281 struct cifs_sb_info *cifs_sb;
1282 struct cifs_tcon *tcon;
1283 struct cifsInodeInfo *cinode;
1284 struct cifsFileInfo *cfile;
1285 __u16 netfid;
1286 __u8 type;
1288 rc = -EACCES;
1289 xid = GetXid();
1291 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1292 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1293 flock->fl_start, flock->fl_end);
1295 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag);
1297 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1298 cfile = (struct cifsFileInfo *)file->private_data;
1299 tcon = tlink_tcon(cfile->tlink);
1300 netfid = cfile->netfid;
1301 cinode = CIFS_I(file->f_path.dentry->d_inode);
1303 if ((tcon->ses->capabilities & CAP_UNIX) &&
1304 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1305 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1306 posix_lck = true;
1308 * BB add code here to normalize offset and length to account for
1309 * negative length which we can not accept over the wire.
1311 if (IS_GETLK(cmd)) {
1312 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1313 FreeXid(xid);
1314 return rc;
1317 if (!lock && !unlock) {
1319 * if no lock or unlock then nothing to do since we do not
1320 * know what it is
1322 FreeXid(xid);
1323 return -EOPNOTSUPP;
1326 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1327 xid);
1328 FreeXid(xid);
1329 return rc;
1332 /* update the file size (if needed) after a write */
1333 void
1334 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1335 unsigned int bytes_written)
1337 loff_t end_of_write = offset + bytes_written;
1339 if (end_of_write > cifsi->server_eof)
1340 cifsi->server_eof = end_of_write;
1343 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1344 const char *write_data, size_t write_size,
1345 loff_t *poffset)
1347 int rc = 0;
1348 unsigned int bytes_written = 0;
1349 unsigned int total_written;
1350 struct cifs_sb_info *cifs_sb;
1351 struct cifs_tcon *pTcon;
1352 int xid;
1353 struct dentry *dentry = open_file->dentry;
1354 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1355 struct cifs_io_parms io_parms;
1357 cifs_sb = CIFS_SB(dentry->d_sb);
1359 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1360 *poffset, dentry->d_name.name);
1362 pTcon = tlink_tcon(open_file->tlink);
1364 xid = GetXid();
1366 for (total_written = 0; write_size > total_written;
1367 total_written += bytes_written) {
1368 rc = -EAGAIN;
1369 while (rc == -EAGAIN) {
1370 struct kvec iov[2];
1371 unsigned int len;
1373 if (open_file->invalidHandle) {
1374 /* we could deadlock if we called
1375 filemap_fdatawait from here so tell
1376 reopen_file not to flush data to
1377 server now */
1378 rc = cifs_reopen_file(open_file, false);
1379 if (rc != 0)
1380 break;
1383 len = min((size_t)cifs_sb->wsize,
1384 write_size - total_written);
1385 /* iov[0] is reserved for smb header */
1386 iov[1].iov_base = (char *)write_data + total_written;
1387 iov[1].iov_len = len;
1388 io_parms.netfid = open_file->netfid;
1389 io_parms.pid = pid;
1390 io_parms.tcon = pTcon;
1391 io_parms.offset = *poffset;
1392 io_parms.length = len;
1393 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1394 1, 0);
1396 if (rc || (bytes_written == 0)) {
1397 if (total_written)
1398 break;
1399 else {
1400 FreeXid(xid);
1401 return rc;
1403 } else {
1404 cifs_update_eof(cifsi, *poffset, bytes_written);
1405 *poffset += bytes_written;
1409 cifs_stats_bytes_written(pTcon, total_written);
1411 if (total_written > 0) {
1412 spin_lock(&dentry->d_inode->i_lock);
1413 if (*poffset > dentry->d_inode->i_size)
1414 i_size_write(dentry->d_inode, *poffset);
1415 spin_unlock(&dentry->d_inode->i_lock);
1417 mark_inode_dirty_sync(dentry->d_inode);
1418 FreeXid(xid);
1419 return total_written;
1422 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1423 bool fsuid_only)
1425 struct cifsFileInfo *open_file = NULL;
1426 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1428 /* only filter by fsuid on multiuser mounts */
1429 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1430 fsuid_only = false;
1432 spin_lock(&cifs_file_list_lock);
1433 /* we could simply get the first_list_entry since write-only entries
1434 are always at the end of the list but since the first entry might
1435 have a close pending, we go through the whole list */
1436 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1437 if (fsuid_only && open_file->uid != current_fsuid())
1438 continue;
1439 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1440 if (!open_file->invalidHandle) {
1441 /* found a good file */
1442 /* lock it so it will not be closed on us */
1443 cifsFileInfo_get(open_file);
1444 spin_unlock(&cifs_file_list_lock);
1445 return open_file;
1446 } /* else might as well continue, and look for
1447 another, or simply have the caller reopen it
1448 again rather than trying to fix this handle */
1449 } else /* write only file */
1450 break; /* write only files are last so must be done */
1452 spin_unlock(&cifs_file_list_lock);
1453 return NULL;
1456 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1457 bool fsuid_only)
1459 struct cifsFileInfo *open_file;
1460 struct cifs_sb_info *cifs_sb;
1461 bool any_available = false;
1462 int rc;
1464 /* Having a null inode here (because mapping->host was set to zero by
1465 the VFS or MM) should not happen but we had reports of on oops (due to
1466 it being zero) during stress testcases so we need to check for it */
1468 if (cifs_inode == NULL) {
1469 cERROR(1, "Null inode passed to cifs_writeable_file");
1470 dump_stack();
1471 return NULL;
1474 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1476 /* only filter by fsuid on multiuser mounts */
1477 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1478 fsuid_only = false;
1480 spin_lock(&cifs_file_list_lock);
1481 refind_writable:
1482 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1483 if (!any_available && open_file->pid != current->tgid)
1484 continue;
1485 if (fsuid_only && open_file->uid != current_fsuid())
1486 continue;
1487 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1488 cifsFileInfo_get(open_file);
1490 if (!open_file->invalidHandle) {
1491 /* found a good writable file */
1492 spin_unlock(&cifs_file_list_lock);
1493 return open_file;
1496 spin_unlock(&cifs_file_list_lock);
1498 /* Had to unlock since following call can block */
1499 rc = cifs_reopen_file(open_file, false);
1500 if (!rc)
1501 return open_file;
1503 /* if it fails, try another handle if possible */
1504 cFYI(1, "wp failed on reopen file");
1505 cifsFileInfo_put(open_file);
1507 spin_lock(&cifs_file_list_lock);
1509 /* else we simply continue to the next entry. Thus
1510 we do not loop on reopen errors. If we
1511 can not reopen the file, for example if we
1512 reconnected to a server with another client
1513 racing to delete or lock the file we would not
1514 make progress if we restarted before the beginning
1515 of the loop here. */
1518 /* couldn't find useable FH with same pid, try any available */
1519 if (!any_available) {
1520 any_available = true;
1521 goto refind_writable;
1523 spin_unlock(&cifs_file_list_lock);
1524 return NULL;
1527 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1529 struct address_space *mapping = page->mapping;
1530 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1531 char *write_data;
1532 int rc = -EFAULT;
1533 int bytes_written = 0;
1534 struct inode *inode;
1535 struct cifsFileInfo *open_file;
1537 if (!mapping || !mapping->host)
1538 return -EFAULT;
1540 inode = page->mapping->host;
1542 offset += (loff_t)from;
1543 write_data = kmap(page);
1544 write_data += from;
1546 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1547 kunmap(page);
1548 return -EIO;
1551 /* racing with truncate? */
1552 if (offset > mapping->host->i_size) {
1553 kunmap(page);
1554 return 0; /* don't care */
1557 /* check to make sure that we are not extending the file */
1558 if (mapping->host->i_size - offset < (loff_t)to)
1559 to = (unsigned)(mapping->host->i_size - offset);
1561 open_file = find_writable_file(CIFS_I(mapping->host), false);
1562 if (open_file) {
1563 bytes_written = cifs_write(open_file, open_file->pid,
1564 write_data, to - from, &offset);
1565 cifsFileInfo_put(open_file);
1566 /* Does mm or vfs already set times? */
1567 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1568 if ((bytes_written > 0) && (offset))
1569 rc = 0;
1570 else if (bytes_written < 0)
1571 rc = bytes_written;
1572 } else {
1573 cFYI(1, "No writeable filehandles for inode");
1574 rc = -EIO;
1577 kunmap(page);
1578 return rc;
1581 static int cifs_writepages(struct address_space *mapping,
1582 struct writeback_control *wbc)
1584 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1585 bool done = false, scanned = false, range_whole = false;
1586 pgoff_t end, index;
1587 struct cifs_writedata *wdata;
1588 struct page *page;
1589 int rc = 0;
1592 * If wsize is smaller than the page cache size, default to writing
1593 * one page at a time via cifs_writepage
1595 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1596 return generic_writepages(mapping, wbc);
1598 if (wbc->range_cyclic) {
1599 index = mapping->writeback_index; /* Start from prev offset */
1600 end = -1;
1601 } else {
1602 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1603 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1604 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1605 range_whole = true;
1606 scanned = true;
1608 retry:
1609 while (!done && index <= end) {
1610 unsigned int i, nr_pages, found_pages;
1611 pgoff_t next = 0, tofind;
1612 struct page **pages;
1614 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1615 end - index) + 1;
1617 wdata = cifs_writedata_alloc((unsigned int)tofind);
1618 if (!wdata) {
1619 rc = -ENOMEM;
1620 break;
1624 * find_get_pages_tag seems to return a max of 256 on each
1625 * iteration, so we must call it several times in order to
1626 * fill the array or the wsize is effectively limited to
1627 * 256 * PAGE_CACHE_SIZE.
1629 found_pages = 0;
1630 pages = wdata->pages;
1631 do {
1632 nr_pages = find_get_pages_tag(mapping, &index,
1633 PAGECACHE_TAG_DIRTY,
1634 tofind, pages);
1635 found_pages += nr_pages;
1636 tofind -= nr_pages;
1637 pages += nr_pages;
1638 } while (nr_pages && tofind && index <= end);
1640 if (found_pages == 0) {
1641 kref_put(&wdata->refcount, cifs_writedata_release);
1642 break;
1645 nr_pages = 0;
1646 for (i = 0; i < found_pages; i++) {
1647 page = wdata->pages[i];
1649 * At this point we hold neither mapping->tree_lock nor
1650 * lock on the page itself: the page may be truncated or
1651 * invalidated (changing page->mapping to NULL), or even
1652 * swizzled back from swapper_space to tmpfs file
1653 * mapping
1656 if (nr_pages == 0)
1657 lock_page(page);
1658 else if (!trylock_page(page))
1659 break;
1661 if (unlikely(page->mapping != mapping)) {
1662 unlock_page(page);
1663 break;
1666 if (!wbc->range_cyclic && page->index > end) {
1667 done = true;
1668 unlock_page(page);
1669 break;
1672 if (next && (page->index != next)) {
1673 /* Not next consecutive page */
1674 unlock_page(page);
1675 break;
1678 if (wbc->sync_mode != WB_SYNC_NONE)
1679 wait_on_page_writeback(page);
1681 if (PageWriteback(page) ||
1682 !clear_page_dirty_for_io(page)) {
1683 unlock_page(page);
1684 break;
1688 * This actually clears the dirty bit in the radix tree.
1689 * See cifs_writepage() for more commentary.
1691 set_page_writeback(page);
1693 if (page_offset(page) >= mapping->host->i_size) {
1694 done = true;
1695 unlock_page(page);
1696 end_page_writeback(page);
1697 break;
1700 wdata->pages[i] = page;
1701 next = page->index + 1;
1702 ++nr_pages;
1705 /* reset index to refind any pages skipped */
1706 if (nr_pages == 0)
1707 index = wdata->pages[0]->index + 1;
1709 /* put any pages we aren't going to use */
1710 for (i = nr_pages; i < found_pages; i++) {
1711 page_cache_release(wdata->pages[i]);
1712 wdata->pages[i] = NULL;
1715 /* nothing to write? */
1716 if (nr_pages == 0) {
1717 kref_put(&wdata->refcount, cifs_writedata_release);
1718 continue;
1721 wdata->sync_mode = wbc->sync_mode;
1722 wdata->nr_pages = nr_pages;
1723 wdata->offset = page_offset(wdata->pages[0]);
1725 do {
1726 if (wdata->cfile != NULL)
1727 cifsFileInfo_put(wdata->cfile);
1728 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1729 false);
1730 if (!wdata->cfile) {
1731 cERROR(1, "No writable handles for inode");
1732 rc = -EBADF;
1733 break;
1735 rc = cifs_async_writev(wdata);
1736 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1738 for (i = 0; i < nr_pages; ++i)
1739 unlock_page(wdata->pages[i]);
1741 /* send failure -- clean up the mess */
1742 if (rc != 0) {
1743 for (i = 0; i < nr_pages; ++i) {
1744 if (rc == -EAGAIN)
1745 redirty_page_for_writepage(wbc,
1746 wdata->pages[i]);
1747 else
1748 SetPageError(wdata->pages[i]);
1749 end_page_writeback(wdata->pages[i]);
1750 page_cache_release(wdata->pages[i]);
1752 if (rc != -EAGAIN)
1753 mapping_set_error(mapping, rc);
1755 kref_put(&wdata->refcount, cifs_writedata_release);
1757 wbc->nr_to_write -= nr_pages;
1758 if (wbc->nr_to_write <= 0)
1759 done = true;
1761 index = next;
1764 if (!scanned && !done) {
1766 * We hit the last page and there is more work to be done: wrap
1767 * back to the start of the file
1769 scanned = true;
1770 index = 0;
1771 goto retry;
1774 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1775 mapping->writeback_index = index;
1777 return rc;
1780 static int
1781 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1783 int rc;
1784 int xid;
1786 xid = GetXid();
1787 /* BB add check for wbc flags */
1788 page_cache_get(page);
1789 if (!PageUptodate(page))
1790 cFYI(1, "ppw - page not up to date");
1793 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1795 * A writepage() implementation always needs to do either this,
1796 * or re-dirty the page with "redirty_page_for_writepage()" in
1797 * the case of a failure.
1799 * Just unlocking the page will cause the radix tree tag-bits
1800 * to fail to update with the state of the page correctly.
1802 set_page_writeback(page);
1803 retry_write:
1804 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1805 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1806 goto retry_write;
1807 else if (rc == -EAGAIN)
1808 redirty_page_for_writepage(wbc, page);
1809 else if (rc != 0)
1810 SetPageError(page);
1811 else
1812 SetPageUptodate(page);
1813 end_page_writeback(page);
1814 page_cache_release(page);
1815 FreeXid(xid);
1816 return rc;
/*
 * ->writepage: write the page with cifs_writepage_locked(), then release
 * the page lock regardless of the outcome.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
1826 static int cifs_write_end(struct file *file, struct address_space *mapping,
1827 loff_t pos, unsigned len, unsigned copied,
1828 struct page *page, void *fsdata)
1830 int rc;
1831 struct inode *inode = mapping->host;
1832 struct cifsFileInfo *cfile = file->private_data;
1833 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1834 __u32 pid;
1836 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1837 pid = cfile->pid;
1838 else
1839 pid = current->tgid;
1841 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1842 page, pos, copied);
1844 if (PageChecked(page)) {
1845 if (copied == len)
1846 SetPageUptodate(page);
1847 ClearPageChecked(page);
1848 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1849 SetPageUptodate(page);
1851 if (!PageUptodate(page)) {
1852 char *page_data;
1853 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1854 int xid;
1856 xid = GetXid();
1857 /* this is probably better than directly calling
1858 partialpage_write since in this function the file handle is
1859 known which we might as well leverage */
1860 /* BB check if anything else missing out of ppw
1861 such as updating last write time */
1862 page_data = kmap(page);
1863 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
1864 /* if (rc < 0) should we set writebehind rc? */
1865 kunmap(page);
1867 FreeXid(xid);
1868 } else {
1869 rc = copied;
1870 pos += copied;
1871 set_page_dirty(page);
1874 if (rc > 0) {
1875 spin_lock(&inode->i_lock);
1876 if (pos > inode->i_size)
1877 i_size_write(inode, pos);
1878 spin_unlock(&inode->i_lock);
1881 unlock_page(page);
1882 page_cache_release(page);
1884 return rc;
1887 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
1888 int datasync)
1890 int xid;
1891 int rc = 0;
1892 struct cifs_tcon *tcon;
1893 struct cifsFileInfo *smbfile = file->private_data;
1894 struct inode *inode = file->f_path.dentry->d_inode;
1895 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1897 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1898 if (rc)
1899 return rc;
1900 mutex_lock(&inode->i_mutex);
1902 xid = GetXid();
1904 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1905 file->f_path.dentry->d_name.name, datasync);
1907 if (!CIFS_I(inode)->clientCanCacheRead) {
1908 rc = cifs_invalidate_mapping(inode);
1909 if (rc) {
1910 cFYI(1, "rc: %d during invalidate phase", rc);
1911 rc = 0; /* don't care about it in fsync */
1915 tcon = tlink_tcon(smbfile->tlink);
1916 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1917 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1919 FreeXid(xid);
1920 mutex_unlock(&inode->i_mutex);
1921 return rc;
1924 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1926 int xid;
1927 int rc = 0;
1928 struct cifs_tcon *tcon;
1929 struct cifsFileInfo *smbfile = file->private_data;
1930 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1931 struct inode *inode = file->f_mapping->host;
1933 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1934 if (rc)
1935 return rc;
1936 mutex_lock(&inode->i_mutex);
1938 xid = GetXid();
1940 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1941 file->f_path.dentry->d_name.name, datasync);
1943 tcon = tlink_tcon(smbfile->tlink);
1944 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1945 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1947 FreeXid(xid);
1948 mutex_unlock(&inode->i_mutex);
1949 return rc;
1953 * As file closes, flush all cached write data for this inode checking
1954 * for write behind errors.
1956 int cifs_flush(struct file *file, fl_owner_t id)
1958 struct inode *inode = file->f_path.dentry->d_inode;
1959 int rc = 0;
1961 if (file->f_mode & FMODE_WRITE)
1962 rc = filemap_write_and_wait(inode->i_mapping);
1964 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1966 return rc;
1969 static int
1970 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
1972 int rc = 0;
1973 unsigned long i;
1975 for (i = 0; i < num_pages; i++) {
1976 pages[i] = alloc_page(__GFP_HIGHMEM);
1977 if (!pages[i]) {
1979 * save number of pages we have already allocated and
1980 * return with ENOMEM error
1982 num_pages = i;
1983 rc = -ENOMEM;
1984 goto error;
1988 return rc;
1990 error:
1991 for (i = 0; i < num_pages; i++)
1992 put_page(pages[i]);
1993 return rc;
1996 static inline
1997 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
1999 size_t num_pages;
2000 size_t clen;
2002 clen = min_t(const size_t, len, wsize);
2003 num_pages = clen / PAGE_CACHE_SIZE;
2004 if (clen % PAGE_CACHE_SIZE)
2005 num_pages++;
2007 if (cur_len)
2008 *cur_len = clen;
2010 return num_pages;
2013 static ssize_t
2014 cifs_iovec_write(struct file *file, const struct iovec *iov,
2015 unsigned long nr_segs, loff_t *poffset)
2017 unsigned int written;
2018 unsigned long num_pages, npages, i;
2019 size_t copied, len, cur_len;
2020 ssize_t total_written = 0;
2021 struct kvec *to_send;
2022 struct page **pages;
2023 struct iov_iter it;
2024 struct inode *inode;
2025 struct cifsFileInfo *open_file;
2026 struct cifs_tcon *pTcon;
2027 struct cifs_sb_info *cifs_sb;
2028 struct cifs_io_parms io_parms;
2029 int xid, rc;
2030 __u32 pid;
2032 len = iov_length(iov, nr_segs);
2033 if (!len)
2034 return 0;
2036 rc = generic_write_checks(file, poffset, &len, 0);
2037 if (rc)
2038 return rc;
2040 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2041 num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2043 pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL);
2044 if (!pages)
2045 return -ENOMEM;
2047 to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
2048 if (!to_send) {
2049 kfree(pages);
2050 return -ENOMEM;
2053 rc = cifs_write_allocate_pages(pages, num_pages);
2054 if (rc) {
2055 kfree(pages);
2056 kfree(to_send);
2057 return rc;
2060 xid = GetXid();
2061 open_file = file->private_data;
2063 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2064 pid = open_file->pid;
2065 else
2066 pid = current->tgid;
2068 pTcon = tlink_tcon(open_file->tlink);
2069 inode = file->f_path.dentry->d_inode;
2071 iov_iter_init(&it, iov, nr_segs, len, 0);
2072 npages = num_pages;
2074 do {
2075 size_t save_len = cur_len;
2076 for (i = 0; i < npages; i++) {
2077 copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
2078 copied = iov_iter_copy_from_user(pages[i], &it, 0,
2079 copied);
2080 cur_len -= copied;
2081 iov_iter_advance(&it, copied);
2082 to_send[i+1].iov_base = kmap(pages[i]);
2083 to_send[i+1].iov_len = copied;
2086 cur_len = save_len - cur_len;
2088 do {
2089 if (open_file->invalidHandle) {
2090 rc = cifs_reopen_file(open_file, false);
2091 if (rc != 0)
2092 break;
2094 io_parms.netfid = open_file->netfid;
2095 io_parms.pid = pid;
2096 io_parms.tcon = pTcon;
2097 io_parms.offset = *poffset;
2098 io_parms.length = cur_len;
2099 rc = CIFSSMBWrite2(xid, &io_parms, &written, to_send,
2100 npages, 0);
2101 } while (rc == -EAGAIN);
2103 for (i = 0; i < npages; i++)
2104 kunmap(pages[i]);
2106 if (written) {
2107 len -= written;
2108 total_written += written;
2109 cifs_update_eof(CIFS_I(inode), *poffset, written);
2110 *poffset += written;
2111 } else if (rc < 0) {
2112 if (!total_written)
2113 total_written = rc;
2114 break;
2117 /* get length and number of kvecs of the next write */
2118 npages = get_numpages(cifs_sb->wsize, len, &cur_len);
2119 } while (len > 0);
2121 if (total_written > 0) {
2122 spin_lock(&inode->i_lock);
2123 if (*poffset > inode->i_size)
2124 i_size_write(inode, *poffset);
2125 spin_unlock(&inode->i_lock);
2128 cifs_stats_bytes_written(pTcon, total_written);
2129 mark_inode_dirty_sync(inode);
2131 for (i = 0; i < num_pages; i++)
2132 put_page(pages[i]);
2133 kfree(to_send);
2134 kfree(pages);
2135 FreeXid(xid);
2136 return total_written;
2139 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2140 unsigned long nr_segs, loff_t pos)
2142 ssize_t written;
2143 struct inode *inode;
2145 inode = iocb->ki_filp->f_path.dentry->d_inode;
2148 * BB - optimize the way when signing is disabled. We can drop this
2149 * extra memory-to-memory copying and use iovec buffers for constructing
2150 * write request.
2153 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2154 if (written > 0) {
2155 CIFS_I(inode)->invalid_mapping = true;
2156 iocb->ki_pos = pos;
2159 return written;
2162 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2163 unsigned long nr_segs, loff_t pos)
2165 struct inode *inode;
2167 inode = iocb->ki_filp->f_path.dentry->d_inode;
2169 if (CIFS_I(inode)->clientCanCacheAll)
2170 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2173 * In strict cache mode we need to write the data to the server exactly
2174 * from the pos to pos+len-1 rather than flush all affected pages
2175 * because it may cause a error with mandatory locks on these pages but
2176 * not on the region from pos to ppos+len-1.
2179 return cifs_user_writev(iocb, iov, nr_segs, pos);
2182 static ssize_t
2183 cifs_iovec_read(struct file *file, const struct iovec *iov,
2184 unsigned long nr_segs, loff_t *poffset)
2186 int rc;
2187 int xid;
2188 ssize_t total_read;
2189 unsigned int bytes_read = 0;
2190 size_t len, cur_len;
2191 int iov_offset = 0;
2192 struct cifs_sb_info *cifs_sb;
2193 struct cifs_tcon *pTcon;
2194 struct cifsFileInfo *open_file;
2195 struct smb_com_read_rsp *pSMBr;
2196 struct cifs_io_parms io_parms;
2197 char *read_data;
2198 unsigned int rsize;
2199 __u32 pid;
2201 if (!nr_segs)
2202 return 0;
2204 len = iov_length(iov, nr_segs);
2205 if (!len)
2206 return 0;
2208 xid = GetXid();
2209 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2211 /* FIXME: set up handlers for larger reads and/or convert to async */
2212 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2214 open_file = file->private_data;
2215 pTcon = tlink_tcon(open_file->tlink);
2217 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2218 pid = open_file->pid;
2219 else
2220 pid = current->tgid;
2222 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2223 cFYI(1, "attempting read on write only file instance");
2225 for (total_read = 0; total_read < len; total_read += bytes_read) {
2226 cur_len = min_t(const size_t, len - total_read, rsize);
2227 rc = -EAGAIN;
2228 read_data = NULL;
2230 while (rc == -EAGAIN) {
2231 int buf_type = CIFS_NO_BUFFER;
2232 if (open_file->invalidHandle) {
2233 rc = cifs_reopen_file(open_file, true);
2234 if (rc != 0)
2235 break;
2237 io_parms.netfid = open_file->netfid;
2238 io_parms.pid = pid;
2239 io_parms.tcon = pTcon;
2240 io_parms.offset = *poffset;
2241 io_parms.length = cur_len;
2242 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2243 &read_data, &buf_type);
2244 pSMBr = (struct smb_com_read_rsp *)read_data;
2245 if (read_data) {
2246 char *data_offset = read_data + 4 +
2247 le16_to_cpu(pSMBr->DataOffset);
2248 if (memcpy_toiovecend(iov, data_offset,
2249 iov_offset, bytes_read))
2250 rc = -EFAULT;
2251 if (buf_type == CIFS_SMALL_BUFFER)
2252 cifs_small_buf_release(read_data);
2253 else if (buf_type == CIFS_LARGE_BUFFER)
2254 cifs_buf_release(read_data);
2255 read_data = NULL;
2256 iov_offset += bytes_read;
2260 if (rc || (bytes_read == 0)) {
2261 if (total_read) {
2262 break;
2263 } else {
2264 FreeXid(xid);
2265 return rc;
2267 } else {
2268 cifs_stats_bytes_read(pTcon, bytes_read);
2269 *poffset += bytes_read;
2273 FreeXid(xid);
2274 return total_read;
2277 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2278 unsigned long nr_segs, loff_t pos)
2280 ssize_t read;
2282 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2283 if (read > 0)
2284 iocb->ki_pos = pos;
2286 return read;
2289 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2290 unsigned long nr_segs, loff_t pos)
2292 struct inode *inode;
2294 inode = iocb->ki_filp->f_path.dentry->d_inode;
2296 if (CIFS_I(inode)->clientCanCacheRead)
2297 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2300 * In strict cache mode we need to read from the server all the time
2301 * if we don't have level II oplock because the server can delay mtime
2302 * change - so we can't make a decision about inode invalidating.
2303 * And we can also fail with pagereading if there are mandatory locks
2304 * on pages affected by this read but not on the region from pos to
2305 * pos+len-1.
2308 return cifs_user_readv(iocb, iov, nr_segs, pos);
2311 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2312 loff_t *poffset)
2314 int rc = -EACCES;
2315 unsigned int bytes_read = 0;
2316 unsigned int total_read;
2317 unsigned int current_read_size;
2318 unsigned int rsize;
2319 struct cifs_sb_info *cifs_sb;
2320 struct cifs_tcon *pTcon;
2321 int xid;
2322 char *current_offset;
2323 struct cifsFileInfo *open_file;
2324 struct cifs_io_parms io_parms;
2325 int buf_type = CIFS_NO_BUFFER;
2326 __u32 pid;
2328 xid = GetXid();
2329 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2331 /* FIXME: set up handlers for larger reads and/or convert to async */
2332 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2334 if (file->private_data == NULL) {
2335 rc = -EBADF;
2336 FreeXid(xid);
2337 return rc;
2339 open_file = file->private_data;
2340 pTcon = tlink_tcon(open_file->tlink);
2342 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2343 pid = open_file->pid;
2344 else
2345 pid = current->tgid;
2347 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2348 cFYI(1, "attempting read on write only file instance");
2350 for (total_read = 0, current_offset = read_data;
2351 read_size > total_read;
2352 total_read += bytes_read, current_offset += bytes_read) {
2353 current_read_size = min_t(uint, read_size - total_read, rsize);
2355 /* For windows me and 9x we do not want to request more
2356 than it negotiated since it will refuse the read then */
2357 if ((pTcon->ses) &&
2358 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
2359 current_read_size = min_t(uint, current_read_size,
2360 CIFSMaxBufSize);
2362 rc = -EAGAIN;
2363 while (rc == -EAGAIN) {
2364 if (open_file->invalidHandle) {
2365 rc = cifs_reopen_file(open_file, true);
2366 if (rc != 0)
2367 break;
2369 io_parms.netfid = open_file->netfid;
2370 io_parms.pid = pid;
2371 io_parms.tcon = pTcon;
2372 io_parms.offset = *poffset;
2373 io_parms.length = current_read_size;
2374 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2375 &current_offset, &buf_type);
2377 if (rc || (bytes_read == 0)) {
2378 if (total_read) {
2379 break;
2380 } else {
2381 FreeXid(xid);
2382 return rc;
2384 } else {
2385 cifs_stats_bytes_read(pTcon, total_read);
2386 *poffset += bytes_read;
2389 FreeXid(xid);
2390 return total_read;
2394 * If the page is mmap'ed into a process' page tables, then we need to make
2395 * sure that it doesn't change while being written back.
2397 static int
2398 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2400 struct page *page = vmf->page;
2402 lock_page(page);
2403 return VM_FAULT_LOCKED;
2406 static struct vm_operations_struct cifs_file_vm_ops = {
2407 .fault = filemap_fault,
2408 .page_mkwrite = cifs_page_mkwrite,
2411 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2413 int rc, xid;
2414 struct inode *inode = file->f_path.dentry->d_inode;
2416 xid = GetXid();
2418 if (!CIFS_I(inode)->clientCanCacheRead) {
2419 rc = cifs_invalidate_mapping(inode);
2420 if (rc)
2421 return rc;
2424 rc = generic_file_mmap(file, vma);
2425 if (rc == 0)
2426 vma->vm_ops = &cifs_file_vm_ops;
2427 FreeXid(xid);
2428 return rc;
2431 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2433 int rc, xid;
2435 xid = GetXid();
2436 rc = cifs_revalidate_file(file);
2437 if (rc) {
2438 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2439 FreeXid(xid);
2440 return rc;
2442 rc = generic_file_mmap(file, vma);
2443 if (rc == 0)
2444 vma->vm_ops = &cifs_file_vm_ops;
2445 FreeXid(xid);
2446 return rc;
2449 static int cifs_readpages(struct file *file, struct address_space *mapping,
2450 struct list_head *page_list, unsigned num_pages)
2452 int rc;
2453 struct list_head tmplist;
2454 struct cifsFileInfo *open_file = file->private_data;
2455 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2456 unsigned int rsize = cifs_sb->rsize;
2457 pid_t pid;
2460 * Give up immediately if rsize is too small to read an entire page.
2461 * The VFS will fall back to readpage. We should never reach this
2462 * point however since we set ra_pages to 0 when the rsize is smaller
2463 * than a cache page.
2465 if (unlikely(rsize < PAGE_CACHE_SIZE))
2466 return 0;
2469 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2470 * immediately if the cookie is negative
2472 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
2473 &num_pages);
2474 if (rc == 0)
2475 return rc;
2477 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2478 pid = open_file->pid;
2479 else
2480 pid = current->tgid;
2482 rc = 0;
2483 INIT_LIST_HEAD(&tmplist);
2485 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
2486 mapping, num_pages);
2489 * Start with the page at end of list and move it to private
2490 * list. Do the same with any following pages until we hit
2491 * the rsize limit, hit an index discontinuity, or run out of
2492 * pages. Issue the async read and then start the loop again
2493 * until the list is empty.
2495 * Note that list order is important. The page_list is in
2496 * the order of declining indexes. When we put the pages in
2497 * the rdata->pages, then we want them in increasing order.
2499 while (!list_empty(page_list)) {
2500 unsigned int bytes = PAGE_CACHE_SIZE;
2501 unsigned int expected_index;
2502 unsigned int nr_pages = 1;
2503 loff_t offset;
2504 struct page *page, *tpage;
2505 struct cifs_readdata *rdata;
2507 page = list_entry(page_list->prev, struct page, lru);
2510 * Lock the page and put it in the cache. Since no one else
2511 * should have access to this page, we're safe to simply set
2512 * PG_locked without checking it first.
2514 __set_page_locked(page);
2515 rc = add_to_page_cache_locked(page, mapping,
2516 page->index, GFP_KERNEL);
2518 /* give up if we can't stick it in the cache */
2519 if (rc) {
2520 __clear_page_locked(page);
2521 break;
2524 /* move first page to the tmplist */
2525 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2526 list_move_tail(&page->lru, &tmplist);
2528 /* now try and add more pages onto the request */
2529 expected_index = page->index + 1;
2530 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
2531 /* discontinuity ? */
2532 if (page->index != expected_index)
2533 break;
2535 /* would this page push the read over the rsize? */
2536 if (bytes + PAGE_CACHE_SIZE > rsize)
2537 break;
2539 __set_page_locked(page);
2540 if (add_to_page_cache_locked(page, mapping,
2541 page->index, GFP_KERNEL)) {
2542 __clear_page_locked(page);
2543 break;
2545 list_move_tail(&page->lru, &tmplist);
2546 bytes += PAGE_CACHE_SIZE;
2547 expected_index++;
2548 nr_pages++;
2551 rdata = cifs_readdata_alloc(nr_pages);
2552 if (!rdata) {
2553 /* best to give up if we're out of mem */
2554 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
2555 list_del(&page->lru);
2556 lru_cache_add_file(page);
2557 unlock_page(page);
2558 page_cache_release(page);
2560 rc = -ENOMEM;
2561 break;
2564 spin_lock(&cifs_file_list_lock);
2565 cifsFileInfo_get(open_file);
2566 spin_unlock(&cifs_file_list_lock);
2567 rdata->cfile = open_file;
2568 rdata->mapping = mapping;
2569 rdata->offset = offset;
2570 rdata->bytes = bytes;
2571 rdata->pid = pid;
2572 list_splice_init(&tmplist, &rdata->pages);
2574 do {
2575 if (open_file->invalidHandle) {
2576 rc = cifs_reopen_file(open_file, true);
2577 if (rc != 0)
2578 continue;
2580 rc = cifs_async_readv(rdata);
2581 } while (rc == -EAGAIN);
2583 if (rc != 0) {
2584 list_for_each_entry_safe(page, tpage, &rdata->pages,
2585 lru) {
2586 list_del(&page->lru);
2587 lru_cache_add_file(page);
2588 unlock_page(page);
2589 page_cache_release(page);
2591 cifs_readdata_free(rdata);
2592 break;
2596 return rc;
2599 static int cifs_readpage_worker(struct file *file, struct page *page,
2600 loff_t *poffset)
2602 char *read_data;
2603 int rc;
2605 /* Is the page cached? */
2606 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2607 if (rc == 0)
2608 goto read_complete;
2610 page_cache_get(page);
2611 read_data = kmap(page);
2612 /* for reads over a certain size could initiate async read ahead */
2614 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2616 if (rc < 0)
2617 goto io_error;
2618 else
2619 cFYI(1, "Bytes read %d", rc);
2621 file->f_path.dentry->d_inode->i_atime =
2622 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2624 if (PAGE_CACHE_SIZE > rc)
2625 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2627 flush_dcache_page(page);
2628 SetPageUptodate(page);
2630 /* send this page to the cache */
2631 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2633 rc = 0;
2635 io_error:
2636 kunmap(page);
2637 page_cache_release(page);
2639 read_complete:
2640 return rc;
2643 static int cifs_readpage(struct file *file, struct page *page)
2645 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2646 int rc = -EACCES;
2647 int xid;
2649 xid = GetXid();
2651 if (file->private_data == NULL) {
2652 rc = -EBADF;
2653 FreeXid(xid);
2654 return rc;
2657 cFYI(1, "readpage %p at offset %d 0x%x\n",
2658 page, (int)offset, (int)offset);
2660 rc = cifs_readpage_worker(file, page, &offset);
2662 unlock_page(page);
2664 FreeXid(xid);
2665 return rc;
2668 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2670 struct cifsFileInfo *open_file;
2672 spin_lock(&cifs_file_list_lock);
2673 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2674 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2675 spin_unlock(&cifs_file_list_lock);
2676 return 1;
2679 spin_unlock(&cifs_file_list_lock);
2680 return 0;
2683 /* We do not want to update the file size from server for inodes
2684 open for write - to avoid races with writepage extending
2685 the file - in the future we could consider allowing
2686 refreshing the inode only on increases in the file size
2687 but this is tricky to do without racing with writebehind
2688 page caching in the current Linux kernel design */
2689 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2691 if (!cifsInode)
2692 return true;
2694 if (is_inode_writable(cifsInode)) {
2695 /* This inode is open for write at least once */
2696 struct cifs_sb_info *cifs_sb;
2698 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2699 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2700 /* since no page cache to corrupt on directio
2701 we can change size safely */
2702 return true;
2705 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2706 return true;
2708 return false;
2709 } else
2710 return true;
2713 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2714 loff_t pos, unsigned len, unsigned flags,
2715 struct page **pagep, void **fsdata)
2717 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2718 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2719 loff_t page_start = pos & PAGE_MASK;
2720 loff_t i_size;
2721 struct page *page;
2722 int rc = 0;
2724 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2726 page = grab_cache_page_write_begin(mapping, index, flags);
2727 if (!page) {
2728 rc = -ENOMEM;
2729 goto out;
2732 if (PageUptodate(page))
2733 goto out;
2736 * If we write a full page it will be up to date, no need to read from
2737 * the server. If the write is short, we'll end up doing a sync write
2738 * instead.
2740 if (len == PAGE_CACHE_SIZE)
2741 goto out;
2744 * optimize away the read when we have an oplock, and we're not
2745 * expecting to use any of the data we'd be reading in. That
2746 * is, when the page lies beyond the EOF, or straddles the EOF
2747 * and the write will cover all of the existing data.
2749 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2750 i_size = i_size_read(mapping->host);
2751 if (page_start >= i_size ||
2752 (offset == 0 && (pos + len) >= i_size)) {
2753 zero_user_segments(page, 0, offset,
2754 offset + len,
2755 PAGE_CACHE_SIZE);
2757 * PageChecked means that the parts of the page
2758 * to which we're not writing are considered up
2759 * to date. Once the data is copied to the
2760 * page, it can be set uptodate.
2762 SetPageChecked(page);
2763 goto out;
2767 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2769 * might as well read a page, it is fast enough. If we get
2770 * an error, we don't need to return it. cifs_write_end will
2771 * do a sync write instead since PG_uptodate isn't set.
2773 cifs_readpage_worker(file, page, &page_start);
2774 } else {
2775 /* we could try using another file handle if there is one -
2776 but how would we lock it to prevent close of that handle
2777 racing with this read? In any case
2778 this will be written out by write_end so is fine */
2780 out:
2781 *pagep = page;
2782 return rc;
2785 static int cifs_release_page(struct page *page, gfp_t gfp)
2787 if (PagePrivate(page))
2788 return 0;
2790 return cifs_fscache_release_page(page, gfp);
2793 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2795 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2797 if (offset == 0)
2798 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2801 static int cifs_launder_page(struct page *page)
2803 int rc = 0;
2804 loff_t range_start = page_offset(page);
2805 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
2806 struct writeback_control wbc = {
2807 .sync_mode = WB_SYNC_ALL,
2808 .nr_to_write = 0,
2809 .range_start = range_start,
2810 .range_end = range_end,
2813 cFYI(1, "Launder page: %p", page);
2815 if (clear_page_dirty_for_io(page))
2816 rc = cifs_writepage_locked(page, &wbc);
2818 cifs_fscache_invalidate_page(page, page->mapping->host);
2819 return rc;
2822 void cifs_oplock_break(struct work_struct *work)
2824 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2825 oplock_break);
2826 struct inode *inode = cfile->dentry->d_inode;
2827 struct cifsInodeInfo *cinode = CIFS_I(inode);
2828 int rc = 0;
2830 if (inode && S_ISREG(inode->i_mode)) {
2831 if (cinode->clientCanCacheRead)
2832 break_lease(inode, O_RDONLY);
2833 else
2834 break_lease(inode, O_WRONLY);
2835 rc = filemap_fdatawrite(inode->i_mapping);
2836 if (cinode->clientCanCacheRead == 0) {
2837 rc = filemap_fdatawait(inode->i_mapping);
2838 mapping_set_error(inode->i_mapping, rc);
2839 invalidate_remote_inode(inode);
2841 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2844 rc = cifs_push_locks(cfile);
2845 if (rc)
2846 cERROR(1, "Push locks rc = %d", rc);
2849 * releasing stale oplock after recent reconnect of smb session using
2850 * a now incorrect file handle is not a data integrity issue but do
2851 * not bother sending an oplock release if session to server still is
2852 * disconnected since oplock already released by the server
2854 if (!cfile->oplock_break_cancelled) {
2855 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
2856 current->tgid, 0, 0, 0, 0,
2857 LOCKING_ANDX_OPLOCK_RELEASE, false,
2858 cinode->clientCanCacheRead ? 1 : 0);
2859 cFYI(1, "Oplock release rc = %d", rc);
2863 const struct address_space_operations cifs_addr_ops = {
2864 .readpage = cifs_readpage,
2865 .readpages = cifs_readpages,
2866 .writepage = cifs_writepage,
2867 .writepages = cifs_writepages,
2868 .write_begin = cifs_write_begin,
2869 .write_end = cifs_write_end,
2870 .set_page_dirty = __set_page_dirty_nobuffers,
2871 .releasepage = cifs_release_page,
2872 .invalidatepage = cifs_invalidate_page,
2873 .launder_page = cifs_launder_page,
2877 * cifs_readpages requires the server to support a buffer large enough to
2878 * contain the header plus one complete page of data. Otherwise, we need
2879 * to leave cifs_readpages out of the address space operations.
2881 const struct address_space_operations cifs_addr_ops_smallbuf = {
2882 .readpage = cifs_readpage,
2883 .writepage = cifs_writepage,
2884 .writepages = cifs_writepages,
2885 .write_begin = cifs_write_begin,
2886 .write_end = cifs_write_end,
2887 .set_page_dirty = __set_page_dirty_nobuffers,
2888 .releasepage = cifs_release_page,
2889 .invalidatepage = cifs_invalidate_page,
2890 .launder_page = cifs_launder_page,