ARM: ux500: delete remnant U5500 macros
[linux-2.6/btrfs-unstable.git] / fs / cifs / file.c
blob9154192b0683e368a521ddf118961e1cdd592355
1 /*
2 * fs/cifs/file.c
4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
49 return GENERIC_READ;
50 else if ((flags & O_ACCMODE) == O_WRONLY)
51 return GENERIC_WRITE;
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61 FILE_READ_DATA);
64 static u32 cifs_posix_convert_flags(unsigned int flags)
66 u32 posix_flags = 0;
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
75 if (flags & O_CREAT)
76 posix_flags |= SMB_O_CREAT;
77 if (flags & O_EXCL)
78 posix_flags |= SMB_O_EXCL;
79 if (flags & O_TRUNC)
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
82 if (flags & O_DSYNC)
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
88 if (flags & O_DIRECT)
89 posix_flags |= SMB_O_DIRECT;
91 return posix_flags;
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97 return FILE_CREATE;
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
101 return FILE_OPEN_IF;
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
104 else
105 return FILE_OPEN;
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
112 int rc;
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
124 return -ENOMEM;
126 tlink = cifs_sb_tlink(cifs_sb);
127 if (IS_ERR(tlink)) {
128 rc = PTR_ERR(tlink);
129 goto posix_open_ret;
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
142 if (rc)
143 goto posix_open_ret;
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
148 if (!pinode)
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
157 if (!*pinode) {
158 rc = -ENOMEM;
159 goto posix_open_ret;
161 } else {
162 cifs_fattr_to_inode(*pinode, &fattr);
165 posix_open_ret:
166 kfree(presp_data);
167 return rc;
170 static int
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, unsigned int xid)
175 int rc;
176 int desiredAccess;
177 int disposition;
178 int create_options = CREATE_NOT_DIR;
179 FILE_ALL_INFO *buf;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
212 if (!buf)
213 return -ENOMEM;
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
223 else
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
229 if (rc)
230 goto out;
232 if (tcon->unix_ext)
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
234 xid);
235 else
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
237 xid, pnetfid);
239 out:
240 kfree(buf);
241 return rc;
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
255 return pCifsFile;
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
267 INIT_LIST_HEAD(&pCifsFile->llist);
269 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
271 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
274 else
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
276 spin_unlock(&cifs_file_list_lock);
278 cifs_set_oplock_level(pCifsInode, oplock);
279 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
281 file->private_data = pCifsFile;
282 return pCifsFile;
285 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
287 struct cifsFileInfo *
288 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
290 spin_lock(&cifs_file_list_lock);
291 cifsFileInfo_get_locked(cifs_file);
292 spin_unlock(&cifs_file_list_lock);
293 return cifs_file;
297 * Release a reference on the file private data. This may involve closing
298 * the filehandle out on the server. Must be called without holding
299 * cifs_file_list_lock.
301 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
303 struct inode *inode = cifs_file->dentry->d_inode;
304 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
305 struct cifsInodeInfo *cifsi = CIFS_I(inode);
306 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
307 struct cifsLockInfo *li, *tmp;
309 spin_lock(&cifs_file_list_lock);
310 if (--cifs_file->count > 0) {
311 spin_unlock(&cifs_file_list_lock);
312 return;
315 /* remove it from the lists */
316 list_del(&cifs_file->flist);
317 list_del(&cifs_file->tlist);
319 if (list_empty(&cifsi->openFileList)) {
320 cFYI(1, "closing last open instance for inode %p",
321 cifs_file->dentry->d_inode);
323 /* in strict cache mode we need invalidate mapping on the last
324 close because it may cause a error when we open this file
325 again and get at least level II oplock */
326 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
327 CIFS_I(inode)->invalid_mapping = true;
329 cifs_set_oplock_level(cifsi, 0);
331 spin_unlock(&cifs_file_list_lock);
333 cancel_work_sync(&cifs_file->oplock_break);
335 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
336 unsigned int xid;
337 int rc;
338 xid = get_xid();
339 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
340 free_xid(xid);
343 /* Delete any outstanding lock records. We'll lose them when the file
344 * is closed anyway.
346 mutex_lock(&cifsi->lock_mutex);
347 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
348 list_del(&li->llist);
349 cifs_del_lock_waiters(li);
350 kfree(li);
352 mutex_unlock(&cifsi->lock_mutex);
354 cifs_put_tlink(cifs_file->tlink);
355 dput(cifs_file->dentry);
356 kfree(cifs_file);
359 int cifs_open(struct inode *inode, struct file *file)
361 int rc = -EACCES;
362 unsigned int xid;
363 __u32 oplock;
364 struct cifs_sb_info *cifs_sb;
365 struct cifs_tcon *tcon;
366 struct tcon_link *tlink;
367 struct cifsFileInfo *pCifsFile = NULL;
368 char *full_path = NULL;
369 bool posix_open_ok = false;
370 __u16 netfid;
372 xid = get_xid();
374 cifs_sb = CIFS_SB(inode->i_sb);
375 tlink = cifs_sb_tlink(cifs_sb);
376 if (IS_ERR(tlink)) {
377 free_xid(xid);
378 return PTR_ERR(tlink);
380 tcon = tlink_tcon(tlink);
382 full_path = build_path_from_dentry(file->f_path.dentry);
383 if (full_path == NULL) {
384 rc = -ENOMEM;
385 goto out;
388 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
389 inode, file->f_flags, full_path);
391 if (tcon->ses->server->oplocks)
392 oplock = REQ_OPLOCK;
393 else
394 oplock = 0;
396 if (!tcon->broken_posix_open && tcon->unix_ext &&
397 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
398 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
399 /* can not refresh inode info since size could be stale */
400 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
401 cifs_sb->mnt_file_mode /* ignored */,
402 file->f_flags, &oplock, &netfid, xid);
403 if (rc == 0) {
404 cFYI(1, "posix open succeeded");
405 posix_open_ok = true;
406 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
407 if (tcon->ses->serverNOS)
408 cERROR(1, "server %s of type %s returned"
409 " unexpected error on SMB posix open"
410 ", disabling posix open support."
411 " Check if server update available.",
412 tcon->ses->serverName,
413 tcon->ses->serverNOS);
414 tcon->broken_posix_open = true;
415 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
416 (rc != -EOPNOTSUPP)) /* path not found or net err */
417 goto out;
418 /* else fallthrough to retry open the old way on network i/o
419 or DFS errors */
422 if (!posix_open_ok) {
423 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
424 file->f_flags, &oplock, &netfid, xid);
425 if (rc)
426 goto out;
429 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
430 if (pCifsFile == NULL) {
431 CIFSSMBClose(xid, tcon, netfid);
432 rc = -ENOMEM;
433 goto out;
436 cifs_fscache_set_inode_cookie(inode, file);
438 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
439 /* time to set mode which we can not set earlier due to
440 problems creating new read-only files */
441 struct cifs_unix_set_info_args args = {
442 .mode = inode->i_mode,
443 .uid = NO_CHANGE_64,
444 .gid = NO_CHANGE_64,
445 .ctime = NO_CHANGE_64,
446 .atime = NO_CHANGE_64,
447 .mtime = NO_CHANGE_64,
448 .device = 0,
450 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
451 pCifsFile->pid);
454 out:
455 kfree(full_path);
456 free_xid(xid);
457 cifs_put_tlink(tlink);
458 return rc;
/*
 * Placeholder for reacquiring the byte range locks that were released when
 * the session to the server was lost.  Nothing is relocked yet, so this
 * always reports success.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	/* BB list all locks open on this file and relock */
	return 0;
}
472 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
474 int rc = -EACCES;
475 unsigned int xid;
476 __u32 oplock;
477 struct cifs_sb_info *cifs_sb;
478 struct cifs_tcon *tcon;
479 struct cifsInodeInfo *pCifsInode;
480 struct inode *inode;
481 char *full_path = NULL;
482 int desiredAccess;
483 int disposition = FILE_OPEN;
484 int create_options = CREATE_NOT_DIR;
485 __u16 netfid;
487 xid = get_xid();
488 mutex_lock(&pCifsFile->fh_mutex);
489 if (!pCifsFile->invalidHandle) {
490 mutex_unlock(&pCifsFile->fh_mutex);
491 rc = 0;
492 free_xid(xid);
493 return rc;
496 inode = pCifsFile->dentry->d_inode;
497 cifs_sb = CIFS_SB(inode->i_sb);
498 tcon = tlink_tcon(pCifsFile->tlink);
500 /* can not grab rename sem here because various ops, including
501 those that already have the rename sem can end up causing writepage
502 to get called and if the server was down that means we end up here,
503 and we can never tell if the caller already has the rename_sem */
504 full_path = build_path_from_dentry(pCifsFile->dentry);
505 if (full_path == NULL) {
506 rc = -ENOMEM;
507 mutex_unlock(&pCifsFile->fh_mutex);
508 free_xid(xid);
509 return rc;
512 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
513 inode, pCifsFile->f_flags, full_path);
515 if (tcon->ses->server->oplocks)
516 oplock = REQ_OPLOCK;
517 else
518 oplock = 0;
520 if (tcon->unix_ext && cap_unix(tcon->ses) &&
521 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
522 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
524 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
525 * original open. Must mask them off for a reopen.
527 unsigned int oflags = pCifsFile->f_flags &
528 ~(O_CREAT | O_EXCL | O_TRUNC);
530 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
531 cifs_sb->mnt_file_mode /* ignored */,
532 oflags, &oplock, &netfid, xid);
533 if (rc == 0) {
534 cFYI(1, "posix reopen succeeded");
535 goto reopen_success;
537 /* fallthrough to retry open the old way on errors, especially
538 in the reconnect path it is important to retry hard */
541 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
543 if (backup_cred(cifs_sb))
544 create_options |= CREATE_OPEN_BACKUP_INTENT;
546 /* Can not refresh inode by passing in file_info buf to be returned
547 by SMBOpen and then calling get_inode_info with returned buf
548 since file might have write behind data that needs to be flushed
549 and server version of file size can be stale. If we knew for sure
550 that inode was not dirty locally we could do this */
552 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
553 create_options, &netfid, &oplock, NULL,
554 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
555 CIFS_MOUNT_MAP_SPECIAL_CHR);
556 if (rc) {
557 mutex_unlock(&pCifsFile->fh_mutex);
558 cFYI(1, "cifs_open returned 0x%x", rc);
559 cFYI(1, "oplock: %d", oplock);
560 goto reopen_error_exit;
563 reopen_success:
564 pCifsFile->netfid = netfid;
565 pCifsFile->invalidHandle = false;
566 mutex_unlock(&pCifsFile->fh_mutex);
567 pCifsInode = CIFS_I(inode);
569 if (can_flush) {
570 rc = filemap_write_and_wait(inode->i_mapping);
571 mapping_set_error(inode->i_mapping, rc);
573 if (tcon->unix_ext)
574 rc = cifs_get_inode_info_unix(&inode,
575 full_path, inode->i_sb, xid);
576 else
577 rc = cifs_get_inode_info(&inode,
578 full_path, NULL, inode->i_sb,
579 xid, NULL);
580 } /* else we are writing out data to server already
581 and could deadlock if we tried to flush data, and
582 since we do not know if we have data that would
583 invalidate the current end of file on the server
584 we can not go to the server to get the new inod
585 info */
587 cifs_set_oplock_level(pCifsInode, oplock);
589 cifs_relock_file(pCifsFile);
591 reopen_error_exit:
592 kfree(full_path);
593 free_xid(xid);
594 return rc;
597 int cifs_close(struct inode *inode, struct file *file)
599 if (file->private_data != NULL) {
600 cifsFileInfo_put(file->private_data);
601 file->private_data = NULL;
604 /* return code from the ->release op is always ignored */
605 return 0;
608 int cifs_closedir(struct inode *inode, struct file *file)
610 int rc = 0;
611 unsigned int xid;
612 struct cifsFileInfo *pCFileStruct = file->private_data;
613 char *ptmp;
615 cFYI(1, "Closedir inode = 0x%p", inode);
617 xid = get_xid();
619 if (pCFileStruct) {
620 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
622 cFYI(1, "Freeing private data in close dir");
623 spin_lock(&cifs_file_list_lock);
624 if (!pCFileStruct->srch_inf.endOfSearch &&
625 !pCFileStruct->invalidHandle) {
626 pCFileStruct->invalidHandle = true;
627 spin_unlock(&cifs_file_list_lock);
628 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
629 cFYI(1, "Closing uncompleted readdir with rc %d",
630 rc);
631 /* not much we can do if it fails anyway, ignore rc */
632 rc = 0;
633 } else
634 spin_unlock(&cifs_file_list_lock);
635 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
636 if (ptmp) {
637 cFYI(1, "closedir free smb buf in srch struct");
638 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
639 if (pCFileStruct->srch_inf.smallBuf)
640 cifs_small_buf_release(ptmp);
641 else
642 cifs_buf_release(ptmp);
644 cifs_put_tlink(pCFileStruct->tlink);
645 kfree(file->private_data);
646 file->private_data = NULL;
648 /* BB can we lock the filestruct while this is going on? */
649 free_xid(xid);
650 return rc;
653 static struct cifsLockInfo *
654 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
656 struct cifsLockInfo *lock =
657 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
658 if (!lock)
659 return lock;
660 lock->offset = offset;
661 lock->length = length;
662 lock->type = type;
663 lock->pid = current->tgid;
664 INIT_LIST_HEAD(&lock->blist);
665 init_waitqueue_head(&lock->block_q);
666 return lock;
669 static void
670 cifs_del_lock_waiters(struct cifsLockInfo *lock)
672 struct cifsLockInfo *li, *tmp;
673 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
674 list_del_init(&li->blist);
675 wake_up(&li->block_q);
679 static bool
680 cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
681 __u64 length, __u8 type, struct cifsFileInfo *cur,
682 struct cifsLockInfo **conf_lock)
684 struct cifsLockInfo *li;
685 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
687 list_for_each_entry(li, &cfile->llist, llist) {
688 if (offset + length <= li->offset ||
689 offset >= li->offset + li->length)
690 continue;
691 else if ((type & server->vals->shared_lock_type) &&
692 ((server->ops->compare_fids(cur, cfile) &&
693 current->tgid == li->pid) || type == li->type))
694 continue;
695 else {
696 *conf_lock = li;
697 return true;
700 return false;
703 static bool
704 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
705 __u8 type, struct cifsLockInfo **conf_lock)
707 bool rc = false;
708 struct cifsFileInfo *fid, *tmp;
709 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
711 spin_lock(&cifs_file_list_lock);
712 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
713 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
714 cfile, conf_lock);
715 if (rc)
716 break;
718 spin_unlock(&cifs_file_list_lock);
720 return rc;
724 * Check if there is another lock that prevents us to set the lock (mandatory
725 * style). If such a lock exists, update the flock structure with its
726 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
727 * or leave it the same if we can't. Returns 0 if we don't need to request to
728 * the server or 1 otherwise.
730 static int
731 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
732 __u8 type, struct file_lock *flock)
734 int rc = 0;
735 struct cifsLockInfo *conf_lock;
736 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
737 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
738 bool exist;
740 mutex_lock(&cinode->lock_mutex);
742 exist = cifs_find_lock_conflict(cfile, offset, length, type,
743 &conf_lock);
744 if (exist) {
745 flock->fl_start = conf_lock->offset;
746 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
747 flock->fl_pid = conf_lock->pid;
748 if (conf_lock->type & server->vals->shared_lock_type)
749 flock->fl_type = F_RDLCK;
750 else
751 flock->fl_type = F_WRLCK;
752 } else if (!cinode->can_cache_brlcks)
753 rc = 1;
754 else
755 flock->fl_type = F_UNLCK;
757 mutex_unlock(&cinode->lock_mutex);
758 return rc;
761 static void
762 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
764 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
765 mutex_lock(&cinode->lock_mutex);
766 list_add_tail(&lock->llist, &cfile->llist);
767 mutex_unlock(&cinode->lock_mutex);
771 * Set the byte-range lock (mandatory style). Returns:
772 * 1) 0, if we set the lock and don't need to request to the server;
773 * 2) 1, if no locks prevent us but we need to request to the server;
774 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
776 static int
777 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
778 bool wait)
780 struct cifsLockInfo *conf_lock;
781 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
782 bool exist;
783 int rc = 0;
785 try_again:
786 exist = false;
787 mutex_lock(&cinode->lock_mutex);
789 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
790 lock->type, &conf_lock);
791 if (!exist && cinode->can_cache_brlcks) {
792 list_add_tail(&lock->llist, &cfile->llist);
793 mutex_unlock(&cinode->lock_mutex);
794 return rc;
797 if (!exist)
798 rc = 1;
799 else if (!wait)
800 rc = -EACCES;
801 else {
802 list_add_tail(&lock->blist, &conf_lock->blist);
803 mutex_unlock(&cinode->lock_mutex);
804 rc = wait_event_interruptible(lock->block_q,
805 (lock->blist.prev == &lock->blist) &&
806 (lock->blist.next == &lock->blist));
807 if (!rc)
808 goto try_again;
809 mutex_lock(&cinode->lock_mutex);
810 list_del_init(&lock->blist);
813 mutex_unlock(&cinode->lock_mutex);
814 return rc;
818 * Check if there is another lock that prevents us to set the lock (posix
819 * style). If such a lock exists, update the flock structure with its
820 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
821 * or leave it the same if we can't. Returns 0 if we don't need to request to
822 * the server or 1 otherwise.
824 static int
825 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
827 int rc = 0;
828 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
829 unsigned char saved_type = flock->fl_type;
831 if ((flock->fl_flags & FL_POSIX) == 0)
832 return 1;
834 mutex_lock(&cinode->lock_mutex);
835 posix_test_lock(file, flock);
837 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
838 flock->fl_type = saved_type;
839 rc = 1;
842 mutex_unlock(&cinode->lock_mutex);
843 return rc;
847 * Set the byte-range lock (posix style). Returns:
848 * 1) 0, if we set the lock and don't need to request to the server;
849 * 2) 1, if we need to request to the server;
850 * 3) <0, if the error occurs while setting the lock.
852 static int
853 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
855 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
856 int rc = 1;
858 if ((flock->fl_flags & FL_POSIX) == 0)
859 return rc;
861 try_again:
862 mutex_lock(&cinode->lock_mutex);
863 if (!cinode->can_cache_brlcks) {
864 mutex_unlock(&cinode->lock_mutex);
865 return rc;
868 rc = posix_lock_file(file, flock, NULL);
869 mutex_unlock(&cinode->lock_mutex);
870 if (rc == FILE_LOCK_DEFERRED) {
871 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
872 if (!rc)
873 goto try_again;
874 locks_delete_block(flock);
876 return rc;
879 static int
880 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
882 unsigned int xid;
883 int rc = 0, stored_rc;
884 struct cifsLockInfo *li, *tmp;
885 struct cifs_tcon *tcon;
886 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
887 unsigned int num, max_num, max_buf;
888 LOCKING_ANDX_RANGE *buf, *cur;
889 int types[] = {LOCKING_ANDX_LARGE_FILES,
890 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
891 int i;
893 xid = get_xid();
894 tcon = tlink_tcon(cfile->tlink);
896 mutex_lock(&cinode->lock_mutex);
897 if (!cinode->can_cache_brlcks) {
898 mutex_unlock(&cinode->lock_mutex);
899 free_xid(xid);
900 return rc;
904 * Accessing maxBuf is racy with cifs_reconnect - need to store value
905 * and check it for zero before using.
907 max_buf = tcon->ses->server->maxBuf;
908 if (!max_buf) {
909 mutex_unlock(&cinode->lock_mutex);
910 free_xid(xid);
911 return -EINVAL;
914 max_num = (max_buf - sizeof(struct smb_hdr)) /
915 sizeof(LOCKING_ANDX_RANGE);
916 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
917 if (!buf) {
918 mutex_unlock(&cinode->lock_mutex);
919 free_xid(xid);
920 return rc;
923 for (i = 0; i < 2; i++) {
924 cur = buf;
925 num = 0;
926 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
927 if (li->type != types[i])
928 continue;
929 cur->Pid = cpu_to_le16(li->pid);
930 cur->LengthLow = cpu_to_le32((u32)li->length);
931 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
932 cur->OffsetLow = cpu_to_le32((u32)li->offset);
933 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
934 if (++num == max_num) {
935 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
936 (__u8)li->type, 0, num,
937 buf);
938 if (stored_rc)
939 rc = stored_rc;
940 cur = buf;
941 num = 0;
942 } else
943 cur++;
946 if (num) {
947 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
948 (__u8)types[i], 0, num, buf);
949 if (stored_rc)
950 rc = stored_rc;
954 cinode->can_cache_brlcks = false;
955 mutex_unlock(&cinode->lock_mutex);
957 kfree(buf);
958 free_xid(xid);
959 return rc;
/*
 * Walk the i_flock list of an inode; lockp points at the link that leads to
 * the current lock.  Derived from the iterator in fs/locks.c, with a name
 * change and with the arguments parenthesized so that expressions such as
 * &some_inode can be passed safely.
 */
#define cifs_for_each_lock(inode, lockp) \
	for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; \
	     (lockp) = &(*(lockp))->fl_next)
967 struct lock_to_push {
968 struct list_head llist;
969 __u64 offset;
970 __u64 length;
971 __u32 pid;
972 __u16 netfid;
973 __u8 type;
976 static int
977 cifs_push_posix_locks(struct cifsFileInfo *cfile)
979 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
980 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
981 struct file_lock *flock, **before;
982 unsigned int count = 0, i = 0;
983 int rc = 0, xid, type;
984 struct list_head locks_to_send, *el;
985 struct lock_to_push *lck, *tmp;
986 __u64 length;
988 xid = get_xid();
990 mutex_lock(&cinode->lock_mutex);
991 if (!cinode->can_cache_brlcks) {
992 mutex_unlock(&cinode->lock_mutex);
993 free_xid(xid);
994 return rc;
997 lock_flocks();
998 cifs_for_each_lock(cfile->dentry->d_inode, before) {
999 if ((*before)->fl_flags & FL_POSIX)
1000 count++;
1002 unlock_flocks();
1004 INIT_LIST_HEAD(&locks_to_send);
1007 * Allocating count locks is enough because no FL_POSIX locks can be
1008 * added to the list while we are holding cinode->lock_mutex that
1009 * protects locking operations of this inode.
1011 for (; i < count; i++) {
1012 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1013 if (!lck) {
1014 rc = -ENOMEM;
1015 goto err_out;
1017 list_add_tail(&lck->llist, &locks_to_send);
1020 el = locks_to_send.next;
1021 lock_flocks();
1022 cifs_for_each_lock(cfile->dentry->d_inode, before) {
1023 flock = *before;
1024 if ((flock->fl_flags & FL_POSIX) == 0)
1025 continue;
1026 if (el == &locks_to_send) {
1028 * The list ended. We don't have enough allocated
1029 * structures - something is really wrong.
1031 cERROR(1, "Can't push all brlocks!");
1032 break;
1034 length = 1 + flock->fl_end - flock->fl_start;
1035 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1036 type = CIFS_RDLCK;
1037 else
1038 type = CIFS_WRLCK;
1039 lck = list_entry(el, struct lock_to_push, llist);
1040 lck->pid = flock->fl_pid;
1041 lck->netfid = cfile->netfid;
1042 lck->length = length;
1043 lck->type = type;
1044 lck->offset = flock->fl_start;
1045 el = el->next;
1047 unlock_flocks();
1049 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1050 int stored_rc;
1052 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1053 lck->offset, lck->length, NULL,
1054 lck->type, 0);
1055 if (stored_rc)
1056 rc = stored_rc;
1057 list_del(&lck->llist);
1058 kfree(lck);
1061 out:
1062 cinode->can_cache_brlcks = false;
1063 mutex_unlock(&cinode->lock_mutex);
1065 free_xid(xid);
1066 return rc;
1067 err_out:
1068 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1069 list_del(&lck->llist);
1070 kfree(lck);
1072 goto out;
1075 static int
1076 cifs_push_locks(struct cifsFileInfo *cfile)
1078 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1079 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1081 if (cap_unix(tcon->ses) &&
1082 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1083 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1084 return cifs_push_posix_locks(cfile);
1086 return cifs_push_mandatory_locks(cfile);
1089 static void
1090 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1091 bool *wait_flag, struct TCP_Server_Info *server)
1093 if (flock->fl_flags & FL_POSIX)
1094 cFYI(1, "Posix");
1095 if (flock->fl_flags & FL_FLOCK)
1096 cFYI(1, "Flock");
1097 if (flock->fl_flags & FL_SLEEP) {
1098 cFYI(1, "Blocking lock");
1099 *wait_flag = true;
1101 if (flock->fl_flags & FL_ACCESS)
1102 cFYI(1, "Process suspended by mandatory locking - "
1103 "not implemented yet");
1104 if (flock->fl_flags & FL_LEASE)
1105 cFYI(1, "Lease on file - not implemented yet");
1106 if (flock->fl_flags &
1107 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1108 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1110 *type = server->vals->large_lock_type;
1111 if (flock->fl_type == F_WRLCK) {
1112 cFYI(1, "F_WRLCK ");
1113 *type |= server->vals->exclusive_lock_type;
1114 *lock = 1;
1115 } else if (flock->fl_type == F_UNLCK) {
1116 cFYI(1, "F_UNLCK");
1117 *type |= server->vals->unlock_lock_type;
1118 *unlock = 1;
1119 /* Check if unlock includes more than one lock range */
1120 } else if (flock->fl_type == F_RDLCK) {
1121 cFYI(1, "F_RDLCK");
1122 *type |= server->vals->shared_lock_type;
1123 *lock = 1;
1124 } else if (flock->fl_type == F_EXLCK) {
1125 cFYI(1, "F_EXLCK");
1126 *type |= server->vals->exclusive_lock_type;
1127 *lock = 1;
1128 } else if (flock->fl_type == F_SHLCK) {
1129 cFYI(1, "F_SHLCK");
1130 *type |= server->vals->shared_lock_type;
1131 *lock = 1;
1132 } else
1133 cFYI(1, "Unknown type of lock");
1136 static int
1137 cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
1138 __u64 length, __u32 type, int lock, int unlock, bool wait)
1140 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
1141 current->tgid, length, offset, unlock, lock,
1142 (__u8)type, wait, 0);
1145 static int
1146 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1147 bool wait_flag, bool posix_lck, unsigned int xid)
1149 int rc = 0;
1150 __u64 length = 1 + flock->fl_end - flock->fl_start;
1151 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1152 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1153 struct TCP_Server_Info *server = tcon->ses->server;
1154 __u16 netfid = cfile->netfid;
1156 if (posix_lck) {
1157 int posix_lock_type;
1159 rc = cifs_posix_lock_test(file, flock);
1160 if (!rc)
1161 return rc;
1163 if (type & server->vals->shared_lock_type)
1164 posix_lock_type = CIFS_RDLCK;
1165 else
1166 posix_lock_type = CIFS_WRLCK;
1167 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1168 flock->fl_start, length, flock,
1169 posix_lock_type, wait_flag);
1170 return rc;
1173 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1174 if (!rc)
1175 return rc;
1177 /* BB we could chain these into one lock request BB */
1178 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
1179 1, 0, false);
1180 if (rc == 0) {
1181 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1182 type, 0, 1, false);
1183 flock->fl_type = F_UNLCK;
1184 if (rc != 0)
1185 cERROR(1, "Error unlocking previously locked "
1186 "range %d during test of lock", rc);
1187 return 0;
1190 if (type & server->vals->shared_lock_type) {
1191 flock->fl_type = F_WRLCK;
1192 return 0;
1195 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1196 type | server->vals->shared_lock_type, 1, 0,
1197 false);
1198 if (rc == 0) {
1199 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1200 type | server->vals->shared_lock_type,
1201 0, 1, false);
1202 flock->fl_type = F_RDLCK;
1203 if (rc != 0)
1204 cERROR(1, "Error unlocking previously locked "
1205 "range %d during test of lock", rc);
1206 } else
1207 flock->fl_type = F_WRLCK;
1209 return 0;
1212 static void
1213 cifs_move_llist(struct list_head *source, struct list_head *dest)
1215 struct list_head *li, *tmp;
1216 list_for_each_safe(li, tmp, source)
1217 list_move(li, dest);
1220 static void
1221 cifs_free_llist(struct list_head *llist)
1223 struct cifsLockInfo *li, *tmp;
1224 list_for_each_entry_safe(li, tmp, llist, llist) {
1225 cifs_del_lock_waiters(li);
1226 list_del(&li->llist);
1227 kfree(li);
1231 static int
1232 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1233 unsigned int xid)
1235 int rc = 0, stored_rc;
1236 int types[] = {LOCKING_ANDX_LARGE_FILES,
1237 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1238 unsigned int i;
1239 unsigned int max_num, num, max_buf;
1240 LOCKING_ANDX_RANGE *buf, *cur;
1241 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1242 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1243 struct cifsLockInfo *li, *tmp;
1244 __u64 length = 1 + flock->fl_end - flock->fl_start;
1245 struct list_head tmp_llist;
1247 INIT_LIST_HEAD(&tmp_llist);
1250 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1251 * and check it for zero before using.
1253 max_buf = tcon->ses->server->maxBuf;
1254 if (!max_buf)
1255 return -EINVAL;
1257 max_num = (max_buf - sizeof(struct smb_hdr)) /
1258 sizeof(LOCKING_ANDX_RANGE);
1259 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1260 if (!buf)
1261 return -ENOMEM;
1263 mutex_lock(&cinode->lock_mutex);
1264 for (i = 0; i < 2; i++) {
1265 cur = buf;
1266 num = 0;
1267 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
1268 if (flock->fl_start > li->offset ||
1269 (flock->fl_start + length) <
1270 (li->offset + li->length))
1271 continue;
1272 if (current->tgid != li->pid)
1273 continue;
1274 if (types[i] != li->type)
1275 continue;
1276 if (cinode->can_cache_brlcks) {
1278 * We can cache brlock requests - simply remove
1279 * a lock from the file's list.
1281 list_del(&li->llist);
1282 cifs_del_lock_waiters(li);
1283 kfree(li);
1284 continue;
1286 cur->Pid = cpu_to_le16(li->pid);
1287 cur->LengthLow = cpu_to_le32((u32)li->length);
1288 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1289 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1290 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1292 * We need to save a lock here to let us add it again to
1293 * the file's list if the unlock range request fails on
1294 * the server.
1296 list_move(&li->llist, &tmp_llist);
1297 if (++num == max_num) {
1298 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1299 li->type, num, 0, buf);
1300 if (stored_rc) {
1302 * We failed on the unlock range
1303 * request - add all locks from the tmp
1304 * list to the head of the file's list.
1306 cifs_move_llist(&tmp_llist,
1307 &cfile->llist);
1308 rc = stored_rc;
1309 } else
1311 * The unlock range request succeed -
1312 * free the tmp list.
1314 cifs_free_llist(&tmp_llist);
1315 cur = buf;
1316 num = 0;
1317 } else
1318 cur++;
1320 if (num) {
1321 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1322 types[i], num, 0, buf);
1323 if (stored_rc) {
1324 cifs_move_llist(&tmp_llist, &cfile->llist);
1325 rc = stored_rc;
1326 } else
1327 cifs_free_llist(&tmp_llist);
1331 mutex_unlock(&cinode->lock_mutex);
1332 kfree(buf);
1333 return rc;
1336 static int
1337 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1338 bool wait_flag, bool posix_lck, int lock, int unlock,
1339 unsigned int xid)
1341 int rc = 0;
1342 __u64 length = 1 + flock->fl_end - flock->fl_start;
1343 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1344 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1345 struct TCP_Server_Info *server = tcon->ses->server;
1346 __u16 netfid = cfile->netfid;
1348 if (posix_lck) {
1349 int posix_lock_type;
1351 rc = cifs_posix_lock_set(file, flock);
1352 if (!rc || rc < 0)
1353 return rc;
1355 if (type & server->vals->shared_lock_type)
1356 posix_lock_type = CIFS_RDLCK;
1357 else
1358 posix_lock_type = CIFS_WRLCK;
1360 if (unlock == 1)
1361 posix_lock_type = CIFS_UNLCK;
1363 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1364 flock->fl_start, length, NULL,
1365 posix_lock_type, wait_flag);
1366 goto out;
1369 if (lock) {
1370 struct cifsLockInfo *lock;
1372 lock = cifs_lock_init(flock->fl_start, length, type);
1373 if (!lock)
1374 return -ENOMEM;
1376 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1377 if (rc < 0)
1378 kfree(lock);
1379 if (rc <= 0)
1380 goto out;
1382 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1383 type, 1, 0, wait_flag);
1384 if (rc) {
1385 kfree(lock);
1386 goto out;
1389 cifs_lock_add(cfile, lock);
1390 } else if (unlock)
1391 rc = cifs_unlock_range(cfile, flock, xid);
1393 out:
1394 if (flock->fl_flags & FL_POSIX)
1395 posix_lock_file_wait(file, flock);
1396 return rc;
1399 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1401 int rc, xid;
1402 int lock = 0, unlock = 0;
1403 bool wait_flag = false;
1404 bool posix_lck = false;
1405 struct cifs_sb_info *cifs_sb;
1406 struct cifs_tcon *tcon;
1407 struct cifsInodeInfo *cinode;
1408 struct cifsFileInfo *cfile;
1409 __u16 netfid;
1410 __u32 type;
1412 rc = -EACCES;
1413 xid = get_xid();
1415 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1416 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1417 flock->fl_start, flock->fl_end);
1419 cfile = (struct cifsFileInfo *)file->private_data;
1420 tcon = tlink_tcon(cfile->tlink);
1422 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1423 tcon->ses->server);
1425 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1426 netfid = cfile->netfid;
1427 cinode = CIFS_I(file->f_path.dentry->d_inode);
1429 if (cap_unix(tcon->ses) &&
1430 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1431 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1432 posix_lck = true;
1434 * BB add code here to normalize offset and length to account for
1435 * negative length which we can not accept over the wire.
1437 if (IS_GETLK(cmd)) {
1438 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1439 free_xid(xid);
1440 return rc;
1443 if (!lock && !unlock) {
1445 * if no lock or unlock then nothing to do since we do not
1446 * know what it is
1448 free_xid(xid);
1449 return -EOPNOTSUPP;
1452 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1453 xid);
1454 free_xid(xid);
1455 return rc;
1459 * update the file size (if needed) after a write. Should be called with
1460 * the inode->i_lock held
1462 void
1463 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1464 unsigned int bytes_written)
1466 loff_t end_of_write = offset + bytes_written;
1468 if (end_of_write > cifsi->server_eof)
1469 cifsi->server_eof = end_of_write;
1472 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1473 const char *write_data, size_t write_size,
1474 loff_t *poffset)
1476 int rc = 0;
1477 unsigned int bytes_written = 0;
1478 unsigned int total_written;
1479 struct cifs_sb_info *cifs_sb;
1480 struct cifs_tcon *pTcon;
1481 unsigned int xid;
1482 struct dentry *dentry = open_file->dentry;
1483 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1484 struct cifs_io_parms io_parms;
1486 cifs_sb = CIFS_SB(dentry->d_sb);
1488 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1489 *poffset, dentry->d_name.name);
1491 pTcon = tlink_tcon(open_file->tlink);
1493 xid = get_xid();
1495 for (total_written = 0; write_size > total_written;
1496 total_written += bytes_written) {
1497 rc = -EAGAIN;
1498 while (rc == -EAGAIN) {
1499 struct kvec iov[2];
1500 unsigned int len;
1502 if (open_file->invalidHandle) {
1503 /* we could deadlock if we called
1504 filemap_fdatawait from here so tell
1505 reopen_file not to flush data to
1506 server now */
1507 rc = cifs_reopen_file(open_file, false);
1508 if (rc != 0)
1509 break;
1512 len = min((size_t)cifs_sb->wsize,
1513 write_size - total_written);
1514 /* iov[0] is reserved for smb header */
1515 iov[1].iov_base = (char *)write_data + total_written;
1516 iov[1].iov_len = len;
1517 io_parms.netfid = open_file->netfid;
1518 io_parms.pid = pid;
1519 io_parms.tcon = pTcon;
1520 io_parms.offset = *poffset;
1521 io_parms.length = len;
1522 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
1523 1, 0);
1525 if (rc || (bytes_written == 0)) {
1526 if (total_written)
1527 break;
1528 else {
1529 free_xid(xid);
1530 return rc;
1532 } else {
1533 spin_lock(&dentry->d_inode->i_lock);
1534 cifs_update_eof(cifsi, *poffset, bytes_written);
1535 spin_unlock(&dentry->d_inode->i_lock);
1536 *poffset += bytes_written;
1540 cifs_stats_bytes_written(pTcon, total_written);
1542 if (total_written > 0) {
1543 spin_lock(&dentry->d_inode->i_lock);
1544 if (*poffset > dentry->d_inode->i_size)
1545 i_size_write(dentry->d_inode, *poffset);
1546 spin_unlock(&dentry->d_inode->i_lock);
1548 mark_inode_dirty_sync(dentry->d_inode);
1549 free_xid(xid);
1550 return total_written;
1553 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1554 bool fsuid_only)
1556 struct cifsFileInfo *open_file = NULL;
1557 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1559 /* only filter by fsuid on multiuser mounts */
1560 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1561 fsuid_only = false;
1563 spin_lock(&cifs_file_list_lock);
1564 /* we could simply get the first_list_entry since write-only entries
1565 are always at the end of the list but since the first entry might
1566 have a close pending, we go through the whole list */
1567 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1568 if (fsuid_only && open_file->uid != current_fsuid())
1569 continue;
1570 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1571 if (!open_file->invalidHandle) {
1572 /* found a good file */
1573 /* lock it so it will not be closed on us */
1574 cifsFileInfo_get_locked(open_file);
1575 spin_unlock(&cifs_file_list_lock);
1576 return open_file;
1577 } /* else might as well continue, and look for
1578 another, or simply have the caller reopen it
1579 again rather than trying to fix this handle */
1580 } else /* write only file */
1581 break; /* write only files are last so must be done */
1583 spin_unlock(&cifs_file_list_lock);
1584 return NULL;
1587 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1588 bool fsuid_only)
1590 struct cifsFileInfo *open_file, *inv_file = NULL;
1591 struct cifs_sb_info *cifs_sb;
1592 bool any_available = false;
1593 int rc;
1594 unsigned int refind = 0;
1596 /* Having a null inode here (because mapping->host was set to zero by
1597 the VFS or MM) should not happen but we had reports of on oops (due to
1598 it being zero) during stress testcases so we need to check for it */
1600 if (cifs_inode == NULL) {
1601 cERROR(1, "Null inode passed to cifs_writeable_file");
1602 dump_stack();
1603 return NULL;
1606 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1608 /* only filter by fsuid on multiuser mounts */
1609 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1610 fsuid_only = false;
1612 spin_lock(&cifs_file_list_lock);
1613 refind_writable:
1614 if (refind > MAX_REOPEN_ATT) {
1615 spin_unlock(&cifs_file_list_lock);
1616 return NULL;
1618 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1619 if (!any_available && open_file->pid != current->tgid)
1620 continue;
1621 if (fsuid_only && open_file->uid != current_fsuid())
1622 continue;
1623 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1624 if (!open_file->invalidHandle) {
1625 /* found a good writable file */
1626 cifsFileInfo_get_locked(open_file);
1627 spin_unlock(&cifs_file_list_lock);
1628 return open_file;
1629 } else {
1630 if (!inv_file)
1631 inv_file = open_file;
1635 /* couldn't find useable FH with same pid, try any available */
1636 if (!any_available) {
1637 any_available = true;
1638 goto refind_writable;
1641 if (inv_file) {
1642 any_available = false;
1643 cifsFileInfo_get_locked(inv_file);
1646 spin_unlock(&cifs_file_list_lock);
1648 if (inv_file) {
1649 rc = cifs_reopen_file(inv_file, false);
1650 if (!rc)
1651 return inv_file;
1652 else {
1653 spin_lock(&cifs_file_list_lock);
1654 list_move_tail(&inv_file->flist,
1655 &cifs_inode->openFileList);
1656 spin_unlock(&cifs_file_list_lock);
1657 cifsFileInfo_put(inv_file);
1658 spin_lock(&cifs_file_list_lock);
1659 ++refind;
1660 goto refind_writable;
1664 return NULL;
1667 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1669 struct address_space *mapping = page->mapping;
1670 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1671 char *write_data;
1672 int rc = -EFAULT;
1673 int bytes_written = 0;
1674 struct inode *inode;
1675 struct cifsFileInfo *open_file;
1677 if (!mapping || !mapping->host)
1678 return -EFAULT;
1680 inode = page->mapping->host;
1682 offset += (loff_t)from;
1683 write_data = kmap(page);
1684 write_data += from;
1686 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1687 kunmap(page);
1688 return -EIO;
1691 /* racing with truncate? */
1692 if (offset > mapping->host->i_size) {
1693 kunmap(page);
1694 return 0; /* don't care */
1697 /* check to make sure that we are not extending the file */
1698 if (mapping->host->i_size - offset < (loff_t)to)
1699 to = (unsigned)(mapping->host->i_size - offset);
1701 open_file = find_writable_file(CIFS_I(mapping->host), false);
1702 if (open_file) {
1703 bytes_written = cifs_write(open_file, open_file->pid,
1704 write_data, to - from, &offset);
1705 cifsFileInfo_put(open_file);
1706 /* Does mm or vfs already set times? */
1707 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1708 if ((bytes_written > 0) && (offset))
1709 rc = 0;
1710 else if (bytes_written < 0)
1711 rc = bytes_written;
1712 } else {
1713 cFYI(1, "No writeable filehandles for inode");
1714 rc = -EIO;
1717 kunmap(page);
1718 return rc;
1722 * Marshal up the iov array, reserving the first one for the header. Also,
1723 * set wdata->bytes.
1725 static void
1726 cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1728 int i;
1729 struct inode *inode = wdata->cfile->dentry->d_inode;
1730 loff_t size = i_size_read(inode);
1732 /* marshal up the pages into iov array */
1733 wdata->bytes = 0;
1734 for (i = 0; i < wdata->nr_pages; i++) {
1735 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1736 (loff_t)PAGE_CACHE_SIZE);
1737 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1738 wdata->bytes += iov[i + 1].iov_len;
/*
 * Write back dirty pages of a mapping in batches.
 *
 * Runs of consecutive dirty pages (at most wsize bytes per batch) are
 * collected into a cifs_writedata and submitted with cifs_async_writev().
 * When wsize is smaller than a page the work is handed to
 * generic_writepages() instead. Returns 0 or the last error recorded in rc.
 */
1742 static int cifs_writepages(struct address_space *mapping,
1743 struct writeback_control *wbc)
1745 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1746 bool done = false, scanned = false, range_whole = false;
1747 pgoff_t end, index;
1748 struct cifs_writedata *wdata;
1749 struct page *page;
1750 int rc = 0;
1753 * If wsize is smaller than the page cache size, default to writing
1754 * one page at a time via cifs_writepage
1756 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1757 return generic_writepages(mapping, wbc);
/* pick the page index range to scan from the writeback control */
1759 if (wbc->range_cyclic) {
1760 index = mapping->writeback_index; /* Start from prev offset */
1761 end = -1;
1762 } else {
1763 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1764 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1765 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1766 range_whole = true;
1767 scanned = true;
1769 retry:
1770 while (!done && index <= end) {
1771 unsigned int i, nr_pages, found_pages;
1772 pgoff_t next = 0, tofind;
1773 struct page **pages;
/* number of pages to look for: limited by wsize and by the range end */
1775 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1776 end - index) + 1;
1778 wdata = cifs_writedata_alloc((unsigned int)tofind,
1779 cifs_writev_complete);
1780 if (!wdata) {
1781 rc = -ENOMEM;
1782 break;
1786 * find_get_pages_tag seems to return a max of 256 on each
1787 * iteration, so we must call it several times in order to
1788 * fill the array or the wsize is effectively limited to
1789 * 256 * PAGE_CACHE_SIZE.
1791 found_pages = 0;
1792 pages = wdata->pages;
1793 do {
1794 nr_pages = find_get_pages_tag(mapping, &index,
1795 PAGECACHE_TAG_DIRTY,
1796 tofind, pages);
1797 found_pages += nr_pages;
1798 tofind -= nr_pages;
1799 pages += nr_pages;
1800 } while (nr_pages && tofind && index <= end);
/* no dirty pages left in the range: drop the unused wdata and stop */
1802 if (found_pages == 0) {
1803 kref_put(&wdata->refcount, cifs_writedata_release);
1804 break;
/*
 * Lock and claim pages for this batch. nr_pages counts the pages accepted
 * so far; the scan stops at the first page that cannot be used.
 */
1807 nr_pages = 0;
1808 for (i = 0; i < found_pages; i++) {
1809 page = wdata->pages[i];
1811 * At this point we hold neither mapping->tree_lock nor
1812 * lock on the page itself: the page may be truncated or
1813 * invalidated (changing page->mapping to NULL), or even
1814 * swizzled back from swapper_space to tmpfs file
1815 * mapping
/* block only for the first page of a batch; later pages are best effort */
1818 if (nr_pages == 0)
1819 lock_page(page);
1820 else if (!trylock_page(page))
1821 break;
1823 if (unlikely(page->mapping != mapping)) {
1824 unlock_page(page);
1825 break;
1828 if (!wbc->range_cyclic && page->index > end) {
1829 done = true;
1830 unlock_page(page);
1831 break;
1834 if (next && (page->index != next)) {
1835 /* Not next consecutive page */
1836 unlock_page(page);
1837 break;
1840 if (wbc->sync_mode != WB_SYNC_NONE)
1841 wait_on_page_writeback(page);
1843 if (PageWriteback(page) ||
1844 !clear_page_dirty_for_io(page)) {
1845 unlock_page(page);
1846 break;
1850 * This actually clears the dirty bit in the radix tree.
1851 * See cifs_writepage() for more commentary.
1853 set_page_writeback(page);
/* page starts at or beyond i_size: nothing to send, end the scan */
1855 if (page_offset(page) >= mapping->host->i_size) {
1856 done = true;
1857 unlock_page(page);
1858 end_page_writeback(page);
1859 break;
1862 wdata->pages[i] = page;
1863 next = page->index + 1;
1864 ++nr_pages;
1867 /* reset index to refind any pages skipped */
1868 if (nr_pages == 0)
1869 index = wdata->pages[0]->index + 1;
1871 /* put any pages we aren't going to use */
1872 for (i = nr_pages; i < found_pages; i++) {
1873 page_cache_release(wdata->pages[i]);
1874 wdata->pages[i] = NULL;
1877 /* nothing to write? */
1878 if (nr_pages == 0) {
1879 kref_put(&wdata->refcount, cifs_writedata_release);
1880 continue;
1883 wdata->sync_mode = wbc->sync_mode;
1884 wdata->nr_pages = nr_pages;
1885 wdata->offset = page_offset(wdata->pages[0]);
1886 wdata->marshal_iov = cifs_writepages_marshal_iov;
/*
 * Pick a writable handle and send. For WB_SYNC_ALL the send is repeated
 * (with a freshly looked up handle) for as long as it returns -EAGAIN.
 */
1888 do {
1889 if (wdata->cfile != NULL)
1890 cifsFileInfo_put(wdata->cfile);
1891 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1892 false);
1893 if (!wdata->cfile) {
1894 cERROR(1, "No writable handles for inode");
1895 rc = -EBADF;
1896 break;
1898 wdata->pid = wdata->cfile->pid;
1899 rc = cifs_async_writev(wdata);
1900 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1902 for (i = 0; i < nr_pages; ++i)
1903 unlock_page(wdata->pages[i]);
1905 /* send failure -- clean up the mess */
1906 if (rc != 0) {
1907 for (i = 0; i < nr_pages; ++i) {
1908 if (rc == -EAGAIN)
1909 redirty_page_for_writepage(wbc,
1910 wdata->pages[i]);
1911 else
1912 SetPageError(wdata->pages[i]);
1913 end_page_writeback(wdata->pages[i]);
1914 page_cache_release(wdata->pages[i]);
1916 if (rc != -EAGAIN)
1917 mapping_set_error(mapping, rc);
/* drop this function's reference on wdata, whether the send worked or not */
1919 kref_put(&wdata->refcount, cifs_writedata_release);
1921 wbc->nr_to_write -= nr_pages;
1922 if (wbc->nr_to_write <= 0)
1923 done = true;
1925 index = next;
1928 if (!scanned && !done) {
1930 * We hit the last page and there is more work to be done: wrap
1931 * back to the start of the file
1933 scanned = true;
1934 index = 0;
1935 goto retry;
/* remember where to resume for the next cyclic/whole-file writeback */
1938 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1939 mapping->writeback_index = index;
1941 return rc;
1944 static int
1945 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1947 int rc;
1948 unsigned int xid;
1950 xid = get_xid();
1951 /* BB add check for wbc flags */
1952 page_cache_get(page);
1953 if (!PageUptodate(page))
1954 cFYI(1, "ppw - page not up to date");
1957 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1959 * A writepage() implementation always needs to do either this,
1960 * or re-dirty the page with "redirty_page_for_writepage()" in
1961 * the case of a failure.
1963 * Just unlocking the page will cause the radix tree tag-bits
1964 * to fail to update with the state of the page correctly.
1966 set_page_writeback(page);
1967 retry_write:
1968 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1969 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1970 goto retry_write;
1971 else if (rc == -EAGAIN)
1972 redirty_page_for_writepage(wbc, page);
1973 else if (rc != 0)
1974 SetPageError(page);
1975 else
1976 SetPageUptodate(page);
1977 end_page_writeback(page);
1978 page_cache_release(page);
1979 free_xid(xid);
1980 return rc;
/*
 * Write a single page via cifs_writepage_locked() and then unlock it.
 * Returns the result of the write.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int status;

	status = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return status;
}
1990 static int cifs_write_end(struct file *file, struct address_space *mapping,
1991 loff_t pos, unsigned len, unsigned copied,
1992 struct page *page, void *fsdata)
1994 int rc;
1995 struct inode *inode = mapping->host;
1996 struct cifsFileInfo *cfile = file->private_data;
1997 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1998 __u32 pid;
2000 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2001 pid = cfile->pid;
2002 else
2003 pid = current->tgid;
2005 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2006 page, pos, copied);
2008 if (PageChecked(page)) {
2009 if (copied == len)
2010 SetPageUptodate(page);
2011 ClearPageChecked(page);
2012 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2013 SetPageUptodate(page);
2015 if (!PageUptodate(page)) {
2016 char *page_data;
2017 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2018 unsigned int xid;
2020 xid = get_xid();
2021 /* this is probably better than directly calling
2022 partialpage_write since in this function the file handle is
2023 known which we might as well leverage */
2024 /* BB check if anything else missing out of ppw
2025 such as updating last write time */
2026 page_data = kmap(page);
2027 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2028 /* if (rc < 0) should we set writebehind rc? */
2029 kunmap(page);
2031 free_xid(xid);
2032 } else {
2033 rc = copied;
2034 pos += copied;
2035 set_page_dirty(page);
2038 if (rc > 0) {
2039 spin_lock(&inode->i_lock);
2040 if (pos > inode->i_size)
2041 i_size_write(inode, pos);
2042 spin_unlock(&inode->i_lock);
2045 unlock_page(page);
2046 page_cache_release(page);
2048 return rc;
2051 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2052 int datasync)
2054 unsigned int xid;
2055 int rc = 0;
2056 struct cifs_tcon *tcon;
2057 struct cifsFileInfo *smbfile = file->private_data;
2058 struct inode *inode = file->f_path.dentry->d_inode;
2059 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2061 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2062 if (rc)
2063 return rc;
2064 mutex_lock(&inode->i_mutex);
2066 xid = get_xid();
2068 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2069 file->f_path.dentry->d_name.name, datasync);
2071 if (!CIFS_I(inode)->clientCanCacheRead) {
2072 rc = cifs_invalidate_mapping(inode);
2073 if (rc) {
2074 cFYI(1, "rc: %d during invalidate phase", rc);
2075 rc = 0; /* don't care about it in fsync */
2079 tcon = tlink_tcon(smbfile->tlink);
2080 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2081 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2083 free_xid(xid);
2084 mutex_unlock(&inode->i_mutex);
2085 return rc;
2088 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2090 unsigned int xid;
2091 int rc = 0;
2092 struct cifs_tcon *tcon;
2093 struct cifsFileInfo *smbfile = file->private_data;
2094 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2095 struct inode *inode = file->f_mapping->host;
2097 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2098 if (rc)
2099 return rc;
2100 mutex_lock(&inode->i_mutex);
2102 xid = get_xid();
2104 cFYI(1, "Sync file - name: %s datasync: 0x%x",
2105 file->f_path.dentry->d_name.name, datasync);
2107 tcon = tlink_tcon(smbfile->tlink);
2108 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
2109 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
2111 free_xid(xid);
2112 mutex_unlock(&inode->i_mutex);
2113 return rc;
2117 * As file closes, flush all cached write data for this inode checking
2118 * for write behind errors.
2120 int cifs_flush(struct file *file, fl_owner_t id)
2122 struct inode *inode = file->f_path.dentry->d_inode;
2123 int rc = 0;
2125 if (file->f_mode & FMODE_WRITE)
2126 rc = filemap_write_and_wait(inode->i_mapping);
2128 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2130 return rc;
2133 static int
2134 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2136 int rc = 0;
2137 unsigned long i;
2139 for (i = 0; i < num_pages; i++) {
2140 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2141 if (!pages[i]) {
2143 * save number of pages we have already allocated and
2144 * return with ENOMEM error
2146 num_pages = i;
2147 rc = -ENOMEM;
2148 break;
2152 if (rc) {
2153 for (i = 0; i < num_pages; i++)
2154 put_page(pages[i]);
2156 return rc;
2159 static inline
2160 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2162 size_t num_pages;
2163 size_t clen;
2165 clen = min_t(const size_t, len, wsize);
2166 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2168 if (cur_len)
2169 *cur_len = clen;
2171 return num_pages;
2174 static void
2175 cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2177 int i;
2178 size_t bytes = wdata->bytes;
2180 /* marshal up the pages into iov array */
2181 for (i = 0; i < wdata->nr_pages; i++) {
2182 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2183 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2184 bytes -= iov[i + 1].iov_len;
2188 static void
2189 cifs_uncached_writev_complete(struct work_struct *work)
2191 int i;
2192 struct cifs_writedata *wdata = container_of(work,
2193 struct cifs_writedata, work);
2194 struct inode *inode = wdata->cfile->dentry->d_inode;
2195 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2197 spin_lock(&inode->i_lock);
2198 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2199 if (cifsi->server_eof > inode->i_size)
2200 i_size_write(inode, cifsi->server_eof);
2201 spin_unlock(&inode->i_lock);
2203 complete(&wdata->done);
2205 if (wdata->result != -EAGAIN) {
2206 for (i = 0; i < wdata->nr_pages; i++)
2207 put_page(wdata->pages[i]);
2210 kref_put(&wdata->refcount, cifs_writedata_release);
2213 /* attempt to send write to server, retry on any -EAGAIN errors */
2214 static int
2215 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2217 int rc;
2219 do {
2220 if (wdata->cfile->invalidHandle) {
2221 rc = cifs_reopen_file(wdata->cfile, false);
2222 if (rc != 0)
2223 continue;
2225 rc = cifs_async_writev(wdata);
2226 } while (rc == -EAGAIN);
2228 return rc;
2231 static ssize_t
2232 cifs_iovec_write(struct file *file, const struct iovec *iov,
2233 unsigned long nr_segs, loff_t *poffset)
2235 unsigned long nr_pages, i;
2236 size_t copied, len, cur_len;
2237 ssize_t total_written = 0;
2238 loff_t offset;
2239 struct iov_iter it;
2240 struct cifsFileInfo *open_file;
2241 struct cifs_tcon *tcon;
2242 struct cifs_sb_info *cifs_sb;
2243 struct cifs_writedata *wdata, *tmp;
2244 struct list_head wdata_list;
2245 int rc;
2246 pid_t pid;
2248 len = iov_length(iov, nr_segs);
2249 if (!len)
2250 return 0;
2252 rc = generic_write_checks(file, poffset, &len, 0);
2253 if (rc)
2254 return rc;
2256 INIT_LIST_HEAD(&wdata_list);
2257 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2258 open_file = file->private_data;
2259 tcon = tlink_tcon(open_file->tlink);
2260 offset = *poffset;
2262 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2263 pid = open_file->pid;
2264 else
2265 pid = current->tgid;
2267 iov_iter_init(&it, iov, nr_segs, len, 0);
2268 do {
2269 size_t save_len;
2271 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2272 wdata = cifs_writedata_alloc(nr_pages,
2273 cifs_uncached_writev_complete);
2274 if (!wdata) {
2275 rc = -ENOMEM;
2276 break;
2279 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2280 if (rc) {
2281 kfree(wdata);
2282 break;
2285 save_len = cur_len;
2286 for (i = 0; i < nr_pages; i++) {
2287 copied = min_t(const size_t, cur_len, PAGE_SIZE);
2288 copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2289 0, copied);
2290 cur_len -= copied;
2291 iov_iter_advance(&it, copied);
2293 cur_len = save_len - cur_len;
2295 wdata->sync_mode = WB_SYNC_ALL;
2296 wdata->nr_pages = nr_pages;
2297 wdata->offset = (__u64)offset;
2298 wdata->cfile = cifsFileInfo_get(open_file);
2299 wdata->pid = pid;
2300 wdata->bytes = cur_len;
2301 wdata->marshal_iov = cifs_uncached_marshal_iov;
2302 rc = cifs_uncached_retry_writev(wdata);
2303 if (rc) {
2304 kref_put(&wdata->refcount, cifs_writedata_release);
2305 break;
2308 list_add_tail(&wdata->list, &wdata_list);
2309 offset += cur_len;
2310 len -= cur_len;
2311 } while (len > 0);
2314 * If at least one write was successfully sent, then discard any rc
2315 * value from the later writes. If the other write succeeds, then
2316 * we'll end up returning whatever was written. If it fails, then
2317 * we'll get a new rc value from that.
2319 if (!list_empty(&wdata_list))
2320 rc = 0;
2323 * Wait for and collect replies for any successful sends in order of
2324 * increasing offset. Once an error is hit or we get a fatal signal
2325 * while waiting, then return without waiting for any more replies.
2327 restart_loop:
2328 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2329 if (!rc) {
2330 /* FIXME: freezable too? */
2331 rc = wait_for_completion_killable(&wdata->done);
2332 if (rc)
2333 rc = -EINTR;
2334 else if (wdata->result)
2335 rc = wdata->result;
2336 else
2337 total_written += wdata->bytes;
2339 /* resend call if it's a retryable error */
2340 if (rc == -EAGAIN) {
2341 rc = cifs_uncached_retry_writev(wdata);
2342 goto restart_loop;
2345 list_del_init(&wdata->list);
2346 kref_put(&wdata->refcount, cifs_writedata_release);
2349 if (total_written > 0)
2350 *poffset += total_written;
2352 cifs_stats_bytes_written(tcon, total_written);
2353 return total_written ? total_written : (ssize_t)rc;
2356 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2357 unsigned long nr_segs, loff_t pos)
2359 ssize_t written;
2360 struct inode *inode;
2362 inode = iocb->ki_filp->f_path.dentry->d_inode;
2365 * BB - optimize the way when signing is disabled. We can drop this
2366 * extra memory-to-memory copying and use iovec buffers for constructing
2367 * write request.
2370 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2371 if (written > 0) {
2372 CIFS_I(inode)->invalid_mapping = true;
2373 iocb->ki_pos = pos;
2376 return written;
2379 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2380 unsigned long nr_segs, loff_t pos)
2382 struct inode *inode;
2384 inode = iocb->ki_filp->f_path.dentry->d_inode;
2386 if (CIFS_I(inode)->clientCanCacheAll)
2387 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2390 * In strict cache mode we need to write the data to the server exactly
2391 * from the pos to pos+len-1 rather than flush all affected pages
2392 * because it may cause a error with mandatory locks on these pages but
2393 * not on the region from pos to ppos+len-1.
2396 return cifs_user_writev(iocb, iov, nr_segs, pos);
2399 static struct cifs_readdata *
2400 cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2402 struct cifs_readdata *rdata;
2404 rdata = kzalloc(sizeof(*rdata) +
2405 sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2406 if (rdata != NULL) {
2407 kref_init(&rdata->refcount);
2408 INIT_LIST_HEAD(&rdata->list);
2409 init_completion(&rdata->done);
2410 INIT_WORK(&rdata->work, complete);
2411 INIT_LIST_HEAD(&rdata->pages);
2413 return rdata;
2416 void
2417 cifs_readdata_release(struct kref *refcount)
2419 struct cifs_readdata *rdata = container_of(refcount,
2420 struct cifs_readdata, refcount);
2422 if (rdata->cfile)
2423 cifsFileInfo_put(rdata->cfile);
2425 kfree(rdata);
2428 static int
2429 cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2431 int rc = 0;
2432 struct page *page, *tpage;
2433 unsigned int i;
2435 for (i = 0; i < npages; i++) {
2436 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2437 if (!page) {
2438 rc = -ENOMEM;
2439 break;
2441 list_add(&page->lru, list);
2444 if (rc) {
2445 list_for_each_entry_safe(page, tpage, list, lru) {
2446 list_del(&page->lru);
2447 put_page(page);
2450 return rc;
2453 static void
2454 cifs_uncached_readdata_release(struct kref *refcount)
2456 struct page *page, *tpage;
2457 struct cifs_readdata *rdata = container_of(refcount,
2458 struct cifs_readdata, refcount);
2460 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2461 list_del(&page->lru);
2462 put_page(page);
2464 cifs_readdata_release(refcount);
2467 static int
2468 cifs_retry_async_readv(struct cifs_readdata *rdata)
2470 int rc;
2472 do {
2473 if (rdata->cfile->invalidHandle) {
2474 rc = cifs_reopen_file(rdata->cfile, true);
2475 if (rc != 0)
2476 continue;
2478 rc = cifs_async_readv(rdata);
2479 } while (rc == -EAGAIN);
2481 return rc;
2485 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2486 * @rdata: the readdata response with list of pages holding data
2487 * @iov: vector in which we should copy the data
2488 * @nr_segs: number of segments in vector
2489 * @offset: offset into file of the first iovec
2490 * @copied: used to return the amount of data copied to the iov
2492 * This function copies data from a list of pages in a readdata response into
2493 * an array of iovecs. It will first calculate where the data should go
2494 * based on the info in the readdata and then copy the data into that spot.
2496 static ssize_t
2497 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2498 unsigned long nr_segs, loff_t offset, ssize_t *copied)
2500 int rc = 0;
2501 struct iov_iter ii;
2502 size_t pos = rdata->offset - offset;
2503 struct page *page, *tpage;
2504 ssize_t remaining = rdata->bytes;
2505 unsigned char *pdata;
2507 /* set up iov_iter and advance to the correct offset */
2508 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2509 iov_iter_advance(&ii, pos);
2511 *copied = 0;
2512 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2513 ssize_t copy;
2515 /* copy a whole page or whatever's left */
2516 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2518 /* ...but limit it to whatever space is left in the iov */
2519 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2521 /* go while there's data to be copied and no errors */
2522 if (copy && !rc) {
2523 pdata = kmap(page);
2524 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2525 (int)copy);
2526 kunmap(page);
2527 if (!rc) {
2528 *copied += copy;
2529 remaining -= copy;
2530 iov_iter_advance(&ii, copy);
2534 list_del(&page->lru);
2535 put_page(page);
2538 return rc;
2541 static void
2542 cifs_uncached_readv_complete(struct work_struct *work)
2544 struct cifs_readdata *rdata = container_of(work,
2545 struct cifs_readdata, work);
2547 /* if the result is non-zero then the pages weren't kmapped */
2548 if (rdata->result == 0) {
2549 struct page *page;
2551 list_for_each_entry(page, &rdata->pages, lru)
2552 kunmap(page);
2555 complete(&rdata->done);
2556 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2559 static int
2560 cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2561 unsigned int remaining)
2563 int len = 0;
2564 struct page *page, *tpage;
2566 rdata->nr_iov = 1;
2567 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2568 if (remaining >= PAGE_SIZE) {
2569 /* enough data to fill the page */
2570 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2571 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2572 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2573 rdata->nr_iov, page->index,
2574 rdata->iov[rdata->nr_iov].iov_base,
2575 rdata->iov[rdata->nr_iov].iov_len);
2576 ++rdata->nr_iov;
2577 len += PAGE_SIZE;
2578 remaining -= PAGE_SIZE;
2579 } else if (remaining > 0) {
2580 /* enough for partial page, fill and zero the rest */
2581 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2582 rdata->iov[rdata->nr_iov].iov_len = remaining;
2583 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2584 rdata->nr_iov, page->index,
2585 rdata->iov[rdata->nr_iov].iov_base,
2586 rdata->iov[rdata->nr_iov].iov_len);
2587 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2588 '\0', PAGE_SIZE - remaining);
2589 ++rdata->nr_iov;
2590 len += remaining;
2591 remaining = 0;
2592 } else {
2593 /* no need to hold page hostage */
2594 list_del(&page->lru);
2595 put_page(page);
2599 return len;
2602 static ssize_t
2603 cifs_iovec_read(struct file *file, const struct iovec *iov,
2604 unsigned long nr_segs, loff_t *poffset)
2606 ssize_t rc;
2607 size_t len, cur_len;
2608 ssize_t total_read = 0;
2609 loff_t offset = *poffset;
2610 unsigned int npages;
2611 struct cifs_sb_info *cifs_sb;
2612 struct cifs_tcon *tcon;
2613 struct cifsFileInfo *open_file;
2614 struct cifs_readdata *rdata, *tmp;
2615 struct list_head rdata_list;
2616 pid_t pid;
2618 if (!nr_segs)
2619 return 0;
2621 len = iov_length(iov, nr_segs);
2622 if (!len)
2623 return 0;
2625 INIT_LIST_HEAD(&rdata_list);
2626 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2627 open_file = file->private_data;
2628 tcon = tlink_tcon(open_file->tlink);
2630 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2631 pid = open_file->pid;
2632 else
2633 pid = current->tgid;
2635 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2636 cFYI(1, "attempting read on write only file instance");
2638 do {
2639 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2640 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2642 /* allocate a readdata struct */
2643 rdata = cifs_readdata_alloc(npages,
2644 cifs_uncached_readv_complete);
2645 if (!rdata) {
2646 rc = -ENOMEM;
2647 goto error;
2650 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2651 if (rc)
2652 goto error;
2654 rdata->cfile = cifsFileInfo_get(open_file);
2655 rdata->offset = offset;
2656 rdata->bytes = cur_len;
2657 rdata->pid = pid;
2658 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2660 rc = cifs_retry_async_readv(rdata);
2661 error:
2662 if (rc) {
2663 kref_put(&rdata->refcount,
2664 cifs_uncached_readdata_release);
2665 break;
2668 list_add_tail(&rdata->list, &rdata_list);
2669 offset += cur_len;
2670 len -= cur_len;
2671 } while (len > 0);
2673 /* if at least one read request send succeeded, then reset rc */
2674 if (!list_empty(&rdata_list))
2675 rc = 0;
2677 /* the loop below should proceed in the order of increasing offsets */
2678 restart_loop:
2679 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2680 if (!rc) {
2681 ssize_t copied;
2683 /* FIXME: freezable sleep too? */
2684 rc = wait_for_completion_killable(&rdata->done);
2685 if (rc)
2686 rc = -EINTR;
2687 else if (rdata->result)
2688 rc = rdata->result;
2689 else {
2690 rc = cifs_readdata_to_iov(rdata, iov,
2691 nr_segs, *poffset,
2692 &copied);
2693 total_read += copied;
2696 /* resend call if it's a retryable error */
2697 if (rc == -EAGAIN) {
2698 rc = cifs_retry_async_readv(rdata);
2699 goto restart_loop;
2702 list_del_init(&rdata->list);
2703 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2706 cifs_stats_bytes_read(tcon, total_read);
2707 *poffset += total_read;
2709 return total_read ? total_read : rc;
2712 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2713 unsigned long nr_segs, loff_t pos)
2715 ssize_t read;
2717 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2718 if (read > 0)
2719 iocb->ki_pos = pos;
2721 return read;
2724 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2725 unsigned long nr_segs, loff_t pos)
2727 struct inode *inode;
2729 inode = iocb->ki_filp->f_path.dentry->d_inode;
2731 if (CIFS_I(inode)->clientCanCacheRead)
2732 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2735 * In strict cache mode we need to read from the server all the time
2736 * if we don't have level II oplock because the server can delay mtime
2737 * change - so we can't make a decision about inode invalidating.
2738 * And we can also fail with pagereading if there are mandatory locks
2739 * on pages affected by this read but not on the region from pos to
2740 * pos+len-1.
2743 return cifs_user_readv(iocb, iov, nr_segs, pos);
2746 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2747 loff_t *poffset)
2749 int rc = -EACCES;
2750 unsigned int bytes_read = 0;
2751 unsigned int total_read;
2752 unsigned int current_read_size;
2753 unsigned int rsize;
2754 struct cifs_sb_info *cifs_sb;
2755 struct cifs_tcon *tcon;
2756 unsigned int xid;
2757 char *current_offset;
2758 struct cifsFileInfo *open_file;
2759 struct cifs_io_parms io_parms;
2760 int buf_type = CIFS_NO_BUFFER;
2761 __u32 pid;
2763 xid = get_xid();
2764 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2766 /* FIXME: set up handlers for larger reads and/or convert to async */
2767 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2769 if (file->private_data == NULL) {
2770 rc = -EBADF;
2771 free_xid(xid);
2772 return rc;
2774 open_file = file->private_data;
2775 tcon = tlink_tcon(open_file->tlink);
2777 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2778 pid = open_file->pid;
2779 else
2780 pid = current->tgid;
2782 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2783 cFYI(1, "attempting read on write only file instance");
2785 for (total_read = 0, current_offset = read_data;
2786 read_size > total_read;
2787 total_read += bytes_read, current_offset += bytes_read) {
2788 current_read_size = min_t(uint, read_size - total_read, rsize);
2790 * For windows me and 9x we do not want to request more than it
2791 * negotiated since it will refuse the read then.
2793 if ((tcon->ses) && !(tcon->ses->capabilities &
2794 tcon->ses->server->vals->cap_large_files)) {
2795 current_read_size = min_t(uint, current_read_size,
2796 CIFSMaxBufSize);
2798 rc = -EAGAIN;
2799 while (rc == -EAGAIN) {
2800 if (open_file->invalidHandle) {
2801 rc = cifs_reopen_file(open_file, true);
2802 if (rc != 0)
2803 break;
2805 io_parms.netfid = open_file->netfid;
2806 io_parms.pid = pid;
2807 io_parms.tcon = tcon;
2808 io_parms.offset = *poffset;
2809 io_parms.length = current_read_size;
2810 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2811 &current_offset, &buf_type);
2813 if (rc || (bytes_read == 0)) {
2814 if (total_read) {
2815 break;
2816 } else {
2817 free_xid(xid);
2818 return rc;
2820 } else {
2821 cifs_stats_bytes_read(tcon, total_read);
2822 *poffset += bytes_read;
2825 free_xid(xid);
2826 return total_read;
2830 * If the page is mmap'ed into a process' page tables, then we need to make
2831 * sure that it doesn't change while being written back.
2833 static int
2834 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2836 struct page *page = vmf->page;
2838 lock_page(page);
2839 return VM_FAULT_LOCKED;
2842 static struct vm_operations_struct cifs_file_vm_ops = {
2843 .fault = filemap_fault,
2844 .page_mkwrite = cifs_page_mkwrite,
2847 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2849 int rc, xid;
2850 struct inode *inode = file->f_path.dentry->d_inode;
2852 xid = get_xid();
2854 if (!CIFS_I(inode)->clientCanCacheRead) {
2855 rc = cifs_invalidate_mapping(inode);
2856 if (rc)
2857 return rc;
2860 rc = generic_file_mmap(file, vma);
2861 if (rc == 0)
2862 vma->vm_ops = &cifs_file_vm_ops;
2863 free_xid(xid);
2864 return rc;
2867 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2869 int rc, xid;
2871 xid = get_xid();
2872 rc = cifs_revalidate_file(file);
2873 if (rc) {
2874 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2875 free_xid(xid);
2876 return rc;
2878 rc = generic_file_mmap(file, vma);
2879 if (rc == 0)
2880 vma->vm_ops = &cifs_file_vm_ops;
2881 free_xid(xid);
2882 return rc;
2885 static void
2886 cifs_readv_complete(struct work_struct *work)
2888 struct cifs_readdata *rdata = container_of(work,
2889 struct cifs_readdata, work);
2890 struct page *page, *tpage;
2892 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2893 list_del(&page->lru);
2894 lru_cache_add_file(page);
2896 if (rdata->result == 0) {
2897 kunmap(page);
2898 flush_dcache_page(page);
2899 SetPageUptodate(page);
2902 unlock_page(page);
2904 if (rdata->result == 0)
2905 cifs_readpage_to_fscache(rdata->mapping->host, page);
2907 page_cache_release(page);
2909 kref_put(&rdata->refcount, cifs_readdata_release);
2912 static int
2913 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2915 int len = 0;
2916 struct page *page, *tpage;
2917 u64 eof;
2918 pgoff_t eof_index;
2920 /* determine the eof that the server (probably) has */
2921 eof = CIFS_I(rdata->mapping->host)->server_eof;
2922 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2923 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2925 rdata->nr_iov = 1;
2926 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2927 if (remaining >= PAGE_CACHE_SIZE) {
2928 /* enough data to fill the page */
2929 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2930 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2931 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2932 rdata->nr_iov, page->index,
2933 rdata->iov[rdata->nr_iov].iov_base,
2934 rdata->iov[rdata->nr_iov].iov_len);
2935 ++rdata->nr_iov;
2936 len += PAGE_CACHE_SIZE;
2937 remaining -= PAGE_CACHE_SIZE;
2938 } else if (remaining > 0) {
2939 /* enough for partial page, fill and zero the rest */
2940 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2941 rdata->iov[rdata->nr_iov].iov_len = remaining;
2942 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2943 rdata->nr_iov, page->index,
2944 rdata->iov[rdata->nr_iov].iov_base,
2945 rdata->iov[rdata->nr_iov].iov_len);
2946 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2947 '\0', PAGE_CACHE_SIZE - remaining);
2948 ++rdata->nr_iov;
2949 len += remaining;
2950 remaining = 0;
2951 } else if (page->index > eof_index) {
2953 * The VFS will not try to do readahead past the
2954 * i_size, but it's possible that we have outstanding
2955 * writes with gaps in the middle and the i_size hasn't
2956 * caught up yet. Populate those with zeroed out pages
2957 * to prevent the VFS from repeatedly attempting to
2958 * fill them until the writes are flushed.
2960 zero_user(page, 0, PAGE_CACHE_SIZE);
2961 list_del(&page->lru);
2962 lru_cache_add_file(page);
2963 flush_dcache_page(page);
2964 SetPageUptodate(page);
2965 unlock_page(page);
2966 page_cache_release(page);
2967 } else {
2968 /* no need to hold page hostage */
2969 list_del(&page->lru);
2970 lru_cache_add_file(page);
2971 unlock_page(page);
2972 page_cache_release(page);
2976 return len;
2979 static int cifs_readpages(struct file *file, struct address_space *mapping,
2980 struct list_head *page_list, unsigned num_pages)
2982 int rc;
2983 struct list_head tmplist;
2984 struct cifsFileInfo *open_file = file->private_data;
2985 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2986 unsigned int rsize = cifs_sb->rsize;
2987 pid_t pid;
2990 * Give up immediately if rsize is too small to read an entire page.
2991 * The VFS will fall back to readpage. We should never reach this
2992 * point however since we set ra_pages to 0 when the rsize is smaller
2993 * than a cache page.
2995 if (unlikely(rsize < PAGE_CACHE_SIZE))
2996 return 0;
2999 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3000 * immediately if the cookie is negative
3002 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3003 &num_pages);
3004 if (rc == 0)
3005 return rc;
3007 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3008 pid = open_file->pid;
3009 else
3010 pid = current->tgid;
3012 rc = 0;
3013 INIT_LIST_HEAD(&tmplist);
3015 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3016 mapping, num_pages);
3019 * Start with the page at end of list and move it to private
3020 * list. Do the same with any following pages until we hit
3021 * the rsize limit, hit an index discontinuity, or run out of
3022 * pages. Issue the async read and then start the loop again
3023 * until the list is empty.
3025 * Note that list order is important. The page_list is in
3026 * the order of declining indexes. When we put the pages in
3027 * the rdata->pages, then we want them in increasing order.
3029 while (!list_empty(page_list)) {
3030 unsigned int bytes = PAGE_CACHE_SIZE;
3031 unsigned int expected_index;
3032 unsigned int nr_pages = 1;
3033 loff_t offset;
3034 struct page *page, *tpage;
3035 struct cifs_readdata *rdata;
3037 page = list_entry(page_list->prev, struct page, lru);
3040 * Lock the page and put it in the cache. Since no one else
3041 * should have access to this page, we're safe to simply set
3042 * PG_locked without checking it first.
3044 __set_page_locked(page);
3045 rc = add_to_page_cache_locked(page, mapping,
3046 page->index, GFP_KERNEL);
3048 /* give up if we can't stick it in the cache */
3049 if (rc) {
3050 __clear_page_locked(page);
3051 break;
3054 /* move first page to the tmplist */
3055 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3056 list_move_tail(&page->lru, &tmplist);
3058 /* now try and add more pages onto the request */
3059 expected_index = page->index + 1;
3060 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3061 /* discontinuity ? */
3062 if (page->index != expected_index)
3063 break;
3065 /* would this page push the read over the rsize? */
3066 if (bytes + PAGE_CACHE_SIZE > rsize)
3067 break;
3069 __set_page_locked(page);
3070 if (add_to_page_cache_locked(page, mapping,
3071 page->index, GFP_KERNEL)) {
3072 __clear_page_locked(page);
3073 break;
3075 list_move_tail(&page->lru, &tmplist);
3076 bytes += PAGE_CACHE_SIZE;
3077 expected_index++;
3078 nr_pages++;
3081 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3082 if (!rdata) {
3083 /* best to give up if we're out of mem */
3084 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3085 list_del(&page->lru);
3086 lru_cache_add_file(page);
3087 unlock_page(page);
3088 page_cache_release(page);
3090 rc = -ENOMEM;
3091 break;
3094 rdata->cfile = cifsFileInfo_get(open_file);
3095 rdata->mapping = mapping;
3096 rdata->offset = offset;
3097 rdata->bytes = bytes;
3098 rdata->pid = pid;
3099 rdata->marshal_iov = cifs_readpages_marshal_iov;
3100 list_splice_init(&tmplist, &rdata->pages);
3102 rc = cifs_retry_async_readv(rdata);
3103 if (rc != 0) {
3104 list_for_each_entry_safe(page, tpage, &rdata->pages,
3105 lru) {
3106 list_del(&page->lru);
3107 lru_cache_add_file(page);
3108 unlock_page(page);
3109 page_cache_release(page);
3111 kref_put(&rdata->refcount, cifs_readdata_release);
3112 break;
3115 kref_put(&rdata->refcount, cifs_readdata_release);
3118 return rc;
3121 static int cifs_readpage_worker(struct file *file, struct page *page,
3122 loff_t *poffset)
3124 char *read_data;
3125 int rc;
3127 /* Is the page cached? */
3128 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3129 if (rc == 0)
3130 goto read_complete;
3132 page_cache_get(page);
3133 read_data = kmap(page);
3134 /* for reads over a certain size could initiate async read ahead */
3136 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3138 if (rc < 0)
3139 goto io_error;
3140 else
3141 cFYI(1, "Bytes read %d", rc);
3143 file->f_path.dentry->d_inode->i_atime =
3144 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3146 if (PAGE_CACHE_SIZE > rc)
3147 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3149 flush_dcache_page(page);
3150 SetPageUptodate(page);
3152 /* send this page to the cache */
3153 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3155 rc = 0;
3157 io_error:
3158 kunmap(page);
3159 page_cache_release(page);
3161 read_complete:
3162 return rc;
3165 static int cifs_readpage(struct file *file, struct page *page)
3167 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3168 int rc = -EACCES;
3169 unsigned int xid;
3171 xid = get_xid();
3173 if (file->private_data == NULL) {
3174 rc = -EBADF;
3175 free_xid(xid);
3176 return rc;
3179 cFYI(1, "readpage %p at offset %d 0x%x",
3180 page, (int)offset, (int)offset);
3182 rc = cifs_readpage_worker(file, page, &offset);
3184 unlock_page(page);
3186 free_xid(xid);
3187 return rc;
3190 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3192 struct cifsFileInfo *open_file;
3194 spin_lock(&cifs_file_list_lock);
3195 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3196 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3197 spin_unlock(&cifs_file_list_lock);
3198 return 1;
3201 spin_unlock(&cifs_file_list_lock);
3202 return 0;
3205 /* We do not want to update the file size from server for inodes
3206 open for write - to avoid races with writepage extending
3207 the file - in the future we could consider allowing
3208 refreshing the inode only on increases in the file size
3209 but this is tricky to do without racing with writebehind
3210 page caching in the current Linux kernel design */
3211 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3213 if (!cifsInode)
3214 return true;
3216 if (is_inode_writable(cifsInode)) {
3217 /* This inode is open for write at least once */
3218 struct cifs_sb_info *cifs_sb;
3220 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3221 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3222 /* since no page cache to corrupt on directio
3223 we can change size safely */
3224 return true;
3227 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3228 return true;
3230 return false;
3231 } else
3232 return true;
3235 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3236 loff_t pos, unsigned len, unsigned flags,
3237 struct page **pagep, void **fsdata)
3239 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3240 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3241 loff_t page_start = pos & PAGE_MASK;
3242 loff_t i_size;
3243 struct page *page;
3244 int rc = 0;
3246 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3248 page = grab_cache_page_write_begin(mapping, index, flags);
3249 if (!page) {
3250 rc = -ENOMEM;
3251 goto out;
3254 if (PageUptodate(page))
3255 goto out;
3258 * If we write a full page it will be up to date, no need to read from
3259 * the server. If the write is short, we'll end up doing a sync write
3260 * instead.
3262 if (len == PAGE_CACHE_SIZE)
3263 goto out;
3266 * optimize away the read when we have an oplock, and we're not
3267 * expecting to use any of the data we'd be reading in. That
3268 * is, when the page lies beyond the EOF, or straddles the EOF
3269 * and the write will cover all of the existing data.
3271 if (CIFS_I(mapping->host)->clientCanCacheRead) {
3272 i_size = i_size_read(mapping->host);
3273 if (page_start >= i_size ||
3274 (offset == 0 && (pos + len) >= i_size)) {
3275 zero_user_segments(page, 0, offset,
3276 offset + len,
3277 PAGE_CACHE_SIZE);
3279 * PageChecked means that the parts of the page
3280 * to which we're not writing are considered up
3281 * to date. Once the data is copied to the
3282 * page, it can be set uptodate.
3284 SetPageChecked(page);
3285 goto out;
3289 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3291 * might as well read a page, it is fast enough. If we get
3292 * an error, we don't need to return it. cifs_write_end will
3293 * do a sync write instead since PG_uptodate isn't set.
3295 cifs_readpage_worker(file, page, &page_start);
3296 } else {
3297 /* we could try using another file handle if there is one -
3298 but how would we lock it to prevent close of that handle
3299 racing with this read? In any case
3300 this will be written out by write_end so is fine */
3302 out:
3303 *pagep = page;
3304 return rc;
3307 static int cifs_release_page(struct page *page, gfp_t gfp)
3309 if (PagePrivate(page))
3310 return 0;
3312 return cifs_fscache_release_page(page, gfp);
3315 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3317 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3319 if (offset == 0)
3320 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3323 static int cifs_launder_page(struct page *page)
3325 int rc = 0;
3326 loff_t range_start = page_offset(page);
3327 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3328 struct writeback_control wbc = {
3329 .sync_mode = WB_SYNC_ALL,
3330 .nr_to_write = 0,
3331 .range_start = range_start,
3332 .range_end = range_end,
3335 cFYI(1, "Launder page: %p", page);
3337 if (clear_page_dirty_for_io(page))
3338 rc = cifs_writepage_locked(page, &wbc);
3340 cifs_fscache_invalidate_page(page, page->mapping->host);
3341 return rc;
3344 void cifs_oplock_break(struct work_struct *work)
3346 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3347 oplock_break);
3348 struct inode *inode = cfile->dentry->d_inode;
3349 struct cifsInodeInfo *cinode = CIFS_I(inode);
3350 int rc = 0;
3352 if (inode && S_ISREG(inode->i_mode)) {
3353 if (cinode->clientCanCacheRead)
3354 break_lease(inode, O_RDONLY);
3355 else
3356 break_lease(inode, O_WRONLY);
3357 rc = filemap_fdatawrite(inode->i_mapping);
3358 if (cinode->clientCanCacheRead == 0) {
3359 rc = filemap_fdatawait(inode->i_mapping);
3360 mapping_set_error(inode->i_mapping, rc);
3361 invalidate_remote_inode(inode);
3363 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3366 rc = cifs_push_locks(cfile);
3367 if (rc)
3368 cERROR(1, "Push locks rc = %d", rc);
3371 * releasing stale oplock after recent reconnect of smb session using
3372 * a now incorrect file handle is not a data integrity issue but do
3373 * not bother sending an oplock release if session to server still is
3374 * disconnected since oplock already released by the server
3376 if (!cfile->oplock_break_cancelled) {
3377 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
3378 current->tgid, 0, 0, 0, 0,
3379 LOCKING_ANDX_OPLOCK_RELEASE, false,
3380 cinode->clientCanCacheRead ? 1 : 0);
3381 cFYI(1, "Oplock release rc = %d", rc);
3385 const struct address_space_operations cifs_addr_ops = {
3386 .readpage = cifs_readpage,
3387 .readpages = cifs_readpages,
3388 .writepage = cifs_writepage,
3389 .writepages = cifs_writepages,
3390 .write_begin = cifs_write_begin,
3391 .write_end = cifs_write_end,
3392 .set_page_dirty = __set_page_dirty_nobuffers,
3393 .releasepage = cifs_release_page,
3394 .invalidatepage = cifs_invalidate_page,
3395 .launder_page = cifs_launder_page,
3399 * cifs_readpages requires the server to support a buffer large enough to
3400 * contain the header plus one complete page of data. Otherwise, we need
3401 * to leave cifs_readpages out of the address space operations.
3403 const struct address_space_operations cifs_addr_ops_smallbuf = {
3404 .readpage = cifs_readpage,
3405 .writepage = cifs_writepage,
3406 .writepages = cifs_writepages,
3407 .write_begin = cifs_write_begin,
3408 .write_end = cifs_write_end,
3409 .set_page_dirty = __set_page_dirty_nobuffers,
3410 .releasepage = cifs_release_page,
3411 .invalidatepage = cifs_invalidate_page,
3412 .launder_page = cifs_launder_page,