usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c

   1 /*
   2  * Copyright (c) 2000-2001 Boris Popov
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *    This product includes software developed by Boris Popov.
  16  * 4. Neither the name of the author nor the names of any co-contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  * $Id: smbfs_vnops.c,v 1.128.36.1 2005/05/27 02:35:28 lindak Exp $
  33  */
  34
  35 /*
  36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  37  */
  38
  39 /*
  40  * Vnode operations
  41  *
  42  * This file is similar to nfs3_vnops.c
  43  */
  44
  45 #include <sys/param.h>
  46 #include <sys/systm.h>
  47 #include <sys/cred.h>
  48 #include <sys/vnode.h>
  49 #include <sys/vfs.h>
  50 #include <sys/filio.h>
  51 #include <sys/uio.h>
  52 #include <sys/dirent.h>
  53 #include <sys/errno.h>
  54 #include <sys/sunddi.h>
  55 #include <sys/sysmacros.h>
  56 #include <sys/kmem.h>
  57 #include <sys/cmn_err.h>
  58 #include <sys/vfs.h>
  59 #include <sys/pathname.h>
  60 #include <sys/policy.h>
  61 #include <sys/sdt.h>
  62 #include <sys/taskq_impl.h>
  63 #include <sys/zone.h>
  64 #include <sys/vmsystm.h>
  65
  66 #include <vm/hat.h>
  67 #include <vm/as.h>
  68 #include <vm/page.h>
  69 #include <vm/pvn.h>
  70 #include <vm/seg.h>
  71 #include <vm/seg_map.h>
  72 #include <vm/seg_kpm.h>
  73 #include <vm/seg_vn.h>
  74
  75 #include <netsmb/smb_osdep.h>
  76 #include <netsmb/smb.h>
  77 #include <netsmb/smb_conn.h>
  78 #include <netsmb/smb_subr.h>
  79
  80 #include <smbfs/smbfs.h>
  81 #include <smbfs/smbfs_node.h>
  82 #include <smbfs/smbfs_subr.h>
  83
  84 #include <sys/fs/smbfs_ioctl.h>
  85 #include <sys/fs_subr.h>
  86
  87 /*
  88  * We assign directory offsets like the NFS client, where the
  89  * offset increments by _one_ after each directory entry.
  90  * Further, the entries "." and ".." are always at offsets
  91  * zero and one (respectively) and the "real" entries from
  92  * the server appear at offsets starting with two.  This
  93  * macro is used to initialize the n_dirofs field after
  94  * setting n_dirseq with a _findopen call.
  95  */
  96 #define FIRST_DIROFS    2
  97
  98 /*
  99  * These characters are illegal in NTFS file names.
 100  * ref: http://support.microsoft.com/kb/147438
 101  *
 102  * Careful!  The check in the XATTR case skips the
 103  * first character to allow colon in XATTR names.
 104  */
 105 static const char illegal_chars[] = {
 106         ':',    /* colon - keep this first! */
 107         '\\',   /* back slash */
 108         '/',    /* slash */
 109         '*',    /* asterisk */
 110         '?',    /* question mark */
 111         '"',    /* double quote */
 112         '<',    /* less than sign */
 113         '>',    /* greater than sign */
 114         '|',    /* vertical bar */
 115         0
 116 };
 117
 118 /*
 119  * Turning this on causes nodes to be created in the cache
 120  * during directory listings, normally avoiding a second
 121  * OtW attribute fetch just after a readdir.
 122  */
 123 int smbfs_fastlookup = 1;
 124
 125 /* local static function defines */
 126
 127 static int      smbfslookup_cache(vnode_t *, char *, int, vnode_t **,
 128                         cred_t *);
 129 static int      smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
 130                         int cache_ok, caller_context_t *);
 131 static int      smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
 132                         int flags);
 133 static int      smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp,
 134                         char *nnm, struct smb_cred *scred, int flags);
 135 static int      smbfssetattr(vnode_t *, struct vattr *, int, cred_t *);
 136 static int      smbfs_accessx(void *, int, cred_t *);
 137 static int      smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
 138                         caller_context_t *);
 139 static void     smbfs_rele_fid(smbnode_t *, struct smb_cred *);
 140 static uint32_t xvattr_to_dosattr(smbnode_t *, struct vattr *);
 141
 142 static int      smbfs_rdwrlbn(vnode_t *, page_t *, uoff_t, size_t, int,
 143                         cred_t *);
 144 static int      smbfs_bio(struct buf *, int, cred_t *);
 145 static int      smbfs_writenp(smbnode_t *np, caddr_t base, int tcount,
 146                         struct uio *uiop, int pgcreated);
 147
 148 static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
 149 static int      smbfs_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
 150                         caller_context_t *);
 151 static int      smbfs_getapage(vnode_t *, uoff_t, size_t, uint_t *,
 152                         page_t *[], size_t, struct seg *, caddr_t,
 153                         enum seg_rw, cred_t *);
 154 static int      smbfs_putapage(vnode_t *, page_t *, uoff_t *, size_t *,
 155                         int, cred_t *);
 156 static void     smbfs_delmap_async(void *);
 157
 158 /*
 159  * Error flags used to pass information about certain special errors
 160  * which need to be handled specially.
 161  */
 162 #define SMBFS_EOF                       -98
 163
 164 /* When implementing OtW locks, make this a real function. */
 165 #define smbfs_lm_has_sleep(vp) 0
 166
 167 /*
 168  * These are the vnode ops routines which implement the vnode interface to
 169  * the networked file system.  These routines just take their parameters,
 170  * make them look networkish by putting the right info into interface structs,
 171  * and then calling the appropriate remote routine(s) to do the work.
 172  *
  173  * Note on directory name lookup caching:  If we detect a stale fhandle,
 174  * we purge the directory cache relative to that vnode.  This way, the
 175  * user won't get burned by the cache repeatedly.  See <smbfs/smbnode.h> for
 176  * more details on smbnode locking.
 177  */
 178
 179 /*
 180  * XXX
 181  * When new and relevant functionality is enabled, we should be
 182  * calling vfs_set_feature() to inform callers that pieces of
 183  * functionality are available, per PSARC 2007/227.
 184  */
 185 /* ARGSUSED */
 186 static int
 187 smbfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 188 {
 189         smbnode_t       *np;
 190         vnode_t         *vp;
 191         smbfattr_t      fa;
 192         u_int32_t       rights, rightsrcvd;
 193         u_int16_t       fid, oldfid;
 194         int             oldgenid;
 195         struct smb_cred scred;
 196         smbmntinfo_t    *smi;
 197         smb_share_t     *ssp;
 198         cred_t          *oldcr;
 199         int             tmperror;
 200         int             error = 0;
 201
 202         vp = *vpp;
 203         np = VTOSMB(vp);
 204         smi = VTOSMI(vp);
 205         ssp = smi->smi_share;
 206
 207         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 208                 return (EIO);
 209
 210         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 211                 return (EIO);
 212
 213         if (vp->v_type != VREG && vp->v_type != VDIR) { /* XXX VLNK? */
 214                 SMBVDEBUG("open eacces vtype=%d\n", vp->v_type);
 215                 return (EACCES);
 216         }
 217
 218         /*
 219          * Get exclusive access to n_fid and related stuff.
 220          * No returns after this until out.
 221          */
 222         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
 223                 return (EINTR);
 224         smb_credinit(&scred, cr);
 225
 226         /*
 227          * Keep track of the vnode type at first open.
 228          * It may change later, and we need close to do
 229          * cleanup for the type we opened.  Also deny
 230          * open of new types until old type is closed.
 231          */
 232         if (np->n_ovtype == VNON) {
 233                 ASSERT(np->n_dirrefs == 0);
 234                 ASSERT(np->n_fidrefs == 0);
 235         } else if (np->n_ovtype != vp->v_type) {
 236                 SMBVDEBUG("open n_ovtype=%d v_type=%d\n",
 237                     np->n_ovtype, vp->v_type);
 238                 error = EACCES;
 239                 goto out;
 240         }
 241
 242         /*
 243          * Directory open.  See smbfs_readvdir()
 244          */
 245         if (vp->v_type == VDIR) {
 246                 if (np->n_dirseq == NULL) {
 247                         /* first open */
 248                         error = smbfs_smb_findopen(np, "*", 1,
 249                             SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
 250                             &scred, &np->n_dirseq);
 251                         if (error != 0)
 252                                 goto out;
 253                 }
 254                 np->n_dirofs = FIRST_DIROFS;
 255                 np->n_dirrefs++;
 256                 goto have_fid;
 257         }
 258
 259         /*
 260          * If caller specified O_TRUNC/FTRUNC, then be sure to set
 261          * FWRITE (to drive successful setattr(size=0) after open)
 262          */
 263         if (flag & FTRUNC)
 264                 flag |= FWRITE;
 265
 266         /*
 267          * If we already have it open, and the FID is still valid,
 268          * check whether the rights are sufficient for FID reuse.
 269          */
 270         if (np->n_fidrefs > 0 &&
 271             np->n_vcgenid == ssp->ss_vcgenid) {
 272                 int upgrade = 0;
 273
 274                 if ((flag & FWRITE) &&
 275                     !(np->n_rights & SA_RIGHT_FILE_WRITE_DATA))
 276                         upgrade = 1;
 277                 if ((flag & FREAD) &&
 278                     !(np->n_rights & SA_RIGHT_FILE_READ_DATA))
 279                         upgrade = 1;
 280                 if (!upgrade) {
 281                         /*
 282                          *  the existing open is good enough
 283                          */
 284                         np->n_fidrefs++;
 285                         goto have_fid;
 286                 }
 287         }
 288         rights = np->n_fidrefs ? np->n_rights : 0;
 289
 290         /*
 291          * we always ask for READ_CONTROL so we can always get the
 292          * owner/group IDs to satisfy a stat.  Ditto attributes.
 293          */
 294         rights |= (STD_RIGHT_READ_CONTROL_ACCESS |
 295             SA_RIGHT_FILE_READ_ATTRIBUTES);
 296         if ((flag & FREAD))
 297                 rights |= SA_RIGHT_FILE_READ_DATA;
 298         if ((flag & FWRITE))
 299                 rights |= SA_RIGHT_FILE_WRITE_DATA |
 300                     SA_RIGHT_FILE_APPEND_DATA |
 301                     SA_RIGHT_FILE_WRITE_ATTRIBUTES;
 302
 303         bzero(&fa, sizeof (fa));
 304         error = smbfs_smb_open(np,
 305             NULL, 0, 0, /* name nmlen xattr */
 306             rights, &scred,
 307             &fid, &rightsrcvd, &fa);
 308         if (error)
 309                 goto out;
 310         smbfs_attrcache_fa(vp, &fa);
 311
 312         /*
 313          * We have a new FID and access rights.
 314          */
 315         oldfid = np->n_fid;
 316         oldgenid = np->n_vcgenid;
 317         np->n_fid = fid;
 318         np->n_vcgenid = ssp->ss_vcgenid;
 319         np->n_rights = rightsrcvd;
 320         np->n_fidrefs++;
 321         if (np->n_fidrefs > 1 &&
 322             oldgenid == ssp->ss_vcgenid) {
 323                 /*
 324                  * We already had it open (presumably because
 325                  * it was open with insufficient rights.)
 326                  * Close old wire-open.
 327                  */
 328                 tmperror = smbfs_smb_close(ssp,
 329                     oldfid, NULL, &scred);
 330                 if (tmperror)
 331                         SMBVDEBUG("error %d closing %s\n",
 332                             tmperror, np->n_rpath);
 333         }
 334
 335         /*
 336          * This thread did the open.
 337          * Save our credentials too.
 338          */
 339         mutex_enter(&np->r_statelock);
 340         oldcr = np->r_cred;
 341         np->r_cred = cr;
 342         crhold(cr);
 343         if (oldcr)
 344                 crfree(oldcr);
 345         mutex_exit(&np->r_statelock);
 346
 347 have_fid:
 348         /*
 349          * Keep track of the vnode type at first open.
 350          * (see comments above)
 351          */
 352         if (np->n_ovtype == VNON)
 353                 np->n_ovtype = vp->v_type;
 354
 355 out:
 356         smb_credrele(&scred);
 357         smbfs_rw_exit(&np->r_lkserlock);
 358         return (error);
 359 }
 360
 361 /*ARGSUSED*/
 362 static int
 363 smbfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 364         caller_context_t *ct)
 365 {
 366         smbnode_t       *np;
 367         smbmntinfo_t    *smi;
 368         struct smb_cred scred;
 369         int error = 0;
 370
 371         np = VTOSMB(vp);
 372         smi = VTOSMI(vp);
 373
 374         /*
 375          * Don't "bail out" for VFS_UNMOUNTED here,
 376          * as we want to do cleanup, etc.
 377          */
 378
 379         /*
 380          * zone_enter(2) prevents processes from changing zones with SMBFS files
 381          * open; if we happen to get here from the wrong zone we can't do
 382          * anything over the wire.
 383          */
 384         if (smi->smi_zone_ref.zref_zone != curproc->p_zone) {
 385                 /*
 386                  * We could attempt to clean up locks, except we're sure
 387                  * that the current process didn't acquire any locks on
 388                  * the file: any attempt to lock a file belong to another zone
 389                  * will fail, and one can't lock an SMBFS file and then change
 390                  * zones, as that fails too.
 391                  *
 392                  * Returning an error here is the sane thing to do.  A
 393                  * subsequent call to VN_RELE() which translates to a
 394                  * smbfs_inactive() will clean up state: if the zone of the
 395                  * vnode's origin is still alive and kicking, an async worker
 396                  * thread will handle the request (from the correct zone), and
 397                  * everything (minus the final smbfs_getattr_otw() call) should
 398                  * be OK. If the zone is going away smbfs_async_inactive() will
 399                  * throw away cached pages inline.
 400                  */
 401                 return (EIO);
 402         }
 403
 404         /*
 405          * If we are using local locking for this filesystem, then
 406          * release all of the SYSV style record locks.  Otherwise,
 407          * we are doing network locking and we need to release all
 408          * of the network locks.  All of the locks held by this
 409          * process on this file are released no matter what the
 410          * incoming reference count is.
 411          */
 412         if (smi->smi_flags & SMI_LLOCK) {
 413                 pid_t pid = ddi_get_pid();
 414                 cleanlocks(vp, pid, 0);
 415                 cleanshares(vp, pid);
 416         }
 417         /*
 418          * else doing OtW locking.  SMB servers drop all locks
 419          * on the file ID we close here, so no _lockrelease()
 420          */
 421
 422         /*
 423          * This (passed in) count is the ref. count from the
 424          * user's file_t before the closef call (fio.c).
 425          * The rest happens only on last close.
 426          */
 427         if (count > 1)
 428                 return (0);
 429
 430         /* NFS has DNLC purge here. */
 431
 432         /*
 433          * If the file was open for write and there are pages,
 434          * then make sure dirty pages written back.
 435          *
 436          * NFS does this async when "close-to-open" is off
 437          * (MI_NOCTO flag is set) to avoid blocking the caller.
 438          * For now, always do this synchronously (no B_ASYNC).
 439          */
 440         if ((flag & FWRITE) && vn_has_cached_data(vp)) {
 441                 error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
 442                 if (error == EAGAIN)
 443                         error = 0;
 444         }
 445         if (error == 0) {
 446                 mutex_enter(&np->r_statelock);
 447                 np->r_flags &= ~RSTALE;
 448                 np->r_error = 0;
 449                 mutex_exit(&np->r_statelock);
 450         }
 451
 452         /*
 453          * Decrement the reference count for the FID
 454          * and possibly do the OtW close.
 455          *
 456          * Exclusive lock for modifying n_fid stuff.
 457          * Don't want this one ever interruptible.
 458          */
 459         (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
 460         smb_credinit(&scred, cr);
 461
 462         smbfs_rele_fid(np, &scred);
 463
 464         smb_credrele(&scred);
 465         smbfs_rw_exit(&np->r_lkserlock);
 466
 467         return (0);
 468 }
 469
 470 /*
 471  * Helper for smbfs_close.  Decrement the reference count
 472  * for an SMB-level file or directory ID, and when the last
 473  * reference for the fid goes away, do the OtW close.
 474  * Also called in smbfs_inactive (defensive cleanup).
 475  */
 476 static void
 477 smbfs_rele_fid(smbnode_t *np, struct smb_cred *scred)
 478 {
 479         smb_share_t     *ssp;
 480         cred_t          *oldcr;
 481         struct smbfs_fctx *fctx;
 482         int             error;
 483         uint16_t ofid;
 484
 485         ssp = np->n_mount->smi_share;
 486         error = 0;
 487
 488         /* Make sure we serialize for n_dirseq use. */
 489         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
 490
 491         /*
 492          * Note that vp->v_type may change if a remote node
 493          * is deleted and recreated as a different type, and
 494          * our getattr may change v_type accordingly.
 495          * Now use n_ovtype to keep track of the v_type
 496          * we had during open (see comments above).
 497          */
 498         switch (np->n_ovtype) {
 499         case VDIR:
 500                 ASSERT(np->n_dirrefs > 0);
 501                 if (--np->n_dirrefs)
 502                         return;
 503                 if ((fctx = np->n_dirseq) != NULL) {
 504                         np->n_dirseq = NULL;
 505                         np->n_dirofs = 0;
 506                         error = smbfs_smb_findclose(fctx, scred);
 507                 }
 508                 break;
 509
 510         case VREG:
 511                 ASSERT(np->n_fidrefs > 0);
 512                 if (--np->n_fidrefs)
 513                         return;
 514                 if ((ofid = np->n_fid) != SMB_FID_UNUSED) {
 515                         np->n_fid = SMB_FID_UNUSED;
 516                         /* After reconnect, n_fid is invalid */
 517                         if (np->n_vcgenid == ssp->ss_vcgenid) {
 518                                 error = smbfs_smb_close(
 519                                     ssp, ofid, NULL, scred);
 520                         }
 521                 }
 522                 break;
 523
 524         default:
 525                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
 526                 break;
 527         }
 528         if (error) {
 529                 SMBVDEBUG("error %d closing %s\n",
 530                     error, np->n_rpath);
 531         }
 532
 533         /* Allow next open to use any v_type. */
 534         np->n_ovtype = VNON;
 535
 536         /*
 537          * Other "last close" stuff.
 538          */
 539         mutex_enter(&np->r_statelock);
 540         if (np->n_flag & NATTRCHANGED)
 541                 smbfs_attrcache_rm_locked(np);
 542         oldcr = np->r_cred;
 543         np->r_cred = NULL;
 544         mutex_exit(&np->r_statelock);
 545         if (oldcr != NULL)
 546                 crfree(oldcr);
 547 }
 548
 549 /* ARGSUSED */
 550 static int
 551 smbfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
 552         caller_context_t *ct)
 553 {
 554         struct smb_cred scred;
 555         struct vattr    va;
 556         smbnode_t       *np;
 557         smbmntinfo_t    *smi;
 558         smb_share_t     *ssp;
 559         offset_t        endoff;
 560         ssize_t         past_eof;
 561         int             error;
 562
 563         caddr_t         base;
 564         uoff_t  off;
 565         size_t          n;
 566         int             on;
 567         uint_t          flags;
 568
 569         np = VTOSMB(vp);
 570         smi = VTOSMI(vp);
 571         ssp = smi->smi_share;
 572
 573         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 574                 return (EIO);
 575
 576         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 577                 return (EIO);
 578
 579         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
 580
 581         if (vp->v_type != VREG)
 582                 return (EISDIR);
 583
 584         if (uiop->uio_resid == 0)
 585                 return (0);
 586
 587         /*
 588          * Like NFS3, just check for 63-bit overflow.
 589          * Our SMB layer takes care to return EFBIG
 590          * when it has to fallback to a 32-bit call.
 591          */
 592         endoff = uiop->uio_loffset + uiop->uio_resid;
 593         if (uiop->uio_loffset < 0 || endoff < 0)
 594                 return (EINVAL);
 595
 596         /* get vnode attributes from server */
 597         va.va_mask = VATTR_SIZE | VATTR_MTIME;
 598         if (error = smbfsgetattr(vp, &va, cr))
 599                 return (error);
 600
 601         /* Update mtime with mtime from server here? */
 602
 603         /* if offset is beyond EOF, read nothing */
 604         if (uiop->uio_loffset >= va.va_size)
 605                 return (0);
 606
 607         /*
 608          * Limit the read to the remaining file size.
 609          * Do this by temporarily reducing uio_resid
 610          * by the amount the lies beyoned the EOF.
 611          */
 612         if (endoff > va.va_size) {
 613                 past_eof = (ssize_t)(endoff - va.va_size);
 614                 uiop->uio_resid -= past_eof;
 615         } else
 616                 past_eof = 0;
 617
 618         /*
 619          * Bypass VM if caching has been disabled (e.g., locking) or if
 620          * using client-side direct I/O and the file is not mmap'd and
 621          * there are no cached pages.
 622          */
 623         if ((vp->v_flag & VNOCACHE) ||
 624             (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
 625             np->r_mapcnt == 0 && np->r_inmap == 0 &&
 626             !vn_has_cached_data(vp))) {
 627
 628                 /* Shared lock for n_fid use in smb_rwuio */
 629                 if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
 630                         return (EINTR);
 631                 smb_credinit(&scred, cr);
 632
 633                 /* After reconnect, n_fid is invalid */
 634                 if (np->n_vcgenid != ssp->ss_vcgenid)
 635                         error = ESTALE;
 636                 else
 637                         error = smb_rwuio(ssp, np->n_fid, UIO_READ,
 638                             uiop, &scred, smb_timo_read);
 639
 640                 smb_credrele(&scred);
 641                 smbfs_rw_exit(&np->r_lkserlock);
 642
 643                 /* undo adjustment of resid */
 644                 uiop->uio_resid += past_eof;
 645
 646                 return (error);
 647         }
 648
 649         /* (else) Do I/O through segmap. */
 650         do {
 651                 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
 652                 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
 653                 n = MIN(MAXBSIZE - on, uiop->uio_resid);
 654
 655                 error = smbfs_validate_caches(vp, cr);
 656                 if (error)
 657                         break;
 658
 659                 /* NFS waits for RINCACHEPURGE here. */
 660
 661                 if (vpm_enable) {
 662                         /*
 663                          * Copy data.
 664                          */
 665                         error = vpm_data_copy(vp, off + on, n, uiop,
 666                             1, NULL, 0, S_READ);
 667                 } else {
 668                         base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
 669                             S_READ);
 670
 671                         error = uiomove(base + on, n, UIO_READ, uiop);
 672                 }
 673
 674                 if (!error) {
 675                         /*
 676                          * If read a whole block or read to eof,
 677                          * won't need this buffer again soon.
 678                          */
 679                         mutex_enter(&np->r_statelock);
 680                         if (n + on == MAXBSIZE ||
 681                             uiop->uio_loffset == np->r_size)
 682                                 flags = SM_DONTNEED;
 683                         else
 684                                 flags = 0;
 685                         mutex_exit(&np->r_statelock);
 686                         if (vpm_enable) {
 687                                 error = vpm_sync_pages(vp, off, n, flags);
 688                         } else {
 689                                 error = segmap_release(segkmap, base, flags);
 690                         }
 691                 } else {
 692                         if (vpm_enable) {
 693                                 (void) vpm_sync_pages(vp, off, n, 0);
 694                         } else {
 695                                 (void) segmap_release(segkmap, base, 0);
 696                         }
 697                 }
 698         } while (!error && uiop->uio_resid > 0);
 699
 700         /* undo adjustment of resid */
 701         uiop->uio_resid += past_eof;
 702
 703         return (error);
 704 }
 705
 706
 707 /* ARGSUSED */
 708 static int
 709 smbfs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
 710         caller_context_t *ct)
 711 {
 712         struct smb_cred scred;
 713         struct vattr    va;
 714         smbnode_t       *np;
 715         smbmntinfo_t    *smi;
 716         smb_share_t     *ssp;
 717         offset_t        endoff, limit;
 718         ssize_t         past_limit;
 719         int             error, timo;
 720         caddr_t         base;
 721         uoff_t  off;
 722         size_t          n;
 723         int             on;
 724         uint_t          flags;
 725         uoff_t  last_off;
 726         size_t          last_resid;
 727         uint_t          bsize;
 728
 729         np = VTOSMB(vp);
 730         smi = VTOSMI(vp);
 731         ssp = smi->smi_share;
 732
 733         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 734                 return (EIO);
 735
 736         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 737                 return (EIO);
 738
 739         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
 740
 741         if (vp->v_type != VREG)
 742                 return (EISDIR);
 743
 744         if (uiop->uio_resid == 0)
 745                 return (0);
 746
 747         /*
 748          * Handle ioflag bits: (FAPPEND|FSYNC|FDSYNC)
 749          */
 750         if (ioflag & (FAPPEND | FSYNC)) {
 751                 if (np->n_flag & NMODIFIED) {
 752                         smbfs_attrcache_remove(np);
 753                 }
 754         }
 755         if (ioflag & FAPPEND) {
 756                 /*
 757                  * File size can be changed by another client
 758                  *
 759                  * Todo: Consider redesigning this to use a
 760                  * handle opened for append instead.
 761                  */
 762                 va.va_mask = VATTR_SIZE;
 763                 if (error = smbfsgetattr(vp, &va, cr))
 764                         return (error);
 765                 uiop->uio_loffset = va.va_size;
 766         }
 767
 768         /*
 769          * Like NFS3, just check for 63-bit overflow.
 770          */
 771         endoff = uiop->uio_loffset + uiop->uio_resid;
 772         if (uiop->uio_loffset < 0 || endoff < 0)
 773                 return (EINVAL);
 774
 775         /*
 776          * Check to make sure that the process will not exceed
 777          * its limit on file size.  It is okay to write up to
 778          * the limit, but not beyond.  Thus, the write which
 779          * reaches the limit will be short and the next write
 780          * will return an error.
 781          *
 782          * So if we're starting at or beyond the limit, EFBIG.
 783          * Otherwise, temporarily reduce resid to the amount
 784          * that is after the limit.
 785          */
 786         limit = uiop->uio_llimit;
 787         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 788                 limit = MAXOFFSET_T;
 789         if (uiop->uio_loffset >= limit) {
 790                 proc_t *p = ttoproc(curthread);
 791
 792                 mutex_enter(&p->p_lock);
 793                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 794                     p->p_rctls, p, RCA_UNSAFE_SIGINFO);
 795                 mutex_exit(&p->p_lock);
 796                 return (EFBIG);
 797         }
 798         if (endoff > limit) {
 799                 past_limit = (ssize_t)(endoff - limit);
 800                 uiop->uio_resid -= past_limit;
 801         } else
 802                 past_limit = 0;
 803
 804         /*
 805          * Bypass VM if caching has been disabled (e.g., locking) or if
 806          * using client-side direct I/O and the file is not mmap'd and
 807          * there are no cached pages.
 808          */
 809         if ((vp->v_flag & VNOCACHE) ||
 810             (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
 811             np->r_mapcnt == 0 && np->r_inmap == 0 &&
 812             !vn_has_cached_data(vp))) {
 813
 814 smbfs_fwrite:
 815                 if (np->r_flags & RSTALE) {
 816                         last_resid = uiop->uio_resid;
 817                         last_off = uiop->uio_loffset;
 818                         error = np->r_error;
 819                         /*
 820                          * A close may have cleared r_error, if so,
 821                          * propagate ESTALE error return properly
 822                          */
 823                         if (error == 0)
 824                                 error = ESTALE;
 825                         goto bottom;
 826                 }
 827
 828                 /* Timeout: longer for append. */
 829                 timo = smb_timo_write;
 830                 if (endoff > np->r_size)
 831                         timo = smb_timo_append;
 832
 833                 /* Shared lock for n_fid use in smb_rwuio */
 834                 if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
 835                         return (EINTR);
 836                 smb_credinit(&scred, cr);
 837
 838                 /* After reconnect, n_fid is invalid */
 839                 if (np->n_vcgenid != ssp->ss_vcgenid)
 840                         error = ESTALE;
 841                 else
 842                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
 843                             uiop, &scred, timo);
 844
 845                 if (error == 0) {
 846                         mutex_enter(&np->r_statelock);
 847                         np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
 848                         if (uiop->uio_loffset > (offset_t)np->r_size)
 849                                 np->r_size = (len_t)uiop->uio_loffset;
 850                         mutex_exit(&np->r_statelock);
 851                         if (ioflag & (FSYNC | FDSYNC)) {
 852                                 /* Don't error the I/O if this fails. */
 853                                 (void) smbfs_smb_flush(np, &scred);
 854                         }
 855                 }
 856
 857                 smb_credrele(&scred);
 858                 smbfs_rw_exit(&np->r_lkserlock);
 859
 860                 /* undo adjustment of resid */
 861                 uiop->uio_resid += past_limit;
 862
 863                 return (error);
 864         }
 865
 866         /* (else) Do I/O through segmap. */
 867         bsize = vp->v_vfsp->vfs_bsize;
 868
 869         do {
 870                 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
 871                 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
 872                 n = MIN(MAXBSIZE - on, uiop->uio_resid);
 873
 874                 last_resid = uiop->uio_resid;
 875                 last_off = uiop->uio_loffset;
 876
 877                 if (np->r_flags & RSTALE) {
 878                         error = np->r_error;
 879                         /*
 880                          * A close may have cleared r_error, if so,
 881                          * propagate ESTALE error return properly
 882                          */
 883                         if (error == 0)
 884                                 error = ESTALE;
 885                         break;
 886                 }
 887
 888                 /*
 889                  * From NFS: Don't create dirty pages faster than they
 890                  * can be cleaned.
 891                  *
 892                  * Here NFS also checks for async writes (np->r_awcount)
 893                  */
 894                 mutex_enter(&np->r_statelock);
 895                 while (np->r_gcount > 0) {
 896                         if (SMBINTR(vp)) {
 897                                 klwp_t *lwp = ttolwp(curthread);
 898
 899                                 if (lwp != NULL)
 900                                         lwp->lwp_nostop++;
 901                                 if (!cv_wait_sig(&np->r_cv, &np->r_statelock)) {
 902                                         mutex_exit(&np->r_statelock);
 903                                         if (lwp != NULL)
 904                                                 lwp->lwp_nostop--;
 905                                         error = EINTR;
 906                                         goto bottom;
 907                                 }
 908                                 if (lwp != NULL)
 909                                         lwp->lwp_nostop--;
 910                         } else
 911                                 cv_wait(&np->r_cv, &np->r_statelock);
 912                 }
 913                 mutex_exit(&np->r_statelock);
 914
 915                 /*
 916                  * Touch the page and fault it in if it is not in core
 917                  * before segmap_getmapflt or vpm_data_copy can lock it.
 918                  * This is to avoid the deadlock if the buffer is mapped
 919                  * to the same file through mmap which we want to write.
 920                  */
 921                 uio_prefaultpages((long)n, uiop);
 922
 923                 if (vpm_enable) {
 924                         /*
 925                          * It will use kpm mappings, so no need to
 926                          * pass an address.
 927                          */
 928                         error = smbfs_writenp(np, NULL, n, uiop, 0);
 929                 } else {
 930                         if (segmap_kpm) {
 931                                 int pon = uiop->uio_loffset & PAGEOFFSET;
 932                                 size_t pn = MIN(PAGESIZE - pon,
 933                                     uiop->uio_resid);
 934                                 int pagecreate;
 935
 936                                 mutex_enter(&np->r_statelock);
 937                                 pagecreate = (pon == 0) && (pn == PAGESIZE ||
 938                                     uiop->uio_loffset + pn >= np->r_size);
 939                                 mutex_exit(&np->r_statelock);
 940
 941                                 base = segmap_getmapflt(segkmap, vp, off + on,
 942                                     pn, !pagecreate, S_WRITE);
 943
 944                                 error = smbfs_writenp(np, base + pon, n, uiop,
 945                                     pagecreate);
 946
 947                         } else {
 948                                 base = segmap_getmapflt(segkmap, vp, off + on,
 949                                     n, 0, S_READ);
 950                                 error = smbfs_writenp(np, base + on, n, uiop, 0);
 951                         }
 952                 }
 953
 954                 if (!error) {
 955                         if (smi->smi_flags & SMI_NOAC)
 956                                 flags = SM_WRITE;
 957                         else if ((uiop->uio_loffset % bsize) == 0 ||
 958                             IS_SWAPVP(vp)) {
 959                                 /*
 960                                  * Have written a whole block.
 961                                  * Start an asynchronous write
 962                                  * and mark the buffer to
 963                                  * indicate that it won't be
 964                                  * needed again soon.
 965                                  */
 966                                 flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
 967                         } else
 968                                 flags = 0;
 969                         if ((ioflag & (FSYNC|FDSYNC)) ||
 970                             (np->r_flags & ROUTOFSPACE)) {
 971                                 flags &= ~SM_ASYNC;
 972                                 flags |= SM_WRITE;
 973                         }
 974                         if (vpm_enable) {
 975                                 error = vpm_sync_pages(vp, off, n, flags);
 976                         } else {
 977                                 error = segmap_release(segkmap, base, flags);
 978                         }
 979                 } else {
 980                         if (vpm_enable) {
 981                                 (void) vpm_sync_pages(vp, off, n, 0);
 982                         } else {
 983                                 (void) segmap_release(segkmap, base, 0);
 984                         }
 985                         /*
 986                          * In the event that we got an access error while
 987                          * faulting in a page for a write-only file just
 988                          * force a write.
 989                          */
 990                         if (error == EACCES)
 991                                 goto smbfs_fwrite;
 992                 }
 993         } while (!error && uiop->uio_resid > 0);
 994
 995 bottom:
 996         /* undo adjustment of resid */
 997         if (error) {
 998                 uiop->uio_resid = last_resid + past_limit;
 999                 uiop->uio_loffset = last_off;
1000         } else {
1001                 uiop->uio_resid += past_limit;
1002         }
1003
1004         return (error);
1005 }
1006
1007 /*
1008  * Like nfs_client.c: writerp()
1009  *
1010  * Write by creating pages and uiomove data onto them.
1011  */
1012
1013 int
1014 smbfs_writenp(smbnode_t *np, caddr_t base, int tcount, struct uio *uio,
1015     int pgcreated)
1016 {
1017         int             pagecreate;
1018         int             n;
1019         int             saved_n;
1020         caddr_t         saved_base;
1021         uoff_t  offset;
1022         int             error;
1023         int             sm_error;
1024         vnode_t         *vp = SMBTOV(np);
1025
1026         ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
1027         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
1028         if (!vpm_enable) {
1029                 ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
1030         }
1031
1032         /*
1033          * Move bytes in at most PAGESIZE chunks. We must avoid
1034          * spanning pages in uiomove() because page faults may cause
1035          * the cache to be invalidated out from under us. The r_size is not
1036          * updated until after the uiomove. If we push the last page of a
1037          * file before r_size is correct, we will lose the data written past
1038          * the current (and invalid) r_size.
1039          */
1040         do {
1041                 offset = uio->uio_loffset;
1042                 pagecreate = 0;
1043
1044                 /*
1045                  * n is the number of bytes required to satisfy the request
1046                  *   or the number of bytes to fill out the page.
1047                  */
1048                 n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
1049
1050                 /*
1051                  * Check to see if we can skip reading in the page
1052                  * and just allocate the memory.  We can do this
1053                  * if we are going to rewrite the entire mapping
1054                  * or if we are going to write to or beyond the current
1055                  * end of file from the beginning of the mapping.
1056                  *
1057                  * The read of r_size is now protected by r_statelock.
1058                  */
1059                 mutex_enter(&np->r_statelock);
1060                 /*
1061                  * When pgcreated is nonzero the caller has already done
1062                  * a segmap_getmapflt with forcefault 0 and S_WRITE. With
1063                  * segkpm this means we already have at least one page
1064                  * created and mapped at base.
1065                  */
1066                 pagecreate = pgcreated ||
1067                     ((offset & PAGEOFFSET) == 0 &&
1068                     (n == PAGESIZE || ((offset + n) >= np->r_size)));
1069
1070                 mutex_exit(&np->r_statelock);
1071                 if (!vpm_enable && pagecreate) {
1072                         /*
1073                          * The last argument tells segmap_pagecreate() to
1074                          * always lock the page, as opposed to sometimes
1075                          * returning with the page locked. This way we avoid a
1076                          * fault on the ensuing uiomove(), but also
1077                          * more importantly (to fix bug 1094402) we can
1078                          * call segmap_fault() to unlock the page in all
1079                          * cases. An alternative would be to modify
1080                          * segmap_pagecreate() to tell us when it is
1081                          * locking a page, but that's a fairly major
1082                          * interface change.
1083                          */
1084                         if (pgcreated == 0)
1085                                 (void) segmap_pagecreate(segkmap, base,
1086                                     (uint_t)n, 1);
1087                         saved_base = base;
1088                         saved_n = n;
1089                 }
1090
1091                 /*
1092                  * The number of bytes of data in the last page can not
1093                  * be accurately be determined while page is being
1094                  * uiomove'd to and the size of the file being updated.
1095                  * Thus, inform threads which need to know accurately
1096                  * how much data is in the last page of the file.  They
1097                  * will not do the i/o immediately, but will arrange for
1098                  * the i/o to happen later when this modify operation
1099                  * will have finished.
1100                  */
1101                 ASSERT(!(np->r_flags & RMODINPROGRESS));
1102                 mutex_enter(&np->r_statelock);
1103                 np->r_flags |= RMODINPROGRESS;
1104                 np->r_modaddr = (offset & MAXBMASK);
1105                 mutex_exit(&np->r_statelock);
1106
1107                 if (vpm_enable) {
1108                         /*
1109                          * Copy data. If new pages are created, part of
1110                          * the page that is not written will be initizliazed
1111                          * with zeros.
1112                          */
1113                         error = vpm_data_copy(vp, offset, n, uio,
1114                             !pagecreate, NULL, 0, S_WRITE);
1115                 } else {
1116                         error = uiomove(base, n, UIO_WRITE, uio);
1117                 }
1118
1119                 /*
1120                  * r_size is the maximum number of
1121                  * bytes known to be in the file.
1122                  * Make sure it is at least as high as the
1123                  * first unwritten byte pointed to by uio_loffset.
1124                  */
1125                 mutex_enter(&np->r_statelock);
1126                 if (np->r_size < uio->uio_loffset)
1127                         np->r_size = uio->uio_loffset;
1128                 np->r_flags &= ~RMODINPROGRESS;
1129                 np->r_flags |= RDIRTY;
1130                 mutex_exit(&np->r_statelock);
1131
1132                 /* n = # of bytes written */
1133                 n = (int)(uio->uio_loffset - offset);
1134
1135                 if (!vpm_enable) {
1136                         base += n;
1137                 }
1138                 tcount -= n;
1139                 /*
1140                  * If we created pages w/o initializing them completely,
1141                  * we need to zero the part that wasn't set up.
1142                  * This happens on a most EOF write cases and if
1143                  * we had some sort of error during the uiomove.
1144                  */
1145                 if (!vpm_enable && pagecreate) {
1146                         if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
1147                                 (void) kzero(base, PAGESIZE - n);
1148
1149                         if (pgcreated) {
1150                                 /*
1151                                  * Caller is responsible for this page,
1152                                  * it was not created in this loop.
1153                                  */
1154                                 pgcreated = 0;
1155                         } else {
1156                                 /*
1157                                  * For bug 1094402: segmap_pagecreate locks
1158                                  * page. Unlock it. This also unlocks the
1159                                  * pages allocated by page_create_va() in
1160                                  * segmap_pagecreate().
1161                                  */
1162                                 sm_error = segmap_fault(kas.a_hat, segkmap,
1163                                     saved_base, saved_n,
1164                                     F_SOFTUNLOCK, S_WRITE);
1165                                 if (error == 0)
1166                                         error = sm_error;
1167                         }
1168                 }
1169         } while (tcount > 0 && error == 0);
1170
1171         return (error);
1172 }
1173
1174 /*
1175  * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
1176  * Like nfs3_rdwrlbn()
1177  */
1178 static int
1179 smbfs_rdwrlbn(vnode_t *vp, page_t *pp, uoff_t off, size_t len,
1180         int flags, cred_t *cr)
1181 {
1182         smbmntinfo_t    *smi = VTOSMI(vp);
1183         struct buf *bp;
1184         int error;
1185         int sync;
1186
1187         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1188                 return (EIO);
1189
1190         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1191                 return (EIO);
1192
1193         bp = pageio_setup(pp, len, vp, flags);
1194         ASSERT(bp != NULL);
1195
1196         /*
1197          * pageio_setup should have set b_addr to 0.  This
1198          * is correct since we want to do I/O on a page
1199          * boundary.  bp_mapin will use this addr to calculate
1200          * an offset, and then set b_addr to the kernel virtual
1201          * address it allocated for us.
1202          */
1203         ASSERT(bp->b_un.b_addr == 0);
1204
1205         bp->b_edev = 0;
1206         bp->b_dev = 0;
1207         bp->b_lblkno = lbtodb(off);
1208         bp->b_file = vp;
1209         bp->b_offset = (offset_t)off;
1210         bp_mapin(bp);
1211
1212         /*
1213          * Calculate the desired level of stability to write data.
1214          */
1215         if ((flags & (B_WRITE|B_ASYNC)) == (B_WRITE|B_ASYNC) &&
1216             freemem > desfree) {
1217                 sync = 0;
1218         } else {
1219                 sync = 1;
1220         }
1221
1222         error = smbfs_bio(bp, sync, cr);
1223
1224         bp_mapout(bp);
1225         pageio_done(bp);
1226
1227         return (error);
1228 }
1229
1230
1231 /*
1232  * Corresponds to nfs3_vnopc.c : nfs3_bio(), though the NFS code
1233  * uses nfs3read()/nfs3write() where we use smb_rwuio().  Also,
1234  * NFS has this later in the file.  Move it up here closer to
1235  * the one call site just above.
1236  */
1237
1238 static int
1239 smbfs_bio(struct buf *bp, int sync, cred_t *cr)
1240 {
1241         struct iovec aiov[1];
1242         struct uio  auio;
1243         struct smb_cred scred;
1244         smbnode_t *np = VTOSMB(bp->b_vp);
1245         smbmntinfo_t *smi = np->n_mount;
1246         smb_share_t *ssp = smi->smi_share;
1247         offset_t offset;
1248         offset_t endoff;
1249         size_t count;
1250         size_t past_eof;
1251         int error;
1252
1253         ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
1254
1255         offset = ldbtob(bp->b_lblkno);
1256         count = bp->b_bcount;
1257         endoff = offset + count;
1258         if (offset < 0 || endoff < 0)
1259                 return (EINVAL);
1260
1261         /*
1262          * Limit file I/O to the remaining file size, but see
1263          * the notes in smbfs_getpage about SMBFS_EOF.
1264          */
1265         mutex_enter(&np->r_statelock);
1266         if (offset >= np->r_size) {
1267                 mutex_exit(&np->r_statelock);
1268                 if (bp->b_flags & B_READ) {
1269                         return (SMBFS_EOF);
1270                 } else {
1271                         return (EINVAL);
1272                 }
1273         }
1274         if (endoff > np->r_size) {
1275                 past_eof = (size_t)(endoff - np->r_size);
1276                 count -= past_eof;
1277         } else
1278                 past_eof = 0;
1279         mutex_exit(&np->r_statelock);
1280         ASSERT(count > 0);
1281
1282         /* Caller did bpmapin().  Mapped address is... */
1283         aiov[0].iov_base = bp->b_un.b_addr;
1284         aiov[0].iov_len = count;
1285         auio.uio_iov = aiov;
1286         auio.uio_iovcnt = 1;
1287         auio.uio_loffset = offset;
1288         auio.uio_segflg = UIO_SYSSPACE;
1289         auio.uio_fmode = 0;
1290         auio.uio_resid = count;
1291
1292         /* Shared lock for n_fid use in smb_rwuio */
1293         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER,
1294             smi->smi_flags & SMI_INT))
1295                 return (EINTR);
1296         smb_credinit(&scred, cr);
1297
1298         DTRACE_IO1(start, struct buf *, bp);
1299
1300         if (bp->b_flags & B_READ) {
1301
1302                 /* After reconnect, n_fid is invalid */
1303                 if (np->n_vcgenid != ssp->ss_vcgenid)
1304                         error = ESTALE;
1305                 else
1306                         error = smb_rwuio(ssp, np->n_fid, UIO_READ,
1307                             &auio, &scred, smb_timo_read);
1308
1309                 /* Like NFS, only set b_error here. */
1310                 bp->b_error = error;
1311                 bp->b_resid = auio.uio_resid;
1312
1313                 if (!error && auio.uio_resid != 0)
1314                         error = EIO;
1315                 if (!error && past_eof != 0) {
1316                         /* Zero the memory beyond EOF. */
1317                         bzero(bp->b_un.b_addr + count, past_eof);
1318                 }
1319         } else {
1320
1321                 /* After reconnect, n_fid is invalid */
1322                 if (np->n_vcgenid != ssp->ss_vcgenid)
1323                         error = ESTALE;
1324                 else
1325                         error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
1326                             &auio, &scred, smb_timo_write);
1327
1328                 /* Like NFS, only set b_error here. */
1329                 bp->b_error = error;
1330                 bp->b_resid = auio.uio_resid;
1331
1332                 if (!error && auio.uio_resid != 0)
1333                         error = EIO;
1334                 if (!error && sync) {
1335                         (void) smbfs_smb_flush(np, &scred);
1336                 }
1337         }
1338
1339         /*
1340          * This comes from nfs3_commit()
1341          */
1342         if (error != 0) {
1343                 mutex_enter(&np->r_statelock);
1344                 if (error == ESTALE)
1345                         np->r_flags |= RSTALE;
1346                 if (!np->r_error)
1347                         np->r_error = error;
1348                 mutex_exit(&np->r_statelock);
1349                 bp->b_flags |= B_ERROR;
1350         }
1351
1352         DTRACE_IO1(done, struct buf *, bp);
1353
1354         smb_credrele(&scred);
1355         smbfs_rw_exit(&np->r_lkserlock);
1356
1357         if (error == ESTALE)
1358                 smbfs_attrcache_remove(np);
1359
1360         return (error);
1361 }
1362
1363 /*
1364  * Here NFS has: nfs3write, nfs3read
1365  * We use smb_rwuio instead.
1366  */
1367
1368 /* ARGSUSED */
1369 static int
1370 smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag,
1371         cred_t *cr, int *rvalp, caller_context_t *ct)
1372 {
1373         int             error;
1374         smbmntinfo_t    *smi;
1375
1376         smi = VTOSMI(vp);
1377
1378         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1379                 return (EIO);
1380
1381         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1382                 return (EIO);
1383
1384         switch (cmd) {
1385
1386         case _FIOFFS:
1387                 error = smbfs_fsync(vp, 0, cr, ct);
1388                 break;
1389
1390                 /*
1391                  * The following two ioctls are used by bfu.
1392                  * Silently ignore to avoid bfu errors.
1393                  */
1394         case _FIOGDIO:
1395         case _FIOSDIO:
1396                 error = 0;
1397                 break;
1398
1399 #if 0   /* Todo - SMB ioctl query regions */
1400         case _FIO_SEEK_DATA:
1401         case _FIO_SEEK_HOLE:
1402 #endif
1403
1404         case _FIODIRECTIO:
1405                 error = smbfs_directio(vp, (int)arg, cr);
1406                 break;
1407
1408                 /*
1409                  * Allow get/set with "raw" security descriptor (SD) data.
1410                  * Useful for testing, diagnosing idmap problems, etc.
1411                  */
1412         case SMBFSIO_GETSD:
1413                 error = smbfs_acl_iocget(vp, arg, flag, cr);
1414                 break;
1415
1416         case SMBFSIO_SETSD:
1417                 error = smbfs_acl_iocset(vp, arg, flag, cr);
1418                 break;
1419
1420         default:
1421                 error = ENOTTY;
1422                 break;
1423         }
1424
1425         return (error);
1426 }
1427
1428
1429 /*
1430  * Return either cached or remote attributes. If get remote attr
1431  * use them to check and invalidate caches, then cache the new attributes.
1432  */
1433 /* ARGSUSED */
1434 static int
1435 smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1436         caller_context_t *ct)
1437 {
1438         smbnode_t *np;
1439         smbmntinfo_t *smi;
1440         int error;
1441
1442         smi = VTOSMI(vp);
1443
1444         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1445                 return (EIO);
1446
1447         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1448                 return (EIO);
1449
1450         /*
1451          * If it has been specified that the return value will
1452          * just be used as a hint, and we are only being asked
1453          * for size, fsid or rdevid, then return the client's
1454          * notion of these values without checking to make sure
1455          * that the attribute cache is up to date.
1456          * The whole point is to avoid an over the wire GETATTR
1457          * call.
1458          */
1459         np = VTOSMB(vp);
1460         if (flags & ATTR_HINT) {
1461                 if (vap->va_mask ==
1462                     (vap->va_mask & (VATTR_SIZE | VATTR_FSID | VATTR_RDEV))) {
1463                         mutex_enter(&np->r_statelock);
1464                         if (vap->va_mask | VATTR_SIZE)
1465                                 vap->va_size = np->r_size;
1466                         if (vap->va_mask | VATTR_FSID)
1467                                 vap->va_fsid = vp->v_vfsp->vfs_dev;
1468                         if (vap->va_mask | VATTR_RDEV)
1469                                 vap->va_rdev = vp->v_rdev;
1470                         mutex_exit(&np->r_statelock);
1471                         return (0);
1472                 }
1473         }
1474
1475         /*
1476          * Only need to flush pages if asking for the mtime
1477          * and if there any dirty pages.
1478          *
1479          * Here NFS also checks for async writes (np->r_awcount)
1480          */
1481         if (vap->va_mask & VATTR_MTIME) {
1482                 if (vn_has_cached_data(vp) &&
1483                     ((np->r_flags & RDIRTY) != 0)) {
1484                         mutex_enter(&np->r_statelock);
1485                         np->r_gcount++;
1486                         mutex_exit(&np->r_statelock);
1487                         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
1488                         mutex_enter(&np->r_statelock);
1489                         if (error && (error == ENOSPC || error == EDQUOT)) {
1490                                 if (!np->r_error)
1491                                         np->r_error = error;
1492                         }
1493                         if (--np->r_gcount == 0)
1494                                 cv_broadcast(&np->r_cv);
1495                         mutex_exit(&np->r_statelock);
1496                 }
1497         }
1498
1499         return (smbfsgetattr(vp, vap, cr));
1500 }
1501
1502 /* smbfsgetattr() in smbfs_client.c */
1503
1504 /*ARGSUSED4*/
1505 static int
1506 smbfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1507                 caller_context_t *ct)
1508 {
1509         vfs_t           *vfsp;
1510         smbmntinfo_t    *smi;
1511         int             error;
1512         uint_t          mask;
1513         struct vattr    oldva;
1514
1515         vfsp = vp->v_vfsp;
1516         smi = VFTOSMI(vfsp);
1517
1518         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1519                 return (EIO);
1520
1521         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1522                 return (EIO);
1523
1524         mask = vap->va_mask;
1525         if (mask & VATTR_NOSET)
1526                 return (EINVAL);
1527
1528         if (vfsp->vfs_flag & VFS_RDONLY)
1529                 return (EROFS);
1530
1531         /*
1532          * This is a _local_ access check so that only the owner of
1533          * this mount can set attributes.  With ACLs enabled, the
1534          * file owner can be different from the mount owner, and we
1535          * need to check the _mount_ owner here.  See _access_rwx
1536          */
1537         bzero(&oldva, sizeof (oldva));
1538         oldva.va_mask = VATTR_TYPE | VATTR_MODE;
1539         error = smbfsgetattr(vp, &oldva, cr);
1540         if (error)
1541                 return (error);
1542         oldva.va_mask |= VATTR_UID | VATTR_GID;
1543         oldva.va_uid = smi->smi_uid;
1544         oldva.va_gid = smi->smi_gid;
1545
1546         error = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
1547             smbfs_accessx, vp);
1548         if (error)
1549                 return (error);
1550
1551         if (mask & (VATTR_UID | VATTR_GID)) {
1552                 if (smi->smi_flags & SMI_ACL)
1553                         error = smbfs_acl_setids(vp, vap, cr);
1554                 else
1555                         error = ENOSYS;
1556                 if (error != 0) {
1557                         SMBVDEBUG("error %d seting UID/GID on %s",
1558                             error, VTOSMB(vp)->n_rpath);
1559                         /*
1560                          * It might be more correct to return the
1561                          * error here, but that causes complaints
1562                          * when root extracts a cpio archive, etc.
1563                          * So ignore this error, and go ahead with
1564                          * the rest of the setattr work.
1565                          */
1566                 }
1567         }
1568
1569         error = smbfssetattr(vp, vap, flags, cr);
1570
1571 #ifdef  SMBFS_VNEVENT
1572         if (error == 0 && (vap->va_mask & VATTR_SIZE) && vap->va_size == 0)
1573                 vnevent_truncate(vp, ct);
1574 #endif
1575
1576         return (error);
1577 }
1578
1579 /*
1580  * Mostly from Darwin smbfs_setattr()
1581  * but then modified a lot.
1582  */
1583 /* ARGSUSED */
1584 static int
1585 smbfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr)
1586 {
1587         int             error = 0;
1588         smbnode_t       *np = VTOSMB(vp);
1589         uint_t          mask = vap->va_mask;
1590         struct timespec *mtime, *atime;
1591         struct smb_cred scred;
1592         int             cerror, modified = 0;
1593         unsigned short  fid;
1594         int have_fid = 0;
1595         uint32_t rights = 0;
1596         uint32_t dosattr = 0;
1597
1598         ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);
1599
1600         /*
1601          * There are no settable attributes on the XATTR dir,
1602          * so just silently ignore these.  On XATTR files,
1603          * you can set the size but nothing else.
1604          */
1605         if (vp->v_flag & V_XATTRDIR)
1606                 return (0);
1607         if (np->n_flag & N_XATTR) {
1608                 if (mask & VATTR_TIMES)
1609                         SMBVDEBUG("ignore set time on xattr\n");
1610                 mask &= VATTR_SIZE;
1611         }
1612
1613         /*
1614          * Only need to flush pages if there are any pages and
1615          * if the file is marked as dirty in some fashion.  The
1616          * file must be flushed so that we can accurately
1617          * determine the size of the file and the cached data
1618          * after the SETATTR returns.  A file is considered to
1619          * be dirty if it is either marked with RDIRTY, has
1620          * outstanding i/o's active, or is mmap'd.  In this
1621          * last case, we can't tell whether there are dirty
1622          * pages, so we flush just to be sure.
1623          */
1624         if (vn_has_cached_data(vp) &&
1625             ((np->r_flags & RDIRTY) ||
1626             np->r_count > 0 ||
1627             np->r_mapcnt > 0)) {
1628                 ASSERT(vp->v_type != VCHR);
1629                 error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, NULL);
1630                 if (error && (error == ENOSPC || error == EDQUOT)) {
1631                         mutex_enter(&np->r_statelock);
1632                         if (!np->r_error)
1633                                 np->r_error = error;
1634                         mutex_exit(&np->r_statelock);
1635                 }
1636         }
1637
1638         /*
1639          * If our caller is trying to set multiple attributes, they
1640          * can make no assumption about what order they are done in.
1641          * Here we try to do them in order of decreasing likelihood
1642          * of failure, just to minimize the chance we'll wind up
1643          * with a partially complete request.
1644          */
1645
1646         /* Shared lock for (possible) n_fid use. */
1647         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
1648                 return (EINTR);
1649         smb_credinit(&scred, cr);
1650
1651         /*
1652          * If the caller has provided extensible attributes,
1653          * map those into DOS attributes supported by SMB.
1654          * Note: zero means "no change".
1655          */
1656         if (mask & VATTR_XVATTR)
1657                 dosattr = xvattr_to_dosattr(np, vap);
1658
1659         /*
1660          * Will we need an open handle for this setattr?
1661          * If so, what rights will we need?
1662          */
1663         if (dosattr || (mask & (VATTR_ATIME | VATTR_MTIME))) {
1664                 rights |=
1665                     SA_RIGHT_FILE_WRITE_ATTRIBUTES;
1666         }
1667         if (mask & VATTR_SIZE) {
1668                 rights |=
1669                     SA_RIGHT_FILE_WRITE_DATA |
1670                     SA_RIGHT_FILE_APPEND_DATA;
1671         }
1672
1673         /*
1674          * Only SIZE really requires a handle, but it's
1675          * simpler and more reliable to set via a handle.
1676          * Some servers like NT4 won't set times by path.
1677          * Also, we're usually setting everything anyway.
1678          */
1679         if (rights != 0) {
1680                 error = smbfs_smb_tmpopen(np, rights, &scred, &fid);
1681                 if (error) {
1682                         SMBVDEBUG("error %d opening %s\n",
1683                             error, np->n_rpath);
1684                         goto out;
1685                 }
1686                 have_fid = 1;
1687         }
1688
1689         /*
1690          * If the server supports the UNIX extensions, right here is where
1691          * we'd support changes to uid, gid, mode, and possibly va_flags.
1692          * For now we claim to have made any such changes.
1693          */
1694
1695         if (mask & VATTR_SIZE) {
1696                 /*
1697                  * If the new file size is less than what the client sees as
1698                  * the file size, then just change the size and invalidate
1699                  * the pages.
1700                  */
1701
1702                 /*
1703                  * Set the file size to vap->va_size.
1704                  */
1705                 ASSERT(have_fid);
1706                 error = smbfs_smb_setfsize(np, fid, vap->va_size, &scred);
1707                 if (error) {
1708                         SMBVDEBUG("setsize error %d file %s\n",
1709                             error, np->n_rpath);
1710                 } else {
1711                         /*
1712                          * Darwin had code here to zero-extend.
1713                          * Tests indicate the server will zero-fill,
1714                          * so looks like we don't need to do that.
1715                          */
1716                         mutex_enter(&np->r_statelock);
1717                         np->r_size = vap->va_size;
1718                         mutex_exit(&np->r_statelock);
1719                         modified = 1;
1720                 }
1721         }
1722
1723         /*
1724          * Todo: Implement setting create_time (which is
1725          * different from ctime).
1726          */
1727         mtime = ((mask & VATTR_MTIME) ? &vap->va_mtime : 0);
1728         atime = ((mask & VATTR_ATIME) ? &vap->va_atime : 0);
1729
1730         if (dosattr || mtime || atime) {
1731                 /*
1732                  * Always use the handle-based set attr call now.
1733                  */
1734                 ASSERT(have_fid);
1735                 error = smbfs_smb_setfattr(np, fid,
1736                     dosattr, mtime, atime, &scred);
1737                 if (error) {
1738                         SMBVDEBUG("set times error %d file %s\n",
1739                             error, np->n_rpath);
1740                 } else {
1741                         modified = 1;
1742                 }
1743         }
1744
1745 out:
1746         if (have_fid) {
1747                 cerror = smbfs_smb_tmpclose(np, fid, &scred);
1748                 if (cerror)
1749                         SMBVDEBUG("error %d closing %s\n",
1750                             cerror, np->n_rpath);
1751         }
1752
1753         smb_credrele(&scred);
1754         smbfs_rw_exit(&np->r_lkserlock);
1755
1756         if (modified) {
1757                 /*
1758                  * Invalidate attribute cache in case the server
1759                  * doesn't set exactly the attributes we asked.
1760                  */
1761                 smbfs_attrcache_remove(np);
1762
1763                 /*
1764                  * If changing the size of the file, invalidate
1765                  * any local cached data which is no longer part
1766                  * of the file.  We also possibly invalidate the
1767                  * last page in the file.  We could use
1768                  * pvn_vpzero(), but this would mark the page as
1769                  * modified and require it to be written back to
1770                  * the server for no particularly good reason.
1771                  * This way, if we access it, then we bring it
1772                  * back in.  A read should be cheaper than a
1773                  * write.
1774                  */
1775                 if (mask & VATTR_SIZE) {
1776                         smbfs_invalidate_pages(vp,
1777                             (vap->va_size & PAGEMASK), cr);
1778                 }
1779         }
1780
1781         return (error);
1782 }
1783
1784 /*
1785  * Helper function for extensible system attributes (PSARC 2007/315)
1786  * Compute the DOS attribute word to pass to _setfattr (see above).
1787  * This returns zero IFF no change is being made to attributes.
1788  * Otherwise return the new attributes or SMB_EFA_NORMAL.
1789  */
1790 static uint32_t
1791 xvattr_to_dosattr(smbnode_t *np, struct vattr *vap)
1792 {
1793         xvattr_t *xvap = (xvattr_t *)vap;
1794         xoptattr_t *xoap = NULL;
1795         uint32_t attr = np->r_attr.fa_attr;
1796         boolean_t anyset = B_FALSE;
1797
1798         if ((xoap = xva_getxoptattr(xvap)) == NULL)
1799                 return (0);
1800
1801         if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1802                 if (xoap->xoa_archive)
1803                         attr |= SMB_FA_ARCHIVE;
1804                 else
1805                         attr &= ~SMB_FA_ARCHIVE;
1806                 XVA_SET_RTN(xvap, XAT_ARCHIVE);
1807                 anyset = B_TRUE;
1808         }
1809         if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1810                 if (xoap->xoa_system)
1811                         attr |= SMB_FA_SYSTEM;
1812                 else
1813                         attr &= ~SMB_FA_SYSTEM;
1814                 XVA_SET_RTN(xvap, XAT_SYSTEM);
1815                 anyset = B_TRUE;
1816         }
1817         if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1818                 if (xoap->xoa_readonly)
1819                         attr |= SMB_FA_RDONLY;
1820                 else
1821                         attr &= ~SMB_FA_RDONLY;
1822                 XVA_SET_RTN(xvap, XAT_READONLY);
1823                 anyset = B_TRUE;
1824         }
1825         if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1826                 if (xoap->xoa_hidden)
1827                         attr |= SMB_FA_HIDDEN;
1828                 else
1829                         attr &= ~SMB_FA_HIDDEN;
1830                 XVA_SET_RTN(xvap, XAT_HIDDEN);
1831                 anyset = B_TRUE;
1832         }
1833
1834         if (anyset == B_FALSE)
1835                 return (0);     /* no change */
1836         if (attr == 0)
1837                 attr = SMB_EFA_NORMAL;
1838
1839         return (attr);
1840 }
1841
1842 /*
1843  * smbfs_access_rwx()
1844  * Common function for smbfs_access, etc.
1845  *
1846  * The security model implemented by the FS is unusual
1847  * due to the current "single user mounts" restriction:
1848  * All access under a given mount point uses the CIFS
1849  * credentials established by the owner of the mount.
1850  *
1851  * Most access checking is handled by the CIFS server,
1852  * but we need sufficient Unix access checks here to
1853  * prevent other local Unix users from having access
1854  * to objects under this mount that the uid/gid/mode
1855  * settings in the mount would not allow.
1856  *
1857  * With this model, there is a case where we need the
1858  * ability to do an access check before we have the
1859  * vnode for an object.  This function takes advantage
1860  * of the fact that the uid/gid/mode is per mount, and
1861  * avoids the need for a vnode.
1862  *
1863  * We still (sort of) need a vnode when we call
1864  * secpolicy_vnode_access, but that only uses
1865  * the vtype field, so we can use a pair of fake
1866  * vnodes that have only v_type filled in.
1867  */
1868 static int
1869 smbfs_access_rwx(vfs_t *vfsp, int vtype, int mode, cred_t *cr)
1870 {
1871         /* See the secpolicy call below. */
1872         static const vnode_t tmpl_vdir = { .v_type = VDIR };
1873         static const vnode_t tmpl_vreg = { .v_type = VREG };
1874         vattr_t         va;
1875         vnode_t         *tvp;
1876         struct smbmntinfo *smi = VFTOSMI(vfsp);
1877         int shift = 0;
1878
1879         /*
1880          * Build our (fabricated) vnode attributes.
1881          */
1882         bzero(&va, sizeof (va));
1883         va.va_mask = VATTR_TYPE | VATTR_MODE | VATTR_UID | VATTR_GID;
1884         va.va_type = vtype;
1885         va.va_mode = (vtype == VDIR) ?
1886             smi->smi_dmode : smi->smi_fmode;
1887         va.va_uid = smi->smi_uid;
1888         va.va_gid = smi->smi_gid;
1889
1890         /*
1891          * Disallow write attempts on read-only file systems,
1892          * unless the file is a device or fifo node.  Note:
1893          * Inline vn_is_readonly and IS_DEVVP here because
1894          * we may not have a vnode ptr.  Original expr. was:
1895          * (mode & VWRITE) && vn_is_readonly(vp) && !IS_DEVVP(vp))
1896          */
1897         if ((mode & VWRITE) &&
1898             (vfsp->vfs_flag & VFS_RDONLY) &&
1899             !(vtype == VCHR || vtype == VBLK || vtype == VFIFO))
1900                 return (EROFS);
1901
1902         /*
1903          * Disallow attempts to access mandatory lock files.
1904          * Similarly, expand MANDLOCK here.
1905          */
1906         if ((mode & (VWRITE | VREAD | VEXEC)) &&
1907             va.va_type == VREG && MANDMODE(va.va_mode))
1908                 return (EACCES);
1909
1910         /*
1911          * Access check is based on only
1912          * one of owner, group, public.
1913          * If not owner, then check group.
1914          * If not a member of the group,
1915          * then check public access.
1916          */
1917         if (crgetuid(cr) != va.va_uid) {
1918                 shift += 3;
1919                 if (!groupmember(va.va_gid, cr))
1920                         shift += 3;
1921         }
1922
1923         /*
1924          * We need a vnode for secpolicy_vnode_access,
1925          * but the only thing it looks at is v_type,
1926          * so pass one of the templates above.
1927          */
1928         tvp = (va.va_type == VDIR) ?
1929             (vnode_t *)&tmpl_vdir :
1930             (vnode_t *)&tmpl_vreg;
1931
1932         return (secpolicy_vnode_access2(cr, tvp, va.va_uid,
1933             va.va_mode << shift, mode));
1934 }
1935
1936 /*
1937  * See smbfs_setattr
1938  */
1939 static int
1940 smbfs_accessx(void *arg, int mode, cred_t *cr)
1941 {
1942         vnode_t *vp = arg;
1943         /*
1944          * Note: The caller has checked the current zone,
1945          * the SMI_DEAD and VFS_UNMOUNTED flags, etc.
1946          */
1947         return (smbfs_access_rwx(vp->v_vfsp, vp->v_type, mode, cr));
1948 }
1949
1950 /*
1951  * XXX
1952  * This op should support PSARC 2007/403, Modified Access Checks for CIFS
1953  */
1954 /* ARGSUSED */
1955 static int
1956 smbfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
1957 {
1958         vfs_t           *vfsp;
1959         smbmntinfo_t    *smi;
1960
1961         vfsp = vp->v_vfsp;
1962         smi = VFTOSMI(vfsp);
1963
1964         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1965                 return (EIO);
1966
1967         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1968                 return (EIO);
1969
1970         return (smbfs_access_rwx(vfsp, vp->v_type, mode, cr));
1971 }
1972
1973
1974 /* ARGSUSED */
1975 static int
1976 smbfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1977 {
1978         /* Not yet... */
1979         return (ENOSYS);
1980 }
1981
1982
1983 /*
1984  * Flush local dirty pages to stable storage on the server.
1985  *
1986  * If FNODSYNC is specified, then there is nothing to do because
1987  * metadata changes are not cached on the client before being
1988  * sent to the server.
1989  */
1990 /* ARGSUSED */
1991 static int
1992 smbfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
1993 {
1994         int             error = 0;
1995         smbmntinfo_t    *smi;
1996         smbnode_t       *np;
1997         struct smb_cred scred;
1998
1999         np = VTOSMB(vp);
2000         smi = VTOSMI(vp);
2001
2002         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2003                 return (EIO);
2004
2005         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2006                 return (EIO);
2007
2008         if ((syncflag & FNODSYNC) || IS_SWAPVP(vp))
2009                 return (0);
2010
2011         if ((syncflag & (FSYNC|FDSYNC)) == 0)
2012                 return (0);
2013
2014         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
2015         if (error)
2016                 return (error);
2017
2018         /* Shared lock for n_fid use in _flush */
2019         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
2020                 return (EINTR);
2021         smb_credinit(&scred, cr);
2022
2023         error = smbfs_smb_flush(np, &scred);
2024
2025         smb_credrele(&scred);
2026         smbfs_rw_exit(&np->r_lkserlock);
2027
2028         return (error);
2029 }
2030
2031 /*
2032  * Last reference to vnode went away.
2033  */
2034 /* ARGSUSED */
2035 static void
2036 smbfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
2037 {
2038         struct smb_cred scred;
2039         smbnode_t       *np = VTOSMB(vp);
2040         int error;
2041
2042         /*
2043          * Don't "bail out" for VFS_UNMOUNTED here,
2044          * as we want to do cleanup, etc.
2045          * See also pcfs_inactive
2046          */
2047
2048         /*
2049          * If this is coming from the wrong zone, we let someone in the right
2050          * zone take care of it asynchronously.  We can get here due to
2051          * VN_RELE() being called from pageout() or fsflush().  This call may
2052          * potentially turn into an expensive no-op if, for instance, v_count
2053          * gets incremented in the meantime, but it's still correct.
2054          */
2055
2056         /*
2057          * From NFS:rinactive()
2058          *
2059          * Before freeing anything, wait until all asynchronous
2060          * activity is done on this rnode.  This will allow all
2061          * asynchronous read ahead and write behind i/o's to
2062          * finish.
2063          */
2064         mutex_enter(&np->r_statelock);
2065         while (np->r_count > 0)
2066                 cv_wait(&np->r_cv, &np->r_statelock);
2067         mutex_exit(&np->r_statelock);
2068
2069         /*
2070          * Flush and invalidate all pages associated with the vnode.
2071          */
2072         if (vn_has_cached_data(vp)) {
2073                 if ((np->r_flags & RDIRTY) && !np->r_error) {
2074                         error = smbfs_putpage(vp, 0, 0, 0, cr, ct);
2075                         if (error && (error == ENOSPC || error == EDQUOT)) {
2076                                 mutex_enter(&np->r_statelock);
2077                                 if (!np->r_error)
2078                                         np->r_error = error;
2079                                 mutex_exit(&np->r_statelock);
2080                         }
2081                 }
2082                 smbfs_invalidate_pages(vp, 0, cr);
2083         }
2084         /*
2085          * This vnode should have lost all cached data.
2086          */
2087         ASSERT(vn_has_cached_data(vp) == 0);
2088
2089         /*
2090          * Defend against the possibility that higher-level callers
2091          * might not correctly balance open and close calls.  If we
2092          * get here with open references remaining, it means there
2093          * was a missing fop_close somewhere.  If that happens, do
2094          * the close here so we don't "leak" FIDs on the server.
2095          *
2096          * Exclusive lock for modifying n_fid stuff.
2097          * Don't want this one ever interruptible.
2098          */
2099         (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
2100         smb_credinit(&scred, cr);
2101
2102         switch (np->n_ovtype) {
2103         case VNON:
2104                 /* not open (OK) */
2105                 break;
2106
2107         case VDIR:
2108                 if (np->n_dirrefs == 0)
2109                         break;
2110                 SMBVDEBUG("open dir: refs %d path %s\n",
2111                     np->n_dirrefs, np->n_rpath);
2112                 /* Force last close. */
2113                 np->n_dirrefs = 1;
2114                 smbfs_rele_fid(np, &scred);
2115                 break;
2116
2117         case VREG:
2118                 if (np->n_fidrefs == 0)
2119                         break;
2120                 SMBVDEBUG("open file: refs %d id 0x%x path %s\n",
2121                     np->n_fidrefs, np->n_fid, np->n_rpath);
2122                 /* Force last close. */
2123                 np->n_fidrefs = 1;
2124                 smbfs_rele_fid(np, &scred);
2125                 break;
2126
2127         default:
2128                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
2129                 np->n_ovtype = VNON;
2130                 break;
2131         }
2132
2133         smb_credrele(&scred);
2134         smbfs_rw_exit(&np->r_lkserlock);
2135
2136         /*
2137          * XATTR directories (and the files under them) have
2138          * little value for reclaim, so just remove them from
2139          * the "hash" (AVL) as soon as they go inactive.
2140          * Note that the node may already have been removed
2141          * from the hash by smbfsremove.
2142          */
2143         if ((np->n_flag & N_XATTR) != 0 &&
2144             (np->r_flags & RHASHED) != 0)
2145                 smbfs_rmhash(np);
2146
2147         smbfs_addfree(np);
2148 }
2149
2150 /*
2151  * Remote file system operations having to do with directory manipulation.
2152  */
2153 /* ARGSUSED */
2154 static int
2155 smbfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
2156         int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
2157         int *direntflags, pathname_t *realpnp)
2158 {
2159         vfs_t           *vfs;
2160         smbmntinfo_t    *smi;
2161         smbnode_t       *dnp;
2162         int             error;
2163
2164         vfs = dvp->v_vfsp;
2165         smi = VFTOSMI(vfs);
2166
2167         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2168                 return (EPERM);
2169
2170         if (smi->smi_flags & SMI_DEAD || vfs->vfs_flag & VFS_UNMOUNTED)
2171                 return (EIO);
2172
2173         dnp = VTOSMB(dvp);
2174
2175         /*
2176          * Are we looking up extended attributes?  If so, "dvp" is
2177          * the file or directory for which we want attributes, and
2178          * we need a lookup of the (faked up) attribute directory
2179          * before we lookup the rest of the path.
2180          */
2181         if (flags & LOOKUP_XATTR) {
2182                 /*
2183                  * Require the xattr mount option.
2184                  */
2185                 if ((vfs->vfs_flag & VFS_XATTR) == 0)
2186                         return (EINVAL);
2187
2188                 error = smbfs_get_xattrdir(dvp, vpp, cr, flags);
2189                 return (error);
2190         }
2191
2192         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_READER, SMBINTR(dvp)))
2193                 return (EINTR);
2194
2195         error = smbfslookup(dvp, nm, vpp, cr, 1, ct);
2196
2197         smbfs_rw_exit(&dnp->r_rwlock);
2198
2199         return (error);
2200 }
2201
2202 /* ARGSUSED */
2203 static int
2204 smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
2205         int cache_ok, caller_context_t *ct)
2206 {
2207         int             error;
2208         int             supplen; /* supported length */
2209         vnode_t         *vp;
2210         smbnode_t       *np;
2211         smbnode_t       *dnp;
2212         smbmntinfo_t    *smi;
2213         /* struct smb_vc        *vcp; */
2214         const char      *ill;
2215         const char      *name = (const char *)nm;
2216         int             nmlen = strlen(nm);
2217         int             rplen;
2218         struct smb_cred scred;
2219         struct smbfattr fa;
2220
2221         smi = VTOSMI(dvp);
2222         dnp = VTOSMB(dvp);
2223
2224         ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
2225
2226 #ifdef NOT_YET
2227         vcp = SSTOVC(smi->smi_share);
2228
2229         /* XXX: Should compute this once and store it in smbmntinfo_t */
2230         supplen = (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) ? 255 : 12;
2231 #else
2232         supplen = 255;
2233 #endif
2234
2235         /*
2236          * RWlock must be held, either reader or writer.
2237          */
2238         ASSERT(dnp->r_rwlock.count != 0);
2239
2240         /*
2241          * If lookup is for "", just return dvp.
2242          * No need to perform any access checks.
2243          */
2244         if (nmlen == 0) {
2245                 VN_HOLD(dvp);
2246                 *vpp = dvp;
2247                 return (0);
2248         }
2249
2250         /*
2251          * Can't do lookups in non-directories.
2252          */
2253         if (dvp->v_type != VDIR)
2254                 return (ENOTDIR);
2255
2256         /*
2257          * Need search permission in the directory.
2258          */
2259         error = smbfs_access(dvp, VEXEC, 0, cr, ct);
2260         if (error)
2261                 return (error);
2262
2263         /*
2264          * If lookup is for ".", just return dvp.
2265          * Access check was done above.
2266          */
2267         if (nmlen == 1 && name[0] == '.') {
2268                 VN_HOLD(dvp);
2269                 *vpp = dvp;
2270                 return (0);
2271         }
2272
2273         /*
2274          * Now some sanity checks on the name.
2275          * First check the length.
2276          */
2277         if (nmlen > supplen)
2278                 return (ENAMETOOLONG);
2279
2280         /*
2281          * Avoid surprises with characters that are
2282          * illegal in Windows file names.
2283          * Todo: CATIA mappings?
2284          */
2285         ill = illegal_chars;
2286         if (dnp->n_flag & N_XATTR)
2287                 ill++; /* allow colon */
2288         if (strpbrk(nm, ill))
2289                 return (EINVAL);
2290
2291         /*
2292          * Special handling for lookup of ".."
2293          *
2294          * We keep full pathnames (as seen on the server)
2295          * so we can just trim off the last component to
2296          * get the full pathname of the parent.  Note:
2297          * We don't actually copy and modify, but just
2298          * compute the trimmed length and pass that with
2299          * the current dir path (not null terminated).
2300          *
2301          * We don't go over-the-wire to get attributes
2302          * for ".." because we know it's a directory,
2303          * and we can just leave the rest "stale"
2304          * until someone does a getattr.
2305          */
2306         if (nmlen == 2 && name[0] == '.' && name[1] == '.') {
2307                 if (dvp->v_flag & VROOT) {
2308                         /*
2309                          * Already at the root.  This can happen
2310                          * with directory listings at the root,
2311                          * which lookup "." and ".." to get the
2312                          * inode numbers.  Let ".." be the same
2313                          * as "." in the FS root.
2314                          */
2315                         VN_HOLD(dvp);
2316                         *vpp = dvp;
2317                         return (0);
2318                 }
2319
2320                 /*
2321                  * Special case for XATTR directory
2322                  */
2323                 if (dvp->v_flag & V_XATTRDIR) {
2324                         error = smbfs_xa_parent(dvp, vpp);
2325                         return (error);
2326                 }
2327
2328                 /*
2329                  * Find the parent path length.
2330                  */
2331                 rplen = dnp->n_rplen;
2332                 ASSERT(rplen > 0);
2333                 while (--rplen >= 0) {
2334                         if (dnp->n_rpath[rplen] == '\\')
2335                                 break;
2336                 }
2337                 if (rplen <= 0) {
2338                         /* Found our way to the root. */
2339                         vp = SMBTOV(smi->smi_root);
2340                         VN_HOLD(vp);
2341                         *vpp = vp;
2342                         return (0);
2343                 }
2344                 np = smbfs_node_findcreate(smi,
2345                     dnp->n_rpath, rplen, NULL, 0, 0,
2346                     &smbfs_fattr0); /* force create */
2347                 ASSERT(np != NULL);
2348                 vp = SMBTOV(np);
2349                 vp->v_type = VDIR;
2350
2351                 /* Success! */
2352                 *vpp = vp;
2353                 return (0);
2354         }
2355
2356         /*
2357          * Normal lookup of a name under this directory.
2358          * Note we handled "", ".", ".." above.
2359          */
2360         if (cache_ok) {
2361                 /*
2362                  * The caller indicated that it's OK to use a
2363                  * cached result for this lookup, so try to
2364                  * reclaim a node from the smbfs node cache.
2365                  */
2366                 error = smbfslookup_cache(dvp, nm, nmlen, &vp, cr);
2367                 if (error)
2368                         return (error);
2369                 if (vp != NULL) {
2370                         /* hold taken in lookup_cache */
2371                         *vpp = vp;
2372                         return (0);
2373                 }
2374         }
2375
2376         /*
2377          * OK, go over-the-wire to get the attributes,
2378          * then create the node.
2379          */
2380         smb_credinit(&scred, cr);
2381         /* Note: this can allocate a new "name" */
2382         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fa, &scred);
2383         smb_credrele(&scred);
2384         if (error == ENOTDIR) {
2385                 /*
2386                  * Lookup failed because this directory was
2387                  * removed or renamed by another client.
2388                  * Remove any cached attributes under it.
2389                  */
2390                 smbfs_attrcache_remove(dnp);
2391                 smbfs_attrcache_prune(dnp);
2392         }
2393         if (error)
2394                 goto out;
2395
2396         error = smbfs_nget(dvp, name, nmlen, &fa, &vp);
2397         if (error)
2398                 goto out;
2399
2400         /* Success! */
2401         *vpp = vp;
2402
2403 out:
2404         /* smbfs_smb_lookup may have allocated name. */
2405         if (name != nm)
2406                 smbfs_name_free(name, nmlen);
2407
2408         return (error);
2409 }
2410
2411 /*
2412  * smbfslookup_cache
2413  *
2414  * Try to reclaim a node from the smbfs node cache.
2415  * Some statistics for DEBUG.
2416  *
2417  * This mechanism lets us avoid many of the five (or more)
2418  * OtW lookup calls per file seen with "ls -l" if we search
2419  * the smbfs node cache for recently inactive(ated) nodes.
2420  */
2421 #ifdef DEBUG
2422 int smbfs_lookup_cache_calls = 0;
2423 int smbfs_lookup_cache_error = 0;
2424 int smbfs_lookup_cache_miss = 0;
2425 int smbfs_lookup_cache_stale = 0;
2426 int smbfs_lookup_cache_hits = 0;
2427 #endif /* DEBUG */
2428
2429 /* ARGSUSED */
2430 static int
2431 smbfslookup_cache(vnode_t *dvp, char *nm, int nmlen,
2432         vnode_t **vpp, cred_t *cr)
2433 {
2434         struct vattr va;
2435         smbnode_t *dnp;
2436         smbnode_t *np;
2437         vnode_t *vp;
2438         int error;
2439         char sep;
2440
2441         dnp = VTOSMB(dvp);
2442         *vpp = NULL;
2443
2444 #ifdef DEBUG
2445         smbfs_lookup_cache_calls++;
2446 #endif
2447
2448         /*
2449          * First make sure we can get attributes for the
2450          * directory.  Cached attributes are OK here.
2451          * If we removed or renamed the directory, this
2452          * will return ENOENT.  If someone else removed
2453          * this directory or file, we'll find out when we
2454          * try to open or get attributes.
2455          */
2456         va.va_mask = VATTR_TYPE | VATTR_MODE;
2457         error = smbfsgetattr(dvp, &va, cr);
2458         if (error) {
2459 #ifdef DEBUG
2460                 smbfs_lookup_cache_error++;
2461 #endif
2462                 return (error);
2463         }
2464
2465         /*
2466          * Passing NULL smbfattr here so we will
2467          * just look, not create.
2468          */
2469         sep = SMBFS_DNP_SEP(dnp);
2470         np = smbfs_node_findcreate(dnp->n_mount,
2471             dnp->n_rpath, dnp->n_rplen,
2472             nm, nmlen, sep, NULL);
2473         if (np == NULL) {
2474 #ifdef DEBUG
2475                 smbfs_lookup_cache_miss++;
2476 #endif
2477                 return (0);
2478         }
2479
2480         /*
2481          * Found it.  Attributes still valid?
2482          */
2483         vp = SMBTOV(np);
2484         if (np->r_attrtime <= gethrtime()) {
2485                 /* stale */
2486 #ifdef DEBUG
2487                 smbfs_lookup_cache_stale++;
2488 #endif
2489                 VN_RELE(vp);
2490                 return (0);
2491         }
2492
2493         /*
2494          * Success!
2495          * Caller gets hold from smbfs_node_findcreate
2496          */
2497 #ifdef DEBUG
2498         smbfs_lookup_cache_hits++;
2499 #endif
2500         *vpp = vp;
2501         return (0);
2502 }
2503
2504
2505 /*
2506  * XXX
2507  * vsecattr_t is new to build 77, and we need to eventually support
2508  * it in order to create an ACL when an object is created.
2509  *
2510  * This op should support the new FIGNORECASE flag for case-insensitive
2511  * lookups, per PSARC 2007/244.
2512  */
2513 /* ARGSUSED */
2514 static int
2515 smbfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
2516         int mode, vnode_t **vpp, cred_t *cr, int lfaware, caller_context_t *ct,
2517         vsecattr_t *vsecp)
2518 {
2519         int             error;
2520         int             cerror;
2521         vfs_t           *vfsp;
2522         vnode_t         *vp;
2523         smbnode_t       *np;
2524         smbnode_t       *dnp;
2525         smbmntinfo_t    *smi;
2526         struct vattr    vattr;
2527         struct smbfattr fattr;
2528         struct smb_cred scred;
2529         const char *name = (const char *)nm;
2530         int             nmlen = strlen(nm);
2531         uint32_t        disp;
2532         uint16_t        fid;
2533         int             xattr;
2534
2535         vfsp = dvp->v_vfsp;
2536         smi = VFTOSMI(vfsp);
2537         dnp = VTOSMB(dvp);
2538         vp = NULL;
2539
2540         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2541                 return (EPERM);
2542
2543         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
2544                 return (EIO);
2545
2546         /*
2547          * Note: this may break mknod(2) calls to create a directory,
2548          * but that's obscure use.  Some other filesystems do this.
2549          * Todo: redirect VDIR type here to _mkdir.
2550          */
2551         if (va->va_type != VREG)
2552                 return (EINVAL);
2553
2554         /*
2555          * If the pathname is "", just use dvp, no checks.
2556          * Do this outside of the rwlock (like zfs).
2557          */
2558         if (nmlen == 0) {
2559                 VN_HOLD(dvp);
2560                 *vpp = dvp;
2561                 return (0);
2562         }
2563
2564         /* Don't allow "." or ".." through here. */
2565         if ((nmlen == 1 && name[0] == '.') ||
2566             (nmlen == 2 && name[0] == '.' && name[1] == '.'))
2567                 return (EISDIR);
2568
2569         /*
2570          * We make a copy of the attributes because the caller does not
2571          * expect us to change what va points to.
2572          */
2573         vattr = *va;
2574
2575         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
2576                 return (EINTR);
2577         smb_credinit(&scred, cr);
2578
2579         /*
2580          * NFS needs to go over the wire, just to be sure whether the
2581          * file exists or not.  Using a cached result is dangerous in
2582          * this case when making a decision regarding existence.
2583          *
2584          * The SMB protocol does NOT really need to go OTW here
2585          * thanks to the expressive NTCREATE disposition values.
2586          * Unfortunately, to do Unix access checks correctly,
2587          * we need to know if the object already exists.
2588          * When the object does not exist, we need VWRITE on
2589          * the directory.  Note: smbfslookup() checks VEXEC.
2590          */
2591         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
2592         if (error == 0) {
2593                 /*
2594                  * The file already exists.  Error?
2595                  * NB: have a hold from smbfslookup
2596                  */
2597                 if (exclusive == EXCL) {
2598                         error = EEXIST;
2599                         VN_RELE(vp);
2600                         goto out;
2601                 }
2602                 /*
2603                  * Verify requested access.
2604                  */
2605                 error = smbfs_access(vp, mode, 0, cr, ct);
2606                 if (error) {
2607                         VN_RELE(vp);
2608                         goto out;
2609                 }
2610
2611                 /*
2612                  * Truncate (if requested).
2613                  */
2614                 if ((vattr.va_mask & VATTR_SIZE) && vp->v_type == VREG) {
2615                         np = VTOSMB(vp);
2616                         /*
2617                          * Check here for large file truncation by
2618                          * LF-unaware process, like ufs_create().
2619                          */
2620                         if (!(lfaware & FOFFMAX)) {
2621                                 mutex_enter(&np->r_statelock);
2622                                 if (np->r_size > MAXOFF32_T)
2623                                         error = EOVERFLOW;
2624                                 mutex_exit(&np->r_statelock);
2625                         }
2626                         if (error) {
2627                                 VN_RELE(vp);
2628                                 goto out;
2629                         }
2630                         vattr.va_mask = VATTR_SIZE;
2631                         error = smbfssetattr(vp, &vattr, 0, cr);
2632                         if (error) {
2633                                 VN_RELE(vp);
2634                                 goto out;
2635                         }
2636 #ifdef  SMBFS_VNEVENT
2637                         /* Existing file was truncated */
2638                         vnevent_create(vp, ct);
2639 #endif
2640                         /* invalidate pages done in smbfssetattr() */
2641                 }
2642                 /* Success! */
2643                 *vpp = vp;
2644                 goto out;
2645         }
2646
2647         /*
2648          * The file did not exist.  Need VWRITE in the directory.
2649          */
2650         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
2651         if (error)
2652                 goto out;
2653
2654         /*
2655          * Now things get tricky.  We also need to check the
2656          * requested open mode against the file we may create.
2657          * See comments at smbfs_access_rwx
2658          */
2659         error = smbfs_access_rwx(vfsp, VREG, mode, cr);
2660         if (error)
2661                 goto out;
2662
2663         /*
2664          * Now the code derived from Darwin,
2665          * but with greater use of NT_CREATE
2666          * disposition options.  Much changed.
2667          *
2668          * Create (or open) a new child node.
2669          * Note we handled "." and ".." above.
2670          */
2671
2672         if (exclusive == EXCL)
2673                 disp = NTCREATEX_DISP_CREATE;
2674         else {
2675                 /* Truncate regular files if requested. */
2676                 if ((va->va_type == VREG) &&
2677                     (va->va_mask & VATTR_SIZE) &&
2678                     (va->va_size == 0))
2679                         disp = NTCREATEX_DISP_OVERWRITE_IF;
2680                 else
2681                         disp = NTCREATEX_DISP_OPEN_IF;
2682         }
2683         xattr = (dnp->n_flag & N_XATTR) ? 1 : 0;
2684         error = smbfs_smb_create(dnp,
2685             name, nmlen, xattr,
2686             disp, &scred, &fid);
2687         if (error)
2688                 goto out;
2689
2690         /*
2691          * Should use the fid to get/set the size
2692          * while we have it opened here.  See above.
2693          */
2694
2695         cerror = smbfs_smb_close(smi->smi_share, fid, NULL, &scred);
2696         if (cerror)
2697                 SMBVDEBUG("error %d closing %s\\%s\n",
2698                     cerror, dnp->n_rpath, name);
2699
2700         /*
2701          * In the open case, the name may differ a little
2702          * from what we passed to create (case, etc.)
2703          * so call lookup to get the (opened) name.
2704          *
2705          * XXX: Could avoid this extra lookup if the
2706          * "createact" result from NT_CREATE says we
2707          * created the object.
2708          */
2709         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
2710         if (error)
2711                 goto out;
2712
2713         /* update attr and directory cache */
2714         smbfs_attr_touchdir(dnp);
2715
2716         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
2717         if (error)
2718                 goto out;
2719
2720         /* Success! */
2721         *vpp = vp;
2722         error = 0;
2723
2724 out:
2725         smb_credrele(&scred);
2726         smbfs_rw_exit(&dnp->r_rwlock);
2727         if (name != nm)
2728                 smbfs_name_free(name, nmlen);
2729         return (error);
2730 }
2731
2732 /*
2733  * XXX
2734  * This op should support the new FIGNORECASE flag for case-insensitive
2735  * lookups, per PSARC 2007/244.
2736  */
2737 /* ARGSUSED */
2738 static int
2739 smbfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
2740         int flags)
2741 {
2742         struct smb_cred scred;
2743         vnode_t         *vp = NULL;
2744         smbnode_t       *dnp = VTOSMB(dvp);
2745         smbmntinfo_t    *smi = VTOSMI(dvp);
2746         int             error;
2747
2748         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2749                 return (EPERM);
2750
2751         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2752                 return (EIO);
2753
2754         /*
2755          * Verify access to the dirctory.
2756          */
2757         error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
2758         if (error)
2759                 return (error);
2760
2761         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
2762                 return (EINTR);
2763         smb_credinit(&scred, cr);
2764
2765         /* Lookup the file to remove. */
2766         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
2767         if (error == 0) {
2768                 /*
2769                  * Do the real remove work
2770                  */
2771                 error = smbfsremove(dvp, vp, &scred, flags);
2772                 VN_RELE(vp);
2773         }
2774
2775         smb_credrele(&scred);
2776         smbfs_rw_exit(&dnp->r_rwlock);
2777
2778         return (error);
2779 }
2780
2781 /*
2782  * smbfsremove does the real work of removing in SMBFS
2783  * Caller has done dir access checks etc.
2784  *
2785  * The normal way to delete a file over SMB is open it (with DELETE access),
2786  * set the "delete-on-close" flag, and close the file.  The problem for Unix
2787  * applications is that they expect the file name to be gone once the unlink
2788  * completes, and the SMB server does not actually delete the file until ALL
2789  * opens of that file are closed.  We can't assume our open handles are the
2790  * only open handles on a file we're deleting, so to be safe we'll try to
2791  * rename the file to a temporary name and then set delete-on-close.  If we
2792  * fail to set delete-on-close (i.e. because other opens prevent it) then
2793  * undo the changes we made and give up with EBUSY.  Note that we might have
2794  * permission to delete a file but lack permission to rename, so we want to
2795  * continue in cases where rename fails.  As an optimization, only do the
2796  * rename when we have the file open.
2797  *
2798  * This is similar to what NFS does when deleting a file that has local opens,
2799  * but thanks to SMB delete-on-close, we don't need to keep track of when the
2800  * last local open goes away and send a delete.  The server does that for us.
2801  */
2802 /* ARGSUSED */
2803 static int
2804 smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
2805     int flags)
2806 {
2807         smbnode_t       *dnp = VTOSMB(dvp);
2808         smbnode_t       *np = VTOSMB(vp);
2809         char            *tmpname = NULL;
2810         int             tnlen;
2811         int             error;
2812         unsigned short  fid;
2813         boolean_t       have_fid = B_FALSE;
2814         boolean_t       renamed = B_FALSE;
2815
2816         /*
2817          * The dvp RWlock must be held as writer.
2818          */
2819         ASSERT(dnp->r_rwlock.owner == curthread);
2820
2821         /* Never allow link/unlink directories on SMB. */
2822         if (vp->v_type == VDIR)
2823                 return (EPERM);
2824
2825         /*
2826          * We need to flush any dirty pages which happen to
2827          * be hanging around before removing the file.  This
2828          * shouldn't happen very often and mostly on file
2829          * systems mounted "nocto".
2830          */
2831         if (vn_has_cached_data(vp) &&
2832             ((np->r_flags & RDIRTY) || np->r_count > 0)) {
2833                 error = smbfs_putpage(vp, (offset_t)0, 0, 0,
2834                     scred->scr_cred, NULL);
2835                 if (error && (error == ENOSPC || error == EDQUOT)) {
2836                         mutex_enter(&np->r_statelock);
2837                         if (!np->r_error)
2838                                 np->r_error = error;
2839                         mutex_exit(&np->r_statelock);
2840                 }
2841         }
2842
2843         /* Shared lock for n_fid use in smbfs_smb_setdisp etc. */
2844         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
2845                 return (EINTR);
2846
2847         /*
2848          * Get a file handle with delete access.
2849          * Close this FID before return.
2850          */
2851         error = smbfs_smb_tmpopen(np, STD_RIGHT_DELETE_ACCESS,
2852             scred, &fid);
2853         if (error) {
2854                 SMBVDEBUG("error %d opening %s\n",
2855                     error, np->n_rpath);
2856                 goto out;
2857         }
2858         have_fid = B_TRUE;
2859
2860         /*
2861          * If we have the file open, try to rename it to a temporary name.
2862          * If we can't rename, continue on and try setting DoC anyway.
2863          */
2864         if ((vp->v_count > 1) && (np->n_fidrefs > 0)) {
2865                 tmpname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2866                 tnlen = smbfs_newname(tmpname, MAXNAMELEN);
2867                 error = smbfs_smb_t2rename(np, tmpname, tnlen, scred, fid, 0);
2868                 if (error != 0) {
2869                         SMBVDEBUG("error %d renaming %s -> %s\n",
2870                             error, np->n_rpath, tmpname);
2871                         /* Keep going without the rename. */
2872                 } else {
2873                         renamed = B_TRUE;
2874                 }
2875         }
2876
2877         /*
2878          * Mark the file as delete-on-close.  If we can't,
2879          * undo what we did and err out.
2880          */
2881         error = smbfs_smb_setdisp(np, fid, 1, scred);
2882         if (error != 0) {
2883                 SMBVDEBUG("error %d setting DoC on %s\n",
2884                     error, np->n_rpath);
2885                 /*
2886                  * Failed to set DoC. If we renamed, undo that.
2887                  * Need np->n_rpath relative to parent (dnp).
2888                  * Use parent path name length plus one for
2889                  * the separator ('/' or ':')
2890                  */
2891                 if (renamed) {
2892                         char *oldname;
2893                         int oldnlen;
2894                         int err2;
2895
2896                         oldname = np->n_rpath + (dnp->n_rplen + 1);
2897                         oldnlen = np->n_rplen - (dnp->n_rplen + 1);
2898                         err2 = smbfs_smb_t2rename(np, oldname, oldnlen,
2899                             scred, fid, 0);
2900                         SMBVDEBUG("error %d un-renaming %s -> %s\n",
2901                             err2, tmpname, np->n_rpath);
2902                 }
2903                 error = EBUSY;
2904                 goto out;
2905         }
2906         /* Done! */
2907         smbfs_attrcache_prune(np);
2908
2909 #ifdef  SMBFS_VNEVENT
2910         vnevent_remove(vp, dvp, nm, ct);
2911 #endif
2912
2913 out:
2914         if (tmpname != NULL)
2915                 kmem_free(tmpname, MAXNAMELEN);
2916
2917         if (have_fid)
2918                 (void) smbfs_smb_tmpclose(np, fid, scred);
2919         smbfs_rw_exit(&np->r_lkserlock);
2920
2921         if (error == 0) {
2922                 /* Keep lookup from finding this node anymore. */
2923                 smbfs_rmhash(np);
2924         }
2925
2926         return (error);
2927 }
2928
2929
2930 /* ARGSUSED */
2931 static int
2932 smbfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
2933         caller_context_t *ct, int flags)
2934 {
2935         /* Not yet... */
2936         return (ENOSYS);
2937 }
2938
2939
2940 /*
2941  * XXX
2942  * This op should support the new FIGNORECASE flag for case-insensitive
2943  * lookups, per PSARC 2007/244.
2944  */
2945 /* ARGSUSED */
2946 static int
2947 smbfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
2948         caller_context_t *ct, int flags)
2949 {
2950         struct smb_cred scred;
2951         smbnode_t       *odnp = VTOSMB(odvp);
2952         smbnode_t       *ndnp = VTOSMB(ndvp);
2953         vnode_t         *ovp;
2954         int error;
2955
2956         if (curproc->p_zone != VTOSMI(odvp)->smi_zone_ref.zref_zone ||
2957             curproc->p_zone != VTOSMI(ndvp)->smi_zone_ref.zref_zone)
2958                 return (EPERM);
2959
2960         if (VTOSMI(odvp)->smi_flags & SMI_DEAD ||
2961             VTOSMI(ndvp)->smi_flags & SMI_DEAD ||
2962             odvp->v_vfsp->vfs_flag & VFS_UNMOUNTED ||
2963             ndvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2964                 return (EIO);
2965
2966         if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
2967             strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0)
2968                 return (EINVAL);
2969
2970         /*
2971          * Check that everything is on the same filesystem.
2972          * vn_rename checks the fsid's, but in case we don't
2973          * fill those in correctly, check here too.
2974          */
2975         if (odvp->v_vfsp != ndvp->v_vfsp)
2976                 return (EXDEV);
2977
2978         /*
2979          * Need write access on source and target.
2980          * Server takes care of most checks.
2981          */
2982         error = smbfs_access(odvp, VWRITE|VEXEC, 0, cr, ct);
2983         if (error)
2984                 return (error);
2985         if (odvp != ndvp) {
2986                 error = smbfs_access(ndvp, VWRITE, 0, cr, ct);
2987                 if (error)
2988                         return (error);
2989         }
2990
2991         /*
2992          * Need to lock both old/new dirs as writer.
2993          *
2994          * Avoid deadlock here on old vs new directory nodes
2995          * by always taking the locks in order of address.
2996          * The order is arbitrary, but must be consistent.
2997          */
2998         if (odnp < ndnp) {
2999                 if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
3000                     SMBINTR(odvp)))
3001                         return (EINTR);
3002                 if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
3003                     SMBINTR(ndvp))) {
3004                         smbfs_rw_exit(&odnp->r_rwlock);
3005                         return (EINTR);
3006                 }
3007         } else {
3008                 if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
3009                     SMBINTR(ndvp)))
3010                         return (EINTR);
3011                 if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
3012                     SMBINTR(odvp))) {
3013                         smbfs_rw_exit(&ndnp->r_rwlock);
3014                         return (EINTR);
3015                 }
3016         }
3017         smb_credinit(&scred, cr);
3018
3019         /* Lookup the "old" name */
3020         error = smbfslookup(odvp, onm, &ovp, cr, 0, ct);
3021         if (error == 0) {
3022                 /*
3023                  * Do the real rename work
3024                  */
3025                 error = smbfsrename(odvp, ovp, ndvp, nnm, &scred, flags);
3026                 VN_RELE(ovp);
3027         }
3028
3029         smb_credrele(&scred);
3030         smbfs_rw_exit(&odnp->r_rwlock);
3031         smbfs_rw_exit(&ndnp->r_rwlock);
3032
3033         return (error);
3034 }
3035
3036 /*
3037  * smbfsrename does the real work of renaming in SMBFS
3038  * Caller has done dir access checks etc.
3039  */
3040 /* ARGSUSED */
3041 static int
3042 smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp, char *nnm,
3043     struct smb_cred *scred, int flags)
3044 {
3045         smbnode_t       *odnp = VTOSMB(odvp);
3046         smbnode_t       *onp = VTOSMB(ovp);
3047         smbnode_t       *ndnp = VTOSMB(ndvp);
3048         vnode_t         *nvp = NULL;
3049         int             error;
3050         int             nvp_locked = 0;
3051
3052         /* Things our caller should have checked. */
3053         ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone);
3054         ASSERT(odvp->v_vfsp == ndvp->v_vfsp);
3055         ASSERT(odnp->r_rwlock.owner == curthread);
3056         ASSERT(ndnp->r_rwlock.owner == curthread);
3057
3058         /*
3059          * Lookup the target file.  If it exists, it needs to be
3060          * checked to see whether it is a mount point and whether
3061          * it is active (open).
3062          */
3063         error = smbfslookup(ndvp, nnm, &nvp, scred->scr_cred, 0, NULL);
3064         if (!error) {
3065                 /*
3066                  * Target (nvp) already exists.  Check that it
3067                  * has the same type as the source.  The server
3068                  * will check this also, (and more reliably) but
3069                  * this lets us return the correct error codes.
3070                  */
3071                 if (ovp->v_type == VDIR) {
3072                         if (nvp->v_type != VDIR) {
3073                                 error = ENOTDIR;
3074                                 goto out;
3075                         }
3076                 } else {
3077                         if (nvp->v_type == VDIR) {
3078                                 error = EISDIR;
3079                                 goto out;
3080                         }
3081                 }
3082
3083                 /*
3084                  * POSIX dictates that when the source and target
3085                  * entries refer to the same file object, rename
3086                  * must do nothing and exit without error.
3087                  */
3088                 if (ovp == nvp) {
3089                         error = 0;
3090                         goto out;
3091                 }
3092
3093                 /*
3094                  * Also must ensure the target is not a mount point,
3095                  * and keep mount/umount away until we're done.
3096                  */
3097                 if (vn_vfsrlock(nvp)) {
3098                         error = EBUSY;
3099                         goto out;
3100                 }
3101                 nvp_locked = 1;
3102                 if (vn_mountedvfs(nvp) != NULL) {
3103                         error = EBUSY;
3104                         goto out;
3105                 }
3106
3107                 /*
3108                  * CIFS may give a SHARING_VIOLATION error when
3109                  * trying to rename onto an exising object,
3110                  * so try to remove the target first.
3111                  * (Only for files, not directories.)
3112                  */
3113                 if (nvp->v_type == VDIR) {
3114                         error = EEXIST;
3115                         goto out;
3116                 }
3117                 error = smbfsremove(ndvp, nvp, scred, flags);
3118                 if (error != 0)
3119                         goto out;
3120
3121                 /*
3122                  * OK, removed the target file.  Continue as if
3123                  * lookup target had failed (nvp == NULL).
3124                  */
3125                 vn_vfsunlock(nvp);
3126                 nvp_locked = 0;
3127                 VN_RELE(nvp);
3128                 nvp = NULL;
3129         } /* nvp */
3130
3131         smbfs_attrcache_remove(onp);
3132         error = smbfs_smb_rename(onp, ndnp, nnm, strlen(nnm), scred);
3133
3134         /*
3135          * If the old name should no longer exist,
3136          * discard any cached attributes under it.
3137          */
3138         if (error == 0) {
3139                 smbfs_attrcache_prune(onp);
3140                 /* SMBFS_VNEVENT... */
3141         }
3142
3143 out:
3144         if (nvp) {
3145                 if (nvp_locked)
3146                         vn_vfsunlock(nvp);
3147                 VN_RELE(nvp);
3148         }
3149
3150         return (error);
3151 }
3152
3153 /*
3154  * XXX
3155  * vsecattr_t is new to build 77, and we need to eventually support
3156  * it in order to create an ACL when an object is created.
3157  *
3158  * This op should support the new FIGNORECASE flag for case-insensitive
3159  * lookups, per PSARC 2007/244.
3160  */
3161 /* ARGSUSED */
3162 static int
3163 smbfs_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
3164         cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
3165 {
3166         vnode_t         *vp;
3167         struct smbnode  *dnp = VTOSMB(dvp);
3168         struct smbmntinfo *smi = VTOSMI(dvp);
3169         struct smb_cred scred;
3170         struct smbfattr fattr;
3171         const char              *name = (const char *) nm;
3172         int             nmlen = strlen(name);
3173         int             error, hiderr;
3174
3175         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3176                 return (EPERM);
3177
3178         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3179                 return (EIO);
3180
3181         if ((nmlen == 1 && name[0] == '.') ||
3182             (nmlen == 2 && name[0] == '.' && name[1] == '.'))
3183                 return (EEXIST);
3184
3185         /* Only plain files are allowed in V_XATTRDIR. */
3186         if (dvp->v_flag & V_XATTRDIR)
3187                 return (EINVAL);
3188
3189         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3190                 return (EINTR);
3191         smb_credinit(&scred, cr);
3192
3193         /*
3194          * Require write access in the containing directory.
3195          */
3196         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
3197         if (error)
3198                 goto out;
3199
3200         error = smbfs_smb_mkdir(dnp, name, nmlen, &scred);
3201         if (error)
3202                 goto out;
3203
3204         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
3205         if (error)
3206                 goto out;
3207
3208         smbfs_attr_touchdir(dnp);
3209
3210         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
3211         if (error)
3212                 goto out;
3213
3214         if (name[0] == '.')
3215                 if ((hiderr = smbfs_smb_hideit(VTOSMB(vp), NULL, 0, &scred)))
3216                         SMBVDEBUG("hide failure %d\n", hiderr);
3217
3218         /* Success! */
3219         *vpp = vp;
3220         error = 0;
3221 out:
3222         smb_credrele(&scred);
3223         smbfs_rw_exit(&dnp->r_rwlock);
3224
3225         if (name != nm)
3226                 smbfs_name_free(name, nmlen);
3227
3228         return (error);
3229 }
3230
3231 /*
3232  * XXX
3233  * This op should support the new FIGNORECASE flag for case-insensitive
3234  * lookups, per PSARC 2007/244.
3235  */
3236 /* ARGSUSED */
3237 static int
3238 smbfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
3239         caller_context_t *ct, int flags)
3240 {
3241         vnode_t         *vp = NULL;
3242         int             vp_locked = 0;
3243         struct smbmntinfo *smi = VTOSMI(dvp);
3244         struct smbnode  *dnp = VTOSMB(dvp);
3245         struct smbnode  *np;
3246         struct smb_cred scred;
3247         int             error;
3248
3249         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3250                 return (EPERM);
3251
3252         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3253                 return (EIO);
3254
3255         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3256                 return (EINTR);
3257         smb_credinit(&scred, cr);
3258
3259         /*
3260          * Require w/x access in the containing directory.
3261          * Server handles all other access checks.
3262          */
3263         error = smbfs_access(dvp, VEXEC|VWRITE, 0, cr, ct);
3264         if (error)
3265                 goto out;
3266
3267         /*
3268          * First lookup the entry to be removed.
3269          */
3270         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
3271         if (error)
3272                 goto out;
3273         np = VTOSMB(vp);
3274
3275         /*
3276          * Disallow rmdir of "." or current dir, or the FS root.
3277          * Also make sure it's a directory, not a mount point,
3278          * and lock to keep mount/umount away until we're done.
3279          */
3280         if ((vp == dvp) || (vp == cdir) || (vp->v_flag & VROOT)) {
3281                 error = EINVAL;
3282                 goto out;
3283         }
3284         if (vp->v_type != VDIR) {
3285                 error = ENOTDIR;
3286                 goto out;
3287         }
3288         if (vn_vfsrlock(vp)) {
3289                 error = EBUSY;
3290                 goto out;
3291         }
3292         vp_locked = 1;
3293         if (vn_mountedvfs(vp) != NULL) {
3294                 error = EBUSY;
3295                 goto out;
3296         }
3297
3298         smbfs_attrcache_remove(np);
3299         error = smbfs_smb_rmdir(np, &scred);
3300
3301         /*
3302          * Similar to smbfs_remove
3303          */
3304         switch (error) {
3305         case 0:
3306         case ENOENT:
3307         case ENOTDIR:
3308                 smbfs_attrcache_prune(np);
3309                 break;
3310         }
3311
3312         if (error)
3313                 goto out;
3314
3315         mutex_enter(&np->r_statelock);
3316         dnp->n_flag |= NMODIFIED;
3317         mutex_exit(&np->r_statelock);
3318         smbfs_attr_touchdir(dnp);
3319         smbfs_rmhash(np);
3320
3321 out:
3322         if (vp) {
3323                 if (vp_locked)
3324                         vn_vfsunlock(vp);
3325                 VN_RELE(vp);
3326         }
3327         smb_credrele(&scred);
3328         smbfs_rw_exit(&dnp->r_rwlock);
3329
3330         return (error);
3331 }
3332
3333
3334 /* ARGSUSED */
3335 static int
3336 smbfs_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr,
3337         caller_context_t *ct, int flags)
3338 {
3339         /* Not yet... */
3340         return (ENOSYS);
3341 }
3342
3343
3344 /* ARGSUSED */
3345 static int
3346 smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
3347         caller_context_t *ct, int flags)
3348 {
3349         struct smbnode  *np = VTOSMB(vp);
3350         int             error = 0;
3351         smbmntinfo_t    *smi;
3352
3353         smi = VTOSMI(vp);
3354
3355         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3356                 return (EIO);
3357
3358         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3359                 return (EIO);
3360
3361         /*
3362          * Require read access in the directory.
3363          */
3364         error = smbfs_access(vp, VREAD, 0, cr, ct);
3365         if (error)
3366                 return (error);
3367
3368         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
3369
3370         /*
3371          * Todo readdir cache here
3372          *
3373          * I am serializing the entire readdir opreation
3374          * now since we have not yet implemented readdir
3375          * cache. This fix needs to be revisited once
3376          * we implement readdir cache.
3377          */
3378         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
3379                 return (EINTR);
3380
3381         error = smbfs_readvdir(vp, uiop, cr, eofp, ct);
3382
3383         smbfs_rw_exit(&np->r_lkserlock);
3384
3385         return (error);
3386 }
3387
/*
 * Worker for smbfs_readdir: copy directory entries of vp out to the
 * caller's uio as dirent64 records.
 *
 * The uio offset is used as a "cookie" that counts directory entries
 * (not bytes): cookie 0 is ".", cookie 1 is "..", and entries returned
 * by the SMB search start at FIRST_DIROFS.  The open search context is
 * kept in np->n_dirseq and its current position in np->n_dirofs; the
 * caller must hold np->r_lkserlock as writer (asserted below).
 *
 * Returns:
 *      0       success, including end of directory (in which case
 *              *eofp is set to 1 if eofp is not NULL)
 *      EBADF   no search context (n_dirseq is NULL)
 *      EINVAL  offset/resid out of the 32-bit range, or the buffer
 *              cannot hold one maximum-sized dirent
 *      other   errors from smbfs_smb_findopen, smbfs_smb_findnext
 *              or uiomove
 */
/* ARGSUSED */
static int
smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
        caller_context_t *ct)
{
        /*
         * Note: "limit" tells the SMB-level FindFirst/FindNext
         * functions how many directory entries to request in
         * each OtW call.  It needs to be large enough so that
         * we don't make lots of tiny OtW requests, but there's
         * no point making it larger than the maximum number of
         * OtW entries that would fit in a maximum sized trans2
         * response (64k / 48).  Beyond that, it's just tuning.
         * WinNT used 512, Win2k used 1366.  We use 1000.
         */
        static const int limit = 1000;
        /* Largest possible dirent size. */
        static const size_t dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN);
        struct smb_cred scred;
        vnode_t         *newvp;
        struct smbnode  *np = VTOSMB(vp);
        struct smbfs_fctx *ctx;
        struct dirent64 *dp;
        ssize_t         save_resid;
        offset_t        save_offset; /* 64 bits */
        int             offset; /* yes, 32 bits */
        int             nmlen, error;
        ushort_t        reclen;

        ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);

        /* Make sure we serialize for n_dirseq use. */
        ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));

        /*
         * Make sure smbfs_open filled in n_dirseq
         */
        if (np->n_dirseq == NULL)
                return (EBADF);

        /* Check for overflow of (32-bit) directory offset. */
        if (uio->uio_loffset < 0 || uio->uio_loffset > INT32_MAX ||
            (uio->uio_loffset + uio->uio_resid) > INT32_MAX)
                return (EINVAL);

        /* Require space for at least one dirent. */
        if (uio->uio_resid < dbufsiz)
                return (EINVAL);

        SMBVDEBUG("dirname='%s'\n", np->n_rpath);
        smb_credinit(&scred, cr);
        /* Scratch buffer in which each dirent is built before copyout. */
        dp = kmem_alloc(dbufsiz, KM_SLEEP);

        /* Remember the starting state so it can be restored at "out". */
        save_resid = uio->uio_resid;
        save_offset = uio->uio_loffset;
        offset = uio->uio_offset;
        SMBVDEBUG("in: offset=%d, resid=%d\n",
            (int)uio->uio_offset, (int)uio->uio_resid);
        error = 0;

        /*
         * Generate the "." and ".." entries here so we can
         * (1) make sure they appear (but only once), and
         * (2) deal with getting their I numbers which the
         * findnext below does only for normal names.
         */
        while (offset < FIRST_DIROFS) {
                /*
                 * Tricky bit filling in the first two:
                 * offset 0 is ".", offset 1 is ".."
                 * so strlen of these is offset+1.
                 */
                reclen = DIRENT64_RECLEN(offset + 1);
                if (uio->uio_resid < reclen)
                        goto out;
                bzero(dp, reclen);
                dp->d_reclen = reclen;
                dp->d_name[0] = '.';
                dp->d_name[1] = '.';
                /* Terminate after one or two dots, depending on offset. */
                dp->d_name[offset + 1] = '\0';
                /*
                 * Want the real I-numbers for the "." and ".."
                 * entries.  For these two names, we know that
                 * smbfslookup can get the nodes efficiently.
                 */
                error = smbfslookup(vp, dp->d_name, &newvp, cr, 1, ct);
                if (error) {
                        dp->d_ino = np->n_ino + offset; /* fiction */
                } else {
                        dp->d_ino = VTOSMB(newvp)->n_ino;
                        VN_RELE(newvp);
                }
                /*
                 * Note: d_off is the offset that a user-level program
                 * should seek to for reading the NEXT directory entry.
                 * See libc: readdir, telldir, seekdir
                 */
                dp->d_off = offset + 1;
                /* A lookup failure above is not fatal; error is reset here. */
                error = uiomove(dp, reclen, UIO_READ, uio);
                if (error)
                        goto out;
                /*
                 * Note: uiomove updates uio->uio_offset,
                 * but we want it to be our "cookie" value,
                 * which just counts dirents ignoring size.
                 */
                uio->uio_offset = ++offset;
        }

        /*
         * If there was a backward seek, we have to reopen.
         */
        if (offset < np->n_dirofs) {
                SMBVDEBUG("Reopening search %d:%d\n",
                    offset, np->n_dirofs);
                error = smbfs_smb_findopen(np, "*", 1,
                    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
                    &scred, &ctx);
                if (error) {
                        SMBVDEBUG("can not open search, error = %d", error);
                        goto out;
                }
                /* free the old one */
                (void) smbfs_smb_findclose(np->n_dirseq, &scred);
                /* save the new one */
                np->n_dirseq = ctx;
                np->n_dirofs = FIRST_DIROFS;
        } else {
                ctx = np->n_dirseq;
        }

        /*
         * Skip entries before the requested offset.
         */
        while (np->n_dirofs < offset) {
                error = smbfs_smb_findnext(ctx, limit, &scred);
                if (error != 0)
                        goto out;
                np->n_dirofs++;
        }

        /*
         * While there's room in the caller's buffer:
         *      get a directory entry from SMB,
         *      convert to a dirent, copyout.
         * We stop when there is no longer room for a
         * maximum sized dirent because we must decide
         * before we know anything about the next entry.
         */
        while (uio->uio_resid >= dbufsiz) {
                error = smbfs_smb_findnext(ctx, limit, &scred);
                if (error != 0)
                        goto out;
                np->n_dirofs++;

                /* Sanity check the name length. */
                nmlen = ctx->f_nmlen;
                if (nmlen > SMB_MAXFNAMELEN) {
                        nmlen = SMB_MAXFNAMELEN;
                        SMBVDEBUG("Truncating name: %s\n", ctx->f_name);
                }
                if (smbfs_fastlookup) {
                        /* See comment at smbfs_fastlookup above. */
                        if (smbfs_nget(vp, ctx->f_name, nmlen,
                            &ctx->f_attr, &newvp) == 0)
                                VN_RELE(newvp);
                }

                reclen = DIRENT64_RECLEN(nmlen);
                bzero(dp, reclen);
                dp->d_reclen = reclen;
                bcopy(ctx->f_name, dp->d_name, nmlen);
                dp->d_name[nmlen] = '\0';
                dp->d_ino = ctx->f_inum;
                dp->d_off = offset + 1; /* See d_off comment above */
                error = uiomove(dp, reclen, UIO_READ, uio);
                if (error)
                        goto out;
                /* See comment re. uio_offset above. */
                uio->uio_offset = ++offset;
        }

out:
        /*
         * When we come to the end of a directory, the
         * SMB-level functions return ENOENT, but the
         * caller is not expecting an error return.
         *
         * Also note that we must delay the call to
         * smbfs_smb_findclose(np->n_dirseq, ...)
         * until smbfs_close so that all reads at the
         * end of the directory will return no data.
         */
        if (error == ENOENT) {
                error = 0;
                if (eofp)
                        *eofp = 1;
        }
        /*
         * If we encountered an error (i.e. "access denied")
         * from the FindFirst call, we will have copied out
         * the "." and ".." entries leaving offset == 2.
         * In that case, restore the original offset/resid
         * so the caller gets no data with the error.
         */
        if (error != 0 && offset == FIRST_DIROFS) {
                uio->uio_loffset = save_offset;
                uio->uio_resid = save_resid;
        }
        SMBVDEBUG("out: offset=%d, resid=%d\n",
            (int)uio->uio_offset, (int)uio->uio_resid);

        /* Common cleanup for every path that got past the allocation. */
        kmem_free(dp, dbufsiz);
        smb_credrele(&scred);
        return (error);
}
3604
3605 /*
3606  * Here NFS has: nfs3_bio
3607  * See smbfs_bio above.
3608  */
3609
3610 /* ARGSUSED */
3611 static int
3612 smbfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3613 {
3614         return (ENOSYS);
3615 }
3616
3617
3618 /*
3619  * The pair of functions fop_rwlock, fop_rwunlock
3620  * are optional functions that are called by:
3621  *    getdents, before/after fop_readdir
3622  *    pread, before/after ... fop_read
3623  *    pwrite, before/after ... fop_write
3624  *    (other places)
3625  *
3626  * Careful here: None of the above check for any
3627  * error returns from fop_rwlock / fop_rwunlock!
3628  * In fact, the return value from _rwlock is NOT
3629  * an error code, but V_WRITELOCK_TRUE / _FALSE.
3630  *
3631  * Therefore, it's up to _this_ code to make sure
3632  * the lock state remains balanced, which means
3633  * we can't "bail out" on interrupts, etc.
3634  */
3635
3636 /* ARGSUSED2 */
3637 static int
3638 smbfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3639 {
3640         smbnode_t       *np = VTOSMB(vp);
3641
3642         if (!write_lock) {
3643                 (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_READER, FALSE);
3644                 return (V_WRITELOCK_FALSE);
3645         }
3646
3647
3648         (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, FALSE);
3649         return (V_WRITELOCK_TRUE);
3650 }
3651
3652 /* ARGSUSED */
3653 static void
3654 smbfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3655 {
3656         smbnode_t       *np = VTOSMB(vp);
3657
3658         smbfs_rw_exit(&np->r_rwlock);
3659 }
3660
3661
3662 /* ARGSUSED */
3663 static int
3664 smbfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3665 {
3666         smbmntinfo_t    *smi;
3667
3668         smi = VTOSMI(vp);
3669
3670         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3671                 return (EPERM);
3672
3673         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3674                 return (EIO);
3675
3676         /*
3677          * Because we stuff the readdir cookie into the offset field
3678          * someone may attempt to do an lseek with the cookie which
3679          * we want to succeed.
3680          */
3681         if (vp->v_type == VDIR)
3682                 return (0);
3683
3684         /* Like NFS3, just check for 63-bit overflow. */
3685         if (*noffp < 0)
3686                 return (EINVAL);
3687
3688         return (0);
3689 }
3690
3691 /* mmap support ******************************************************** */
3692
#ifdef DEBUG
/*
 * Debug-only statistic, bumped in smbfs_getapage each time a page
 * that page_exists() reported could not then be obtained with
 * page_lookup(), forcing the lookup to start over.
 */
static int smbfs_lostpage = 0;  /* number of times we lost original page */
#endif
3696
3697 /*
3698  * Return all the pages from [off..off+len) in file
3699  * Like nfs3_getpage
3700  */
3701 /* ARGSUSED */
3702 static int
3703 smbfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3704         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3705         enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3706 {
3707         smbnode_t       *np;
3708         smbmntinfo_t    *smi;
3709         int             error;
3710
3711         np = VTOSMB(vp);
3712         smi = VTOSMI(vp);
3713
3714         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3715                 return (EIO);
3716
3717         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3718                 return (EIO);
3719
3720         if (vp->v_flag & VNOMAP)
3721                 return (ENOSYS);
3722
3723         if (protp != NULL)
3724                 *protp = PROT_ALL;
3725
3726         /*
3727          * Now valididate that the caches are up to date.
3728          */
3729         error = smbfs_validate_caches(vp, cr);
3730         if (error)
3731                 return (error);
3732
3733 retry:
3734         mutex_enter(&np->r_statelock);
3735
3736         /*
3737          * Don't create dirty pages faster than they
3738          * can be cleaned ... (etc. see nfs)
3739          *
3740          * Here NFS also tests:
3741          *  (mi->mi_max_threads != 0 &&
3742          *  rp->r_awcount > 2 * mi->mi_max_threads)
3743          */
3744         if (rw == S_CREATE) {
3745                 while (np->r_gcount > 0)
3746                         cv_wait(&np->r_cv, &np->r_statelock);
3747         }
3748
3749         /*
3750          * If we are getting called as a side effect of a write
3751          * operation the local file size might not be extended yet.
3752          * In this case we want to be able to return pages of zeroes.
3753          */
3754         if (off + len > np->r_size + PAGEOFFSET && seg != segkmap) {
3755                 mutex_exit(&np->r_statelock);
3756                 return (EFAULT);                /* beyond EOF */
3757         }
3758
3759         mutex_exit(&np->r_statelock);
3760
3761         error = pvn_getpages(smbfs_getapage, vp, off, len, protp,
3762             pl, plsz, seg, addr, rw, cr);
3763
3764         switch (error) {
3765         case SMBFS_EOF:
3766                 smbfs_purge_caches(vp, cr);
3767                 goto retry;
3768         case ESTALE:
3769                 /*
3770                  * Here NFS has: PURGE_STALE_FH(error, vp, cr);
3771                  * In-line here as we only use it once.
3772                  */
3773                 mutex_enter(&np->r_statelock);
3774                 np->r_flags |= RSTALE;
3775                 if (!np->r_error)
3776                         np->r_error = (error);
3777                 mutex_exit(&np->r_statelock);
3778                 if (vn_has_cached_data(vp))
3779                         smbfs_invalidate_pages(vp, 0, cr);
3780                 smbfs_purge_caches(vp, cr);
3781                 break;
3782         default:
3783                 break;
3784         }
3785
3786         return (error);
3787 }
3788
/*
 * Called from pvn_getpages to get a particular page.
 * Like nfs3_getapage
 *
 * Looks for the page at "off" in the vnode's page cache.  If it is
 * not there, the page is either created (rw == S_CREATE) or read
 * in, possibly klustered with neighbours, via smbfs_bio.  When pl
 * is NULL nothing is returned to the caller (the read-ahead case,
 * which is not implemented yet).
 *
 * On success with pl != NULL, pl[] is filled in either with the
 * single cached page (locked SE_EXCL for S_CREATE, SE_SHARED
 * otherwise) or by pvn_plist_init for newly created/read pages.
 *
 * Returns 0 on success, EIO for a foreign zone or a dead/unmounted
 * mount, EFAULT when a read hits EOF and the segment is not
 * segkmap, or another error from smbfs_bio.
 */
/* ARGSUSED */
static int
smbfs_getapage(vnode_t *vp, uoff_t off, size_t len, uint_t *protp,
        page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
        enum seg_rw rw, cred_t *cr)
{
        smbnode_t       *np;
        smbmntinfo_t   *smi;

        uint_t          bsize;
        struct buf      *bp;
        page_t          *pp;
        uoff_t  lbn;
        uoff_t  io_off;
        uoff_t  blkoff;
        size_t          io_len;
        uint_t blksize;
        int error;
        /* int readahead; */
        int readahead_issued = 0;
        /* int ra_window; * readahead window */
        page_t *pagefound;

        np = VTOSMB(vp);
        smi = VTOSMI(vp);

        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                return (EIO);

        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                return (EIO);

        /* I/O block size: the file system block size, at least a page. */
        bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);

reread:
        /* Start (or restart) from a clean state. */
        bp = NULL;
        pp = NULL;
        pagefound = NULL;

        if (pl != NULL)
                pl[0] = NULL;

        error = 0;
        /* Block containing "off", and that block's starting offset. */
        lbn = off / bsize;
        blkoff = lbn * bsize;

        /*
         * NFS queues up readahead work here.
         */

again:
        if ((pagefound = page_exists(&vp->v_object, off)) == NULL) {
                if (pl == NULL) {
                        (void) 0; /* Todo: smbfs_async_readahead(); */
                } else if (rw == S_CREATE) {
                        /*
                         * Block for this page is not allocated, or the offset
                         * is beyond the current allocation size, or we're
                         * allocating a swap slot and the page was not found,
                         * so allocate it and return a zero page.
                         */
                        if ((pp = page_create_va(&vp->v_object, off,
                            PAGESIZE, PG_WAIT, seg, addr)) == NULL)
                                cmn_err(CE_PANIC, "smbfs_getapage: page_create");
                        io_len = PAGESIZE;
                        mutex_enter(&np->r_statelock);
                        np->r_nextr = off + PAGESIZE;
                        mutex_exit(&np->r_statelock);
                } else {
                        /*
                         * Need to go to server to get a BLOCK, exception to
                         * that being while reading at offset = 0 or doing
                         * random i/o, in that case read only a PAGE.
                         */
                        mutex_enter(&np->r_statelock);
                        if (blkoff < np->r_size &&
                            blkoff + bsize >= np->r_size) {
                                /*
                                 * If only a block or less is left in
                                 * the file, read all that is remaining.
                                 */
                                if (np->r_size <= off) {
                                        /*
                                         * Trying to access beyond EOF,
                                         * set up to get at least one page.
                                         */
                                        blksize = off + PAGESIZE - blkoff;
                                } else
                                        blksize = np->r_size - blkoff;
                        } else if ((off == 0) ||
                            (off != np->r_nextr && !readahead_issued)) {
                                blksize = PAGESIZE;
                                blkoff = off; /* block = page here */
                        } else
                                blksize = bsize;
                        mutex_exit(&np->r_statelock);

                        /* On success, sets io_off/io_len to the range to read. */
                        pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
                            &io_len, blkoff, blksize, 0);

                        /*
                         * Some other thread has entered the page,
                         * so just use it.
                         */
                        if (pp == NULL)
                                goto again;

                        /*
                         * Now round the request size up to page boundaries.
                         * This ensures that the entire page will be
                         * initialized to zeroes if EOF is encountered.
                         */
                        io_len = ptob(btopr(io_len));

                        bp = pageio_setup(pp, io_len, vp, B_READ);
                        ASSERT(bp != NULL);

                        /*
                         * pageio_setup should have set b_addr to 0.  This
                         * is correct since we want to do I/O on a page
                         * boundary.  bp_mapin will use this addr to calculate
                         * an offset, and then set b_addr to the kernel virtual
                         * address it allocated for us.
                         */
                        ASSERT(bp->b_un.b_addr == 0);

                        bp->b_edev = 0;
                        bp->b_dev = 0;
                        bp->b_lblkno = lbtodb(io_off);
                        bp->b_file = vp;
                        bp->b_offset = (offset_t)off;
                        bp_mapin(bp);

                        /*
                         * If doing a write beyond what we believe is EOF,
                         * don't bother trying to read the pages from the
                         * server, we'll just zero the pages here.  We
                         * don't check that the rw flag is S_WRITE here
                         * because some implementations may attempt a
                         * read access to the buffer before copying data.
                         */
                        mutex_enter(&np->r_statelock);
                        if (io_off >= np->r_size && seg == segkmap) {
                                mutex_exit(&np->r_statelock);
                                bzero(bp->b_un.b_addr, io_len);
                        } else {
                                mutex_exit(&np->r_statelock);
                                error = smbfs_bio(bp, 0, cr);
                        }

                        /*
                         * Unmap the buffer before freeing it.
                         */
                        bp_mapout(bp);
                        pageio_done(bp);

                        /* Here NFS3 updates all pp->p_fsdata */

                        if (error == SMBFS_EOF) {
                                /*
                                 * If doing a write system call just return
                                 * zeroed pages, else user tried to get pages
                                 * beyond EOF, return error.  We don't check
                                 * that the rw flag is S_WRITE here because
                                 * some implementations may attempt a read
                                 * access to the buffer before copying data.
                                 */
                                if (seg == segkmap)
                                        error = 0;
                                else
                                        error = EFAULT;
                        }

                        /* Remember where a sequential reader would go next. */
                        if (!readahead_issued && !error) {
                                mutex_enter(&np->r_statelock);
                                np->r_nextr = io_off + io_len;
                                mutex_exit(&np->r_statelock);
                        }
                }
        }

        /* Nothing to hand back when the caller supplied no page list. */
        if (pl == NULL)
                return (error);

        if (error) {
                /* Release the pages we set up for the failed read. */
                if (pp != NULL)
                        pvn_read_done(pp, B_ERROR);
                return (error);
        }

        if (pagefound) {
                se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

                /*
                 * Page exists in the cache, acquire the appropriate lock.
                 * If this fails, start all over again.
                 */
                if ((pp = page_lookup(&vp->v_object, off, se)) == NULL) {
#ifdef DEBUG
                        smbfs_lostpage++;
#endif
                        goto reread;
                }
                pl[0] = pp;
                pl[1] = NULL;
                return (0);
        }

        /* Pages were created or read above: build the caller's list. */
        if (pp != NULL)
                pvn_plist_init(pp, pl, plsz, off, io_len, rw);

        return (error);
}
4006
4007 /*
4008  * Here NFS has: nfs3_readahead
4009  * No read-ahead in smbfs yet.
4010  */
4011
4012 /*
4013  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
4014  * If len == 0, do from off to EOF.
4015  *
4016  * The normal cases should be len == 0 && off == 0 (entire vp list),
4017  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
4018  * (from pageout).
4019  *
4020  * Like nfs3_putpage + nfs_putpages
4021  */
4022 /* ARGSUSED */
4023 static int
4024 smbfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
4025         caller_context_t *ct)
4026 {
4027         smbnode_t *np;
4028         smbmntinfo_t *smi;
4029         page_t *pp;
4030         uoff_t eoff;
4031         uoff_t io_off;
4032         size_t io_len;
4033         int error;
4034         int rdirty;
4035         int err;
4036
4037         np = VTOSMB(vp);
4038         smi = VTOSMI(vp);
4039
4040         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4041                 return (EIO);
4042
4043         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4044                 return (EIO);
4045
4046         if (vp->v_flag & VNOMAP)
4047                 return (ENOSYS);
4048
4049         /* Here NFS does rp->r_count (++/--) stuff. */
4050
4051         /* Beginning of code from nfs_putpages. */
4052
4053         if (!vn_has_cached_data(vp))
4054                 return (0);
4055
4056         /*
4057          * If ROUTOFSPACE is set, then all writes turn into B_INVAL
4058          * writes.  B_FORCE is set to force the VM system to actually
4059          * invalidate the pages, even if the i/o failed.  The pages
4060          * need to get invalidated because they can't be written out
4061          * because there isn't any space left on either the server's
4062          * file system or in the user's disk quota.  The B_FREE bit
4063          * is cleared to avoid confusion as to whether this is a
4064          * request to place the page on the freelist or to destroy
4065          * it.
4066          */
4067         if ((np->r_flags & ROUTOFSPACE) ||
4068             (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
4069                 flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;
4070
4071         if (len == 0) {
4072                 /*
4073                  * If doing a full file synchronous operation, then clear
4074                  * the RDIRTY bit.  If a page gets dirtied while the flush
4075                  * is happening, then RDIRTY will get set again.  The
4076                  * RDIRTY bit must get cleared before the flush so that
4077                  * we don't lose this information.
4078                  *
4079                  * NFS has B_ASYNC vs sync stuff here.
4080                  */
4081                 if (off == 0 &&
4082                     (np->r_flags & RDIRTY)) {
4083                         mutex_enter(&np->r_statelock);
4084                         rdirty = (np->r_flags & RDIRTY);
4085                         np->r_flags &= ~RDIRTY;
4086                         mutex_exit(&np->r_statelock);
4087                 } else
4088                         rdirty = 0;
4089
4090                 /*
4091                  * Search the entire vp list for pages >= off, and flush
4092                  * the dirty pages.
4093                  */
4094                 error = pvn_vplist_dirty(vp, off, smbfs_putapage,
4095                     flags, cr);
4096
4097                 /*
4098                  * If an error occurred and the file was marked as dirty
4099                  * before and we aren't forcibly invalidating pages, then
4100                  * reset the RDIRTY flag.
4101                  */
4102                 if (error && rdirty &&
4103                     (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
4104                         mutex_enter(&np->r_statelock);
4105                         np->r_flags |= RDIRTY;
4106                         mutex_exit(&np->r_statelock);
4107                 }
4108         } else {
4109                 /*
4110                  * Do a range from [off...off + len) looking for pages
4111                  * to deal with.
4112                  */
4113                 error = 0;
4114                 io_len = 1; /* quiet warnings */
4115                 eoff = off + len;
4116
4117                 for (io_off = off; io_off < eoff; io_off += io_len) {
4118                         mutex_enter(&np->r_statelock);
4119                         if (io_off >= np->r_size) {
4120                                 mutex_exit(&np->r_statelock);
4121                                 break;
4122                         }
4123                         mutex_exit(&np->r_statelock);
4124                         /*
4125                          * If we are not invalidating, synchronously
4126                          * freeing or writing pages use the routine
4127                          * page_lookup_nowait() to prevent reclaiming
4128                          * them from the free list.
4129                          */
4130                         if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
4131                                 pp = page_lookup(&vp->v_object, io_off,
4132                                     (flags & (B_INVAL | B_FREE)) ?
4133                                     SE_EXCL : SE_SHARED);
4134                         } else {
4135                                 pp = page_lookup_nowait(&vp->v_object, io_off,
4136                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
4137                         }
4138
4139                         if (pp == NULL || !pvn_getdirty(pp, flags))
4140                                 io_len = PAGESIZE;
4141                         else {
4142                                 err = smbfs_putapage(vp, pp, &io_off,
4143                                     &io_len, flags, cr);
4144                                 if (!error)
4145                                         error = err;
4146                                 /*
4147                                  * "io_off" and "io_len" are returned as
4148                                  * the range of pages we actually wrote.
4149                                  * This allows us to skip ahead more quickly
4150                                  * since several pages may've been dealt
4151                                  * with by this iteration of the loop.
4152                                  */
4153                         }
4154                 }
4155         }
4156
4157         return (error);
4158 }
4159
4160 /*
4161  * Write out page pp of vp, klustering adjacent dirty pages in the same
4162  * block.  *offp / *lenp (if non-NULL) return the range that was handled.
4163  * Returns 0 or an error code.  Like nfs3_putapage / nfs3_sync_putapage.
4164  */
4165 static int
4166 smbfs_putapage(vnode_t *vp, page_t *pp, uoff_t *offp, size_t *lenp,
4167         int flags, cred_t *cr)
4168 {
4169         smbnode_t *np;
4170         uoff_t io_off;          /* start of the kluster (set below) */
4171         uoff_t lbn_off;         /* byte offset of block lbn */
4172         uoff_t lbn;             /* number of the block containing pp */
4173         size_t io_len;          /* length of the kluster (set below) */
4174         uint_t bsize;           /* larger of vfs_bsize and PAGESIZE */
4175         int error;
4176
4177         np = VTOSMB(vp);
4178
4179         ASSERT(!vn_is_readonly(vp));
4180
4181         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
4182         lbn = pp->p_offset / bsize;
4183         lbn_off = lbn * bsize;
4184
4185         /*
4186          * Find a kluster that fits in one block, or in
4187          * one page if pages are bigger than blocks.  If
4188          * there is less file space allocated than a whole
4189          * page, we'll shorten the i/o request below.
4190          */
4191         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off,
4192             roundup(bsize, PAGESIZE), flags);
4193
4194         /*
4195          * pvn_write_kluster shouldn't have returned a page with offset
4196          * behind the original page we were given.  Verify that.
4197          */
4198         ASSERT((pp->p_offset / bsize) >= lbn);
4199
4200         /*
4201          * Now pp will have the list of kept dirty pages marked for
4202          * write back.  It will also handle invalidation and freeing
4203          * of pages that are not dirty.  Clip io_len to the end of the
4204          * block if page rounding carried the kluster past it.
4205          */
4206         if (io_off + io_len > lbn_off + bsize) {
4207                 ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE);
4208                 io_len = lbn_off + bsize - io_off;
4209         }
4210         /*
4211          * RMODINPROGRESS is set in writerp() while a uiomove() is in
4212          * progress, i.e. while r_size has not yet been made consistent
4213          * with the new size of the file.  When the uiomove() completes,
4214          * r_size is updated and RMODINPROGRESS is cleared.  Checking the
4215          * flag here makes sure that smbfs_bio() sees a consistent value
4216          * of r_size.
4217          *
4218          * Without this handshaking, it is possible that smbfs_bio()
4219          * picks up the old value of r_size before the uiomove() in
4220          * writerp() completes.  This would result in the write through
4221          * smbfs_bio() being dropped.
4222          *
4223          * More precisely, there is a window between the time the
4224          * uiomove() completes and the time r_size is updated.  If a
4225          * VOP_PUTPAGE() operation intervenes in this window, the page
4226          * will be picked up, because it is dirty (it will be unlocked,
4227          * unless it was pagecreate'd).  When the page is picked up as
4228          * dirty, the dirty bit is reset (pvn_getdirty()).  In
4229          * smbfs_write(), r_size is checked.  This will still be the old
4230          * size, therefore the page will not be written out.  When
4231          * segmap_release() calls VOP_PUTPAGE(), the page will be found
4232          * to be clean and the write will be dropped.
4233          */
4234         if (np->r_flags & RMODINPROGRESS) {
4235                 mutex_enter(&np->r_statelock);
4236                 if ((np->r_flags & RMODINPROGRESS) &&
4237                     np->r_modaddr + MAXBSIZE > io_off &&
4238                     np->r_modaddr < io_off + io_len) {
4239                         page_t *plist;
4240                         /*
4241                          * A write is in progress for this region of the file.
4242                          * If we did not detect RMODINPROGRESS here then this
4243                          * path through smbfs_putapage() would eventually go to
4244                          * smbfs_bio() and may not write out all of the data
4245                          * in the pages, and data would be lost.  Instead, set
4246                          * the modified bit on each page in the page list,
4247                          * unlock it, and mark the smbnode with RDIRTY.  The
4248                          * write will be restarted at some later time.
4249                          */
4250                         plist = pp;
4251                         while (plist != NULL) {
4252                                 pp = plist;
4253                                 page_sub(&plist, pp);
4254                                 hat_setmod(pp);
4255                                 page_io_unlock(pp);
4256                                 page_unlock(pp);
4257                         }
4258                         np->r_flags |= RDIRTY;
4259                         mutex_exit(&np->r_statelock);
4260                         if (offp)
4261                                 *offp = io_off;
4262                         if (lenp)
4263                                 *lenp = io_len;
4264                         return (0);
4265                 }
4266                 mutex_exit(&np->r_statelock);
4267         }
4268
4269         /*
4270          * NFS handles (flags & B_ASYNC) here (see nfs_async_putapage());
4271          * this function calls smbfs_rdwrlbn() directly in either case.
4272          *
4273          * This code section is from: nfs3_sync_putapage()
4274          */
4275
4276         flags |= B_WRITE;
4277
4278         error = smbfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
4279
4280         if ((error == ENOSPC || error == EDQUOT || error == EFBIG ||
4281             error == EACCES) &&
4282             (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) {
4283                 if (!(np->r_flags & ROUTOFSPACE)) {
4284                         mutex_enter(&np->r_statelock);
4285                         np->r_flags |= ROUTOFSPACE;
4286                         mutex_exit(&np->r_statelock);
4287                 }
4288                 flags |= B_ERROR;
4289                 pvn_write_done(pp, flags);
4290                 /*
4291                  * If this was not an async thread, then try again to
4292                  * write out the pages, but this time, also destroy
4293                  * them whether or not the write is successful.  This
4294                  * will prevent memory from filling up with these
4295                  * pages and destroying them is the only alternative
4296                  * if they can't be written out.
4297                  *
4298                  * Don't do this if this is an async thread because
4299                  * when the pages are unlocked in pvn_write_done,
4300                  * some other thread could have come along, locked
4301                  * them, and queued for an async thread.  It would be
4302                  * possible for all of the async threads to be tied
4303                  * up waiting to lock the pages again and they would
4304                  * all already be locked and waiting for an async
4305                  * thread to handle them.  Deadlock.
4306                  */
4307                 if (!(flags & B_ASYNC)) {
4308                         error = smbfs_putpage(vp, io_off, io_len,
4309                             B_INVAL | B_FORCE, cr, NULL);
4310                 }
4311         } else {
4312                 if (error)
4313                         flags |= B_ERROR;
4314                 else if (np->r_flags & ROUTOFSPACE) {
4315                         mutex_enter(&np->r_statelock);
4316                         np->r_flags &= ~ROUTOFSPACE;
4317                         mutex_exit(&np->r_statelock);
4318                 }
4319                 pvn_write_done(pp, flags);
4320         }
4321
4322         /* Now more code from nfs3_putapage: report the range handled. */
4323
4324         if (offp)
4325                 *offp = io_off;
4326         if (lenp)
4327                 *lenp = io_len;
4328
4329         return (error);
4330 }
4331
4332 /*
4333  * Invalidate cached pages of vp at offsets >= off.  NFS has this in
4334  * nfs_client.c; it lives here so smbfs_putapage can be file scope.
4335  */
4336 void
4337 smbfs_invalidate_pages(vnode_t *vp, uoff_t off, cred_t *cr)
4338 {
4339         smbnode_t *node = VTOSMB(vp);
4340
4341         /* Wait until nobody else has RTRUNCATE set, then set it. */
4342         mutex_enter(&node->r_statelock);
4343         for (;;) {
4344                 if ((node->r_flags & RTRUNCATE) == 0)
4345                         break;
4346                 cv_wait(&node->r_cv, &node->r_statelock);
4347         }
4348         node->r_flags |= RTRUNCATE;
4349         if (off == 0) {
4350                 node->r_flags &= ~RDIRTY;
4351                 if ((node->r_flags & RSTALE) == 0)
4352                         node->r_error = 0;
4353         }
4354         /* (Here NFSv3 also has np->r_truncaddr = off.) */
4355         mutex_exit(&node->r_statelock);
4356
4357         (void) pvn_vplist_dirty(vp, off, smbfs_putapage, B_INVAL | B_TRUNC, cr);
4358
4359         mutex_enter(&node->r_statelock);
4360         node->r_flags &= ~RTRUNCATE;
4361         cv_broadcast(&node->r_cv);
4362         mutex_exit(&node->r_statelock);
4363 }
4364
4365 /* VOP_MAP for regular files: maps [off, off + len) into as.  Like nfs3_map */
4366
4367 /* ARGSUSED */
4368 static int
4369 smbfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
4370         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
4371         cred_t *cr, caller_context_t *ct)
4372 {
4373         segvn_crargs_t  vn_a;
4374         struct vattr    va;
4375         smbnode_t       *np;
4376         smbmntinfo_t    *smi;
4377         int             error;
4378
4379         np = VTOSMB(vp);
4380         smi = VTOSMI(vp);
4381
4382         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4383                 return (EIO);
4384
4385         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4386                 return (EIO);
4387
4388         if (vp->v_flag & VNOMAP)
4389                 return (ENOSYS);
4390
4391         if (off < 0 || off + (ssize_t)len < 0)
4392                 return (ENXIO);
4393
4394         if (vp->v_type != VREG)
4395                 return (ENODEV);
4396
4397         /*
4398          * NFS does close-to-open consistency stuff here.  We just get
4399          * (possibly cached) attributes; va_mode feeds MANDLOCK() below.
4400          */
4401         va.va_mask = VATTR_ALL;
4402         if ((error = smbfsgetattr(vp, &va, cr)) != 0)
4403                 return (error);
4404
4405         /*
4406          * The vnode may currently be marked as not cachable (VNOCACHE),
4407          * meaning portions of the file are locked (through VOP_FRLOCK); the
4408          * map request must then be refused.  That check is made below under
4409          * np->r_lkserlock to avoid a race with concurrent lock requests.
4410          */
4411         /*
4412          * Atomically increment r_inmap after acquiring r_rwlock. The
4413          * idea here is to acquire r_rwlock to block read/write and
4414          * not to protect r_inmap. r_inmap will inform smbfs_read/write()
4415          * that we are in smbfs_map(). Now, r_rwlock is acquired in order
4416          * and we can prevent the deadlock that would have occurred
4417          * when smbfs_addmap() would have acquired it out of order.
4418          *
4419          * Since we are not protecting r_inmap by any lock, we do not
4420          * hold any lock when we decrement it. We atomically decrement
4421          * r_inmap after we release r_lkserlock.  Note that r_lkserlock may
4422          * be re-entered as writer in smbfs_addmap (called via as_map).
4423          */
4424
4425         if (smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, SMBINTR(vp)))
4426                 return (EINTR);
4427         atomic_inc_uint(&np->r_inmap);
4428         smbfs_rw_exit(&np->r_rwlock);
4429
4430         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp))) {
4431                 atomic_dec_uint(&np->r_inmap);
4432                 return (EINTR);
4433         }
4434
4435         if (vp->v_flag & VNOCACHE) {
4436                 error = EAGAIN;
4437                 goto done;
4438         }
4439
4440         /*
4441          * Don't allow concurrent locks and mapping if mandatory locking is
4442          * enabled.
4443          */
4444         if ((flk_has_remote_locks(vp) || smbfs_lm_has_sleep(vp)) &&
4445             MANDLOCK(vp, va.va_mode)) {
4446                 error = EAGAIN;
4447                 goto done;
4448         }
4449
4450         as_rangelock(as);
4451         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
4452         if (error != 0) {
4453                 as_rangeunlock(as);
4454                 goto done;
4455         }
4456
4457         vn_a.vp = vp;
4458         vn_a.offset = off;
4459         vn_a.type = (flags & MAP_TYPE);
4460         vn_a.prot = (uchar_t)prot;
4461         vn_a.maxprot = (uchar_t)maxprot;
4462         vn_a.flags = (flags & ~MAP_TYPE);
4463         vn_a.cred = cr;
4464         vn_a.amp = NULL;
4465         vn_a.szc = 0;
4466         vn_a.lgrp_mem_policy_flags = 0;
4467
4468         error = as_map(as, *addrp, len, segvn_create, &vn_a);
4469         as_rangeunlock(as);
4470
4471 done:
4472         smbfs_rw_exit(&np->r_lkserlock);
4473         atomic_dec_uint(&np->r_inmap);
4474         return (error);
4475 }
4476
4477 /*
4478  * The addmap/delmap functions hold the SMB FID open as long as there
4479  * are pages mapped in this as/seg.  A FID reference is taken when the
4480  * mapping count goes from zero to non-zero, and released when the
4481  * mapping count goes from non-zero to zero.
4482  */
4483
4484 /* ARGSUSED */
4485 static int
4486 smbfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4487         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
4488         cred_t *cr, caller_context_t *ct)
4489 {
4490         smbnode_t *node = VTOSMB(vp);
4491         boolean_t first_mapping;
4492
4493         /* Account for the new pages; note whether none were mapped. */
4494         mutex_enter(&node->r_statelock);
4495         first_mapping = (node->r_mapcnt == 0) ? B_TRUE : B_FALSE;
4496         node->r_mapcnt += btopr(len);
4497         mutex_exit(&node->r_statelock);
4498
4499         if (!first_mapping)
4500                 return (0);
4501
4502         /*
4503          * r_mapcnt went from zero to non-zero,
4504          * so increment n_fidrefs.
4505          */
4506         (void) smbfs_rw_enter_sig(&node->r_lkserlock, RW_WRITER, 0);
4507         node->n_fidrefs++;
4508         smbfs_rw_exit(&node->r_lkserlock);
4509         return (0);
4510 }
4511
4512 /*
4513  * Args passed from smbfs_delmap to its taskq callback, smbfs_delmap_async.
4514  */
4515 typedef struct smbfs_delmap_args {
4516         taskq_ent_t             dm_tqent;       /* for taskq_dispatch_ent */
4517         cred_t                  *dm_cr;         /* held; freed by callback */
4518         vnode_t                 *dm_vp;         /* held; released by callback */
4519         offset_t                dm_off;         /* copies of delmap args: */
4520         caddr_t                 dm_addr;
4521         size_t                  dm_len;
4522         uint_t                  dm_prot;
4523         uint_t                  dm_maxprot;
4524         uint_t                  dm_flags;
4525         boolean_t               dm_rele_fid;    /* r_mapcnt reached zero */
4526 } smbfs_delmap_args_t;
4527
4528 /*
4529  * Delmap not only releases the SMB FID reference (see smbfs_addmap)
4530  * but also flushes dirty pages as needed.  Both are done in an async
4531  * taskq job to avoid interfering with locks held in the VM layer when
4532  * this is called.
4533  */
4534
4535 /* ARGSUSED */
4536 static int
4537 smbfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4538         size_t len, uint_t prot, uint_t maxprot, uint_t flags,
4539         cred_t *cr, caller_context_t *ct)
4540 {
4541         smbnode_t               *node = VTOSMB(vp);
4542         smbmntinfo_t            *mntinfo = VTOSMI(vp);
4543         smbfs_delmap_args_t     *job;
4544
4545         job = kmem_zalloc(sizeof (*job), KM_SLEEP);
4546
4547         /*
4548          * The VM layer may segvn_free the seg holding this vnode
4549          * before our callback has a chance to run, so hold the cred
4550          * and the vnode here; the callback releases both.
4551          */
4552         crhold(cr);
4553         job->dm_cr = cr;
4554
4555         VN_HOLD(vp);
4556         job->dm_vp = vp;
4557
4558         /* Remember the rest of the request for the callback. */
4559         job->dm_off = off;
4560         job->dm_addr = addr;
4561         job->dm_len = len;
4562         job->dm_prot = prot;
4563         job->dm_maxprot = maxprot;
4564         job->dm_flags = flags;
4565
4566         /*
4567          * Go ahead and decrement r_mapcnt now, which is
4568          * the primary purpose of this function.
4569          *
4570          * When r_mapcnt goes to zero, smbfs_rele_fid must be
4571          * called, but that can't be done here, so set a flag
4572          * telling the async task to do it.
4573          */
4574         mutex_enter(&node->r_statelock);
4575         node->r_mapcnt -= btopr(len);
4576         ASSERT(node->r_mapcnt >= 0);
4577         job->dm_rele_fid = (node->r_mapcnt == 0) ? B_TRUE : B_FALSE;
4578         mutex_exit(&node->r_statelock);
4579
4580         /* Hand the page flush and FID release to the taskq. */
4581         taskq_dispatch_ent(mntinfo->smi_taskq, smbfs_delmap_async, job, 0,
4582             &job->dm_tqent);
4583
4584         return (0);
4585 }
4586
4587 /*
4588  * Taskq callback dispatched by smbfs_delmap.
4589  *
4590  * Flushes any dirty pages in the unmapped range, drops the FID
4591  * reference when asked to, and releases the holds taken there.
4592  */
4593 /* ARGSUSED */
4594 static void
4595 smbfs_delmap_async(void *varg)
4596 {
4597         smbfs_delmap_args_t     *job = varg;
4598         cred_t                  *cr = job->dm_cr;
4599         vnode_t                 *vp = job->dm_vp;
4600         smbnode_t               *node = VTOSMB(vp);
4601         smbmntinfo_t            *mntinfo = VTOSMI(vp);
4602         struct smb_cred         scred;
4603
4604         /* r_mapcnt was already decremented in smbfs_delmap. */
4605
4606         /*
4607          * Initiate a page flush if there are pages, the file system
4608          * was not mounted readonly, the segment was mapped shared, and
4609          * the pages themselves were writeable.
4610          */
4611         if (vn_has_cached_data(vp) && !vn_is_readonly(vp) &&
4612             job->dm_flags == MAP_SHARED &&
4613             (job->dm_maxprot & PROT_WRITE) != 0) {
4614                 /*
4615                  * Mark RDIRTY here; it is used to check whether a
4616                  * file is dirty when smbfs is unmounted.
4617                  */
4618                 mutex_enter(&node->r_statelock);
4619                 node->r_flags |= RDIRTY;
4620                 mutex_exit(&node->r_statelock);
4621
4622                 /*
4623                  * Need to finish the putpage before we
4624                  * close the OtW FID needed for I/O.
4625                  */
4626                 (void) smbfs_putpage(vp, job->dm_off, job->dm_len, 0,
4627                     cr, NULL);
4628         }
4629
4630         /* If direct I/O is flagged on node or mount, invalidate too. */
4631         if ((node->r_flags & RDIRECTIO) != 0 ||
4632             (mntinfo->smi_flags & SMI_DIRECTIO) != 0) {
4633                 (void) smbfs_putpage(vp, job->dm_off, job->dm_len,
4634                     B_INVAL, cr, NULL);
4635         }
4636
4637         /*
4638          * If r_mapcnt went to zero, drop our FID ref now.
4639          * On the last fidref, this does an OtW close.
4640          */
4641         if (job->dm_rele_fid) {
4642                 (void) smbfs_rw_enter_sig(&node->r_lkserlock, RW_WRITER, 0);
4643                 smb_credinit(&scred, cr);
4644
4645                 smbfs_rele_fid(node, &scred);
4646
4647                 smb_credrele(&scred);
4648                 smbfs_rw_exit(&node->r_lkserlock);
4649         }
4650
4651         /* Release holds taken in smbfs_delmap */
4652         VN_RELE(vp);
4653         crfree(cr);
4654
4655         kmem_free(job, sizeof (*job));
4656 }
4657
4658 /* No smbfs_pageio() or smbfs_dispose() ops. */
4659
4660 /* misc. ******************************************************** */
4661
4662
4663 /*
4664  * File/record locking: handled by fs_frlock, only when SMI_LLOCK is set.
4665  * XXX May need to support PSARC 2007/440, nbmand changes for CIFS Service.
4666  */
4667 static int
4668 smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4669         offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
4670         caller_context_t *ct)
4671 {
4672         smbmntinfo_t *smi = VTOSMI(vp);
4673
4674         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4675                 return (EIO);
4676         if ((smi->smi_flags & SMI_LLOCK) == 0)
4677                 return (ENOSYS);
4678         return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
4679 }
4680
4681 /*
4682  * Free storage space associated with the specified vnode.  The portion
4683  * to be freed is specified by bfp->l_start and bfp->l_len (already
4684  * normalized to a "whence" of 0).  Only F_FREESP with l_len == 0 is
4685  * handled, i.e. setting the file size to l_start.
4686  *
4687  * Called by fcntl(fd, F_FREESP, lkp) for libc:ftruncate, etc.
4688  */
4689 /* ARGSUSED */
4690 static int
4691 smbfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4692         offset_t offset, cred_t *cr, caller_context_t *ct)
4693 {
4694         struct vattr    va;
4695         smbmntinfo_t    *smi = VTOSMI(vp);
4696         int             error;
4697
4698         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4699                 return (EIO);
4700
4701         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4702                 return (EIO);
4703
4704         /* Caller (fcntl) has checked v_type */
4705         ASSERT(vp->v_type == VREG);
4706         if (cmd != F_FREESP)
4707                 return (EINVAL);
4708
4709         /*
4710          * Like NFS3, no 32-bit offset checks here.
4711          * Our SMB layer takes care to return EFBIG
4712          * when it has to fallback to a 32-bit call.
4713          */
4714
4715         error = convoff(vp, bfp, 0, offset);
4716         if (error != 0)
4717                 return (error);
4718         ASSERT(bfp->l_start >= 0);
4719
4720         /* Only l_len == 0 is handled here. */
4721         if (bfp->l_len != 0)
4722                 return (EINVAL);
4723
4724         /*
4725          * ftruncate should not change the ctime and
4726          * mtime if we truncate the file to its
4727          * previous size.
4728          */
4729         va.va_mask = VATTR_SIZE;
4730         error = smbfsgetattr(vp, &va, cr);
4731         if (error != 0 || va.va_size == bfp->l_start)
4732                 return (error);
4733
4734         va.va_mask = VATTR_SIZE;
4735         va.va_size = bfp->l_start;
4736         error = smbfssetattr(vp, &va, 0, cr);
4737         /* SMBFS_VNEVENT... */
4738         return (error);
4739 }
4740
4741
4742 /* ARGSUSED */
4743 static int
4744 smbfs_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
4745 {
4746         /* Not supported: *vpp is left untouched and ENOSYS returned. */
4747         return (ENOSYS);
4748 }
4749
4750
4751 /* ARGSUSED */
4752 static int
4753 smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4754         caller_context_t *ct)
4755 {
4756         vfs_t *vfsp = vp->v_vfsp;
4757         smbmntinfo_t *smi = VFTOSMI(vfsp);
4758         struct smb_share *share;
4759         int error = 0;
4760
4761         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4762                 return (EIO);
4763
4764         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4765                 return (EIO);
4766
4767         /* Answer what is known here; fs_pathconf gets everything else. */
4768         switch (cmd) {
4769         case _PC_FILESIZEBITS:
4770                 share = smi->smi_share;
4771                 *valp = (SSTOVC(share)->vc_sopt.sv_caps &
4772                     SMB_CAP_LARGE_FILES) ? 64 : 32;
4773                 break;
4774
4775         case _PC_LINK_MAX:
4776                 /* We only ever report one link to an object */
4777                 *valp = 1;
4778                 break;
4779
4780         case _PC_ACL_ENABLED:
4781                 /*
4782                  * Always indicate that ACLs are enabled and
4783                  * that we support ACE_T format, otherwise
4784                  * libsec will ask for ACLENT_T format data
4785                  * which we don't support.
4786                  */
4787                 *valp = _ACL_ACE_ENABLED;
4788                 break;
4789
4790         case _PC_SYMLINK_MAX:
4791                 /* No symlinks until we do Unix extensions */
4792                 *valp = 0;
4793                 break;
4794
4795         case _PC_XATTR_EXISTS:
4796                 /* Requires VFS_XATTR on the vfs; otherwise EINVAL. */
4797                 if ((vfsp->vfs_flag & VFS_XATTR) == 0)
4798                         error = EINVAL;
4799                 else
4800                         *valp = smbfs_xa_exists(vp, cr);
4801                 break;
4802
4803         case _PC_SATTR_ENABLED:
4804         case _PC_SATTR_EXISTS:
4805                 *valp = 1;
4806                 break;
4807
4808         case _PC_TIMESTAMP_RESOLUTION:
4809                 /*
4810                  * Windows times are tenths of microseconds
4811                  * (multiples of 100 nanoseconds).
4812                  */
4813                 *valp = 100L;
4814                 break;
4815
4816         default:
4817                 error = fs_pathconf(vp, cmd, valp, cr, ct);
4818                 break;
4819         }
4820         return (error);
4821 }
4822
4823 /*
4824  * Return ACL data for vp, from smbfs_acl_getvsa or else fs_fab_acl.
4825  */
4826 /* ARGSUSED */
4827 static int
4828 smbfs_getsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4829         caller_context_t *ct)
4830 {
4831         vfs_t *vfsp = vp->v_vfsp;
4832         smbmntinfo_t *smi = VFTOSMI(vfsp);
4833         uint_t  wanted;
4834         int     error;
4835
4836         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4837                 return (EIO);
4838
4839         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4840                 return (EIO);
4841
4842         /*
4843          * Our _pathconf indicates _ACL_ACE_ENABLED,
4844          * so we should only see VSA_ACE, etc here.
4845          * Note: vn_create asks for VSA_DFACLCNT,
4846          * and it expects ENOSYS and empty data.
4847          */
4848         wanted = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES;
4849         if ((vsa->vsa_mask & wanted) == 0)
4850                 return (ENOSYS);
4851
4852         /* smbfs_acl_getvsa is used only when SMI_ACL is set. */
4853         error = ENOSYS;
4854         if (smi->smi_flags & SMI_ACL)
4855                 error = smbfs_acl_getvsa(vp, vsa, flag, cr);
4856
4857         /* Not tried, or it returned ENOSYS: fall back to fs_fab_acl. */
4858         if (error == ENOSYS)
4859                 error = fs_fab_acl(vp, vsa, flag, cr, ct);
4860
4861         return (error);
4862 }
4863
4864 /*
4865  * Set the ACL of vp via smbfs_acl_setvsa.  Fails with EROFS on a
4866  * read-only vfs and with ENOSYS when SMI_ACL is not set.
4867  */
4868 /* ARGSUSED */
4869 static int
4870 smbfs_setsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4871         caller_context_t *ct)
4872 {
4873         vfs_t *vfsp = vp->v_vfsp;
4874         smbmntinfo_t *smi = VFTOSMI(vfsp);
4875         int     error;
4876
4877         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4878                 return (EIO);
4879
4880         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4881                 return (EIO);
4882
4883         /*
4884          * Our _pathconf indicates _ACL_ACE_ENABLED,
4885          * so we should only see VSA_ACE, etc here.
4886          */
4887         if ((vsa->vsa_mask & (VSA_ACE | VSA_ACECNT)) == 0)
4888                 return (ENOSYS);
4889
4890         /* No ACL changes when the vfs is read-only. */
4891         if (vfsp->vfs_flag & VFS_RDONLY)
4892                 return (EROFS);
4893
4894         /*
4895          * Allow only the mount owner to do this.
4896          * See comments at smbfs_access_rwx.
4897          */
4898         error = secpolicy_vnode_setdac(cr, smi->smi_uid);
4899         if (error != 0)
4900                 return (error);
4901
4902         /* smbfs_acl_setvsa is used only when SMI_ACL is set. */
4903         if ((smi->smi_flags & SMI_ACL) == 0)
4904                 return (ENOSYS);
4905
4906         error = smbfs_acl_setvsa(vp, vsa, flag, cr);
4907         return (error);
4908 }
4909
4910
4911 /*
4912  * Share reservations: handled by fs_shrlock, only when SMI_LLOCK is set.
4913  * XXX This op should eventually support PSARC 2007/268.
4914  */
4915 static int
4916 smbfs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
4917         caller_context_t *ct)
4918 {
4919         smbmntinfo_t *smi = VTOSMI(vp);
4920
4921         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4922                 return (EIO);
4923         if ((smi->smi_flags & SMI_LLOCK) == 0)
4924                 return (ENOSYS);
4925         return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
4926 }
4927
4928
4929 /*
4930  * The smbfs vnode operations vector.  Most unimplemented ops will
4931  * return ENOSYS because of fs_nosys().  The only ops where that won't
4932  * work are ACCESS (due to open(2) failures) and ... (anything else left?)
4933  */
4934 const struct vnodeops smbfs_vnodeops = {
4935         .vnop_name = "smbfs",
4936         .vop_open = smbfs_open,
4937         .vop_close = smbfs_close,
4938         .vop_read = smbfs_read,
4939         .vop_write = smbfs_write,
4940         .vop_ioctl = smbfs_ioctl,
4941         .vop_getattr = smbfs_getattr,
4942         .vop_setattr = smbfs_setattr,
4943         .vop_access = smbfs_access,
4944         .vop_lookup = smbfs_lookup,
4945         .vop_create = smbfs_create,
4946         .vop_remove = smbfs_remove,
4947         .vop_link = smbfs_link,
4948         .vop_rename = smbfs_rename,
4949         .vop_mkdir = smbfs_mkdir,
4950         .vop_rmdir = smbfs_rmdir,
4951         .vop_readdir = smbfs_readdir,
4952         .vop_symlink = smbfs_symlink,
4953         .vop_readlink = smbfs_readlink,
4954         .vop_fsync = smbfs_fsync,
4955         .vop_inactive = smbfs_inactive,
4956         .vop_fid = smbfs_fid,
4957         .vop_rwlock = smbfs_rwlock,
4958         .vop_rwunlock = smbfs_rwunlock,
4959         .vop_seek = smbfs_seek,
4960         .vop_frlock = smbfs_frlock,
4961         .vop_space = smbfs_space,
4962         .vop_realvp = smbfs_realvp,
4963         .vop_getpage = smbfs_getpage,
4964         .vop_putpage = smbfs_putpage,
4965         .vop_map = smbfs_map,
4966         .vop_addmap = smbfs_addmap,
4967         .vop_delmap = smbfs_delmap,
4968         .vop_pathconf = smbfs_pathconf,
4969         .vop_setsecattr = smbfs_setsecattr,
4970         .vop_getsecattr = smbfs_getsecattr,
4971         .vop_shrlock = smbfs_shrlock,
4972 #ifdef  SMBFS_VNEVENT
4973         .vop_vnevent = fs_vnevent_support,
4974 #endif
4975 };