usr/src/uts/common/io/winlockio.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26
  27 /*
  28  * This is the lock device driver.
  29  *
  30  * The lock driver provides a variation of inter-process mutexes with the
  31  * following twist in semantics:
  32  *      A waiter for a lock after a set timeout can "break" the lock and
  33  *      grab it from the current owner (without informing the owner).
  34  *
  35  * These semantics result in temporarily multiple processes thinking they
  36  * own the lock. This usually does not make sense for cases where locks are
  37  * used to protect a critical region and it is important to serialize access
  38  * to data structures, because breaking the lock also loses the serialization
  39  * and can result in corrupt data structures.
  40  *
  41  * The usage for winlock driver is primarily driven by the graphics system
  42  * when doing DGA (direct graphics access) graphics. The locks are used to
  43  * protect access to the frame buffer (presumably reflects back to the screen)
  44  * between competing processes that directly write to the screen as opposed
  45  * to going through the window server etc.
  46  * In this case, the result of breaking the lock at worst causes the screen
  47  * image to be distorted and is easily fixed by doing a "refresh".
  48  *
  49  * In well-behaved applications, the lock is held for a very short time and
  50  * the breaking semantics do not come into play. Not having this feature and
  51  * using normal inter-process mutexes would allow a misbehaved application
  52  * to grab the screen writing capability from the window manager and
  53  * effectively make the system look like it is hung (mouse pointer does not
  54  * move).
  55  *
  56  * A secondary aspect of the winlock driver is that it allows for extremely
  57  * fast lock acquire/release in cases where there is low contention. A memory
  58  * write is all that is needed (not even a function call). The window
  59  * manager is usually the only DGA writer, which is optimized for. Occasionally
  60  * some processes might do DGA graphics and cause kernel faults to handle
  61  * the contention/locking (and that has got to be slow!).
  62  *
  63  * The following IOCTLs are supported:
  64  *
  65  *   GRABPAGEALLOC:
  66  *      Compatibility with old cgsix device driver lockpage ioctls.
  67  *      Lockpages created this way must be an entire page for compatibility with
  68  *      older software.  This ioctl allocates a lock context with its own
  69  *      private lock page.  The unique "ident" that identifies this lock is
  70  *      returned.
  71  *
  72  *   GRABPAGEFREE:
  73  *      Compatibility with cgsix device driver lockpage ioctls.  This
  74  *      ioctl releases the lock context allocated by GRABPAGEALLOC.
  75  *
  76  *   GRABLOCKINFO:
  77  *      Returns a one-word flag.  '1' means that multiple clients may
  78  *      access this lock page.  Older device drivers returned '0',
  79  *      meaning that only two clients could access a lock page.
  80  *
  81  *   GRABATTACH:
  82  *      Not supported.  This ioctl would have grabbed all lock pages
  83  *      on behalf of the calling program.
  84  *
  85  *   WINLOCKALLOC:
  86  *      Allocate a lock context.  This ioctl accepts a key value as
  87  *      its argument.  If the key is zero, a new lock context is
  88  *      created, and its "ident" is returned.   If the key is nonzero,
  89  *      all existing contexts are checked to see if they match the
  90  *      key.  If a match is found, its reference count is incremented
  91  *      and its ident is returned, otherwise a new context is created
  92  *      and its ident is returned.
  93  *
  94  *   WINLOCKFREE:
  95  *      Free a lock context.  This ioctl accepts the ident of a lock
  96  *      context and decrements its reference count.  Once the reference
  97  *      count reaches zero *and* all mappings are released, the lock
  98  *      context is freed.  When all the lock context in the lock page are
  99  *      freed, the lock page is freed as well.
 100  *
 101  *   WINLOCKSETTIMEOUT:
 102  *      Set lock timeout for a context.  This ioctl accepts the ident
 103  *      of a lock context and a timeout value in milliseconds.
 104  *      Whenever lock contention occurs, the timer is started and the lock is
 105  *      broken after the timeout expires. If timeout value is zero, lock does
 106  *      not timeout.  This value will be rounded to the nearest clock
 107  *      tick, so don't try to use it for real-time control or something.
 108  *
 109  *   WINLOCKGETTIMEOUT:
 110  *      Get lock timeout from a context.
 111  *
 112  *   WINLOCKDUMP:
 113  *      Dump state of this device.
 114  *
 115  *
 116  * How /dev/winlock works:
 117  *
 118  *   Every lock context consists of two mappings for the client to the lock
 119  *   page.  These mappings are known as the "lock page" and "unlock page"
 120  *   to the client. The first mmap to the lock context (identified by the
 121  *   sy_ident field returned during alloc) allocates mapping to the lock page,
 122  *   the second mmap allocates a mapping to the unlock page.
 123  *      The mappings don't have to be ordered in virtual address space, but do
 124  *   need to be ordered in time. Mapping and unmapping of these lock and unlock
 125  *   pages should happen in pairs. Doing them one at a time or unmapping one
 126  *   and leaving one mapped etc cause undefined behaviors.
 127  *      The mappings are always of length PAGESIZE, and type MAP_SHARED.
 128  *
 129  *   The first ioctl is to ALLOC a lock, either based on a key (if trying to
 130  *      grab a preexisting lock) or 0 (gets a default new one)
 131  *      This ioctl returns a value in sy_ident which is needed to do the
 132  *      later mmaps and FREE/other ioctls.
 133  *
 134  *   The "page number" portion of the sy_ident needs to be passed as the
 135  *      file offset when doing an mmap for both the lock page and unlock page
 136  *
 137  *   The value returned by mmap ( a user virtual address) needs to be
 138  *      incremented by the "page offset" portion of sy_ident to obtain the
 139  *      pointer to the actual lock. (Skipping this step, does not cause any
 140  *      visible error, but the process will be using the wrong lock!)
 141  *
 142  *      On a fork(), the child process will inherit the mappings for free, but
 143  *   will not inherit the parent's lock ownership if any. The child should NOT
 144  *   do an explicit FREE on the lock context unless it did an explicit ALLOC.
 145  *      Only one process at a time is allowed to have a valid hat
 146  *   mapping to a lock page. This is enforced by this driver.
 147  *   A client acquires a lock by writing a '1' to the lock page.
 148  *   Note, that it is not necessary to read and verify that the lock is '0'
 149  *      prior to writing a '1' in it.
 150  *   If it does not already have a valid mapping to that page, the driver
 151  *   takes a fault (devmap_access), loads the client mapping
 152  *   and allows the client to continue.  The client releases the lock by
 153  *   writing a '0' to the unlock page.  Again, if it does not have a valid
 154  *   mapping to the unlock page, the segment driver takes a fault,
 155  *   loads the mapping, and lets the client continue.  From this point
 156  *   forward, the client can make as many locks and unlocks as it
 157  *   wants, without any more faults into the kernel.
 158  *
 159  *   If a different process wants to acquire a lock, it takes a page fault
 160  *   when it writes the '1' to the lock page.  If the segment driver sees
 161  *   that the lock page contained a zero, then it invalidates the owner's
 162  *   mappings and gives the mappings to this process.
 163  *
 164  *   If there is already a '1' in the lock page when the second client
 165  *   tries to access the lock page, then a lock exists.  The segment
 166  *   driver sleeps the second client and, if applicable, starts the
 167  *   timeout on the lock.  The owner's mapping to the unlock page
 168  *   is invalidated so that the driver will be woken again when the owner
 169  *   releases the lock.
 170  *
 171  *   When the locking client finally writes a '0' to the unlock page, the
 172  *   segment driver takes another fault.  The client is given a valid
 173  *   mapping, not to the unlock page, but to the "trash page", and allowed
 174  *   to continue.  Meanwhile, the sleeping client is given a valid mapping
 175  *   to the lock/unlock pages and allowed to continue as well.
 176  *
 177  * RFE: There is a leak if process exits before freeing allocated locks
 178  * But currently not tracking which locks were allocated by which
 179  * process and we do not have a clean entry point into the driver
 180  * to do garbage collection. If the interface used a file descriptor for each
 181  * lock it allocs, then the driver can free up stuff in the _close routine
 182  */
 183
 184 #include <sys/types.h>          /* various type defn's */
 185 #include <sys/debug.h>
 186 #include <sys/param.h>          /* various kernel limits */
 187 #include <sys/time.h>
 188 #include <sys/errno.h>
 189 #include <sys/kmem.h>           /* defines kmem_alloc() */
 190 #include <sys/conf.h>           /* defines cdevsw */
 191 #include <sys/file.h>           /* various file modes, etc. */
 192 #include <sys/uio.h>            /* UIO stuff */
 193 #include <sys/ioctl.h>
 194 #include <sys/cred.h>           /* defines cred struct */
 195 #include <sys/mman.h>           /* defines mmap(2) parameters */
 196 #include <sys/stat.h>           /* defines S_IFCHR */
 197 #include <sys/cmn_err.h>        /* use cmn_err */
 198 #include <sys/ddi.h>            /* ddi stuff */
 199 #include <sys/sunddi.h>         /* ddi stuff */
 200 #include <sys/ddi_impldefs.h>   /* ddi stuff */
 201 #include <sys/winlockio.h>      /* defines ioctls, flags, data structs */
 202
 203 static int      winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 204 static int      winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
 205                         size_t *, uint_t);
 206 static int      winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
 207                         uint_t, uint_t, uint_t, cred_t *);
 208
     /*
      * Character device entry points for the winlock driver.
      * Only ioctl, devmap and segmap are implemented here; open and close are
      * set to nulldev and every other callable slot is set to nodev/nochpoll.
      */
 209 static struct cb_ops    winlock_cb_ops = {
 210         nulldev,                /* open */
 211         nulldev,                /* close */
 212         nodev,                  /* strategy */
 213         nodev,                  /* print */
 214         nodev,                  /* dump */
 215         nodev,                  /* read */
 216         nodev,                  /* write */
 217         winlock_ioctl,          /* ioctl */
 218         winlock_devmap,         /* devmap */
 219         nodev,                  /* mmap */
 220         winlocksegmap,          /* segmap */
 221         nochpoll,               /* poll */
 222         ddi_prop_op,            /* prop_op */
 223         NULL,                   /* streamtab */
 224         D_NEW|D_MP|D_DEVMAP,    /* Driver compatibility flag */
 225         0,                      /* rev */
 226         nodev,                  /* aread */
 227         nodev                   /* awrite */
 228 };
 229
 230 static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 231 static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
 232 static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);
 233
     /*
      * Device operations: autoconfiguration entry points (info/attach/detach)
      * plus a pointer to the character entry points in winlock_cb_ops.
      */
 234 static struct dev_ops   winlock_ops = {
 235         DEVO_REV,               /* devo_rev */
 236         0,                      /* refcount */
 237         winlock_info,           /* info */
 238         nulldev,                /* identify */
 239         nulldev,                /* probe */
 240         winlock_attach,         /* attach */
 241         winlock_detach,         /* detach */
 242         nodev,                  /* reset */
 243         &winlock_cb_ops,        /* driver ops */
 244         NULL,                   /* bus ops */
 245         NULL,                   /* power */
 246         ddi_quiesce_not_needed,         /* quiesce */
 247 };
 248
 249 static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
 250                 void **);
 251 static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
 252                 devmap_cookie_t, void **, devmap_cookie_t, void **);
 253 static int winlockmap_dup(devmap_cookie_t, void *,
 254                 devmap_cookie_t, void **);
 255 static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
 256                 uint_t, uint_t);
 257
     /*
      * devmap callbacks attached to every winlock mapping by winlock_devmap()
      * (passed to devmap_umem_setup()).
      */
 258 static
 259 struct devmap_callback_ctl winlockmap_ops = {
 260         DEVMAP_OPS_REV,         /* callback structure revision */
 261         winlockmap_map,         /* map: record the new mapping */
 262         winlockmap_access,      /* access: fault on lock/unlock page */
 263         winlockmap_dup,         /* dup: mapping duplicated, as in fork() */
 264         winlockmap_unmap,       /* unmap: mapping being removed */
 265 };
 266
 267 #ifdef DEBUG
     /*
      * Debug tracing.  "args" is a complete, parenthesized cmn_err() argument
      * list; it is emitted only when lock_debug is at least "level".
      * The guard is #ifdef (not #if) so that it agrees with the DEBUG-only
      * ioctl case in winlock_ioctl() that assigns lock_debug: with #if, a
      * build that defines DEBUG as 0 would reference lock_debug without
      * defining it.
      */
 268 static  int     lock_debug = 0;
 269 #define DEBUGF(level, args)     { if (lock_debug >= (level)) cmn_err args; }
 270 #else
     /* Non-DEBUG builds compile the trace calls away entirely */
 271 #define DEBUGF(level, args)
 272 #endif
 273
 274 /* Driver supports two styles of locks */
     /*
      * NEWSTYLE_LOCK is what the WINLOCKALLOC ioctl requests and OLDSTYLE_LOCK
      * is what the GRABPAGEALLOC ioctl requests (see winlock_ioctl()).
      */
 275 enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK };
 276
 277 /*
 278  * These structures describe a lock context.  We permit multiple
 279  * clients (not just two) to access a lock page
 280  *
 281  * The "cookie" identifies the lock context. It is the page number portion
 282  * sy_ident returned on lock allocation. Cookie is used in later ioctls.
 283  * "cookie" is lockid * PAGESIZE
 284  * "lockptr" is the kernel virtual address to the lock itself
 285  * The page offset portion of lockptr is the page offset portion of sy_ident
 286  */
 287
 288 /*
 289  * per-process information about locks.  This is the private field of
 290  * a devmap mapping.  Note that usually *two* mappings point to this.
 291  */
 292
 293 /*
 294  * Each process using winlock is associated with a segproc structure
 295  * In various driver entry points, we need to search to find the right
 296  * segproc structure (If we were using file handles for each lock this
 297  * would not have been necessary).
 298  * It would have been simple to use the process pid (and ddi_get_pid)
 299  * However, during fork devmap_dup is called in the parent process context
 300  * and using the pid complicates the code by introducing orphans.
 301  * Instead we use the as pointer for the process as a cookie
 302  * which requires delving into various non-DDI kosher structs
 303  */
 304 typedef struct segproc {
 305         struct segproc  *next;          /* next client of this lock */
 306         struct seglock  *lp;            /* associated lock context */
 307         devmap_cookie_t lockseg;        /* lock mapping, if any */
 308         devmap_cookie_t unlockseg;      /* unlock mapping, if any */
 309         void            *tag;           /* process as pointer as tag */
 310         uint_t          flag;           /* see "flag bits" in winlockio.h */
 311 } SegProc;
 312
     /* Tag identifying the process a SegProc belongs to */
 313 #define ID(sdp)         ((sdp)->tag)
     /* Tag for the calling process: its address space (as) pointer */
 314 #define CURPROC_ID      (void *)(curproc->p_as)
 315
 316 /* per lock context information */
     /*
      * The "next" link is protected by winlock_mutex; every other field, and
      * the list of SegProc hanging off "clients", is protected by "mutex".
      */
 317
 318 typedef struct seglock {
 319         struct seglock  *next;          /* next lock */
 320         uint_t          sleepers;       /* nthreads sleeping on this lock */
 321         uint_t          alloccount;     /* how many times created? */
 322         uint_t          cookie;         /* mmap() offset (page #) into device */
 323         uint_t          key;            /* key, if any */
 324         enum winlock_style      style;  /* style of lock - OLDSTYLE, NEWSTYLE */
 325         clock_t         timeout;        /* sleep time in ticks */
 326         ddi_umem_cookie_t umem_cookie;  /* cookie for umem allocated memory */
 327         int             *lockptr;       /* kernel virtual addr of lock */
 328         struct segproc  *clients;       /* list of clients of this lock */
 329         struct segproc  *owner;         /* current owner of lock */
 330         kmutex_t        mutex;          /* mutex for lock */
 331         kcondvar_t      locksleep;      /* for sleeping on lock */
 332 } SegLock;
 333
     /* The lock word itself, accessed through its kernel virtual address */
 334 #define LOCK(lp)        (*((lp)->lockptr))
 335
 336 /*
 337  * Number of locks that can fit in a page. Driver can support only that many.
 338  * For oldstyle locks, it is relatively easy to increase the limit as each
 339  * is in a separate page (MAX_LOCKS mostly serves to prevent runaway allocation)
 340  * For newstyle locks, this is trickier as the code needs to allow for mapping
 341  * into the second or third page of the cookie for some locks.
 342  */
 343 #define MAX_LOCKS       (PAGESIZE/sizeof (int))
 344
 345 #define LOCKTIME        3       /* Default lock timeout in seconds */
 346
 347
 348 /* Protections setting for winlock user mappings */
 349 #define WINLOCK_PROT    (PROT_READ|PROT_WRITE|PROT_USER)
 350
 351 /*
 352  * The trash page is where unwanted writes go
 353  * when a process is releasing a lock.
 354  */
 355 static  ddi_umem_cookie_t trashpage_cookie = NULL;
 356
 357 /* For newstyle allocations a common page of locks is used */
 358 static  caddr_t lockpage = NULL;
 359 static  ddi_umem_cookie_t lockpage_cookie = NULL;
 360
 361 static  dev_info_t      *winlock_dip = NULL;
 362 static  kmutex_t        winlock_mutex;
 363
 364 /*
 365  * winlock_mutex protects
 366  *      lock_list
 367  *      lock_free_list
 368  *      "next" field in SegLock
 369  *      next_lock
 370  *      trashpage_cookie
 371  *      lockpage & lockpage_cookie
 372  *
 373  * SegLock_mutex protects
 374  *      rest of fields in SegLock
 375  *      All fields in list of SegProc (lp->clients)
 376  *
 377  * Lock ordering is winlock_mutex->SegLock_mutex
 378  * During devmap/seg operations SegLock_mutex acquired without winlock_mutex
 379  *
 380  * During devmap callbacks, the pointer to SegProc is stored as the private
 381  * data in the devmap handle. This pointer will not go stale (i.e., the
 382  * SegProc getting deleted) as the SegProc is not deleted until both the
 383  * lockseg and unlockseg have been unmapped and the pointers stored in
 384  * the devmap handles have been NULL'ed.
 385  * But before this pointer is used to access any fields (other than the 'lp')
 386  * lp->mutex must be held.
 387  */
 388
 389 /*
 390  * The allocation code tries to allocate from lock_free_list
 391  * first, otherwise it uses kmem_zalloc.  When lock list is idle, all
 392  * locks in lock_free_list are kmem_freed
 393  */
 394 static  SegLock *lock_list = NULL;              /* in-use locks */
 395 static  SegLock *lock_free_list = NULL;         /* free locks */
 396 static  int     next_lock = 0;                  /* next lock cookie */
 397
 398 /* Routines to find a lock in lock_list based on offset or key */
 399 static SegLock *seglock_findlock(uint_t);
 400 static SegLock *seglock_findkey(uint_t);
 401
 402 /* Routines to find and allocate SegProc structures */
 403 static SegProc *seglock_find_specific(SegLock *, void *);
 404 static SegProc *seglock_alloc_specific(SegLock *, void *);
 405 #define seglock_findclient(lp)  seglock_find_specific((lp), CURPROC_ID)
 406 #define seglock_allocclient(lp) seglock_alloc_specific((lp), CURPROC_ID)
 407
 408 /* Delete client from lock's client list */
 409 static void seglock_deleteclient(SegLock *, SegProc *);
 410 static void garbage_collect_lock(SegLock *, SegProc *);
 411
 412 /* Create a new lock */
 413 static SegLock *seglock_createlock(enum winlock_style);
 414 /* Destroy lock */
 415 static void seglock_destroylock(SegLock *);
 416 static void lock_destroyall(void);
 417
 418 /* Helper functions in winlockmap_access */
 419 static int give_mapping(SegLock *, SegProc *, uint_t);
 420 static int lock_giveup(SegLock *, int);
 421 static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);
 422
 423 /* routines called from ioctl */
 424 static int seglock_graballoc(intptr_t, enum winlock_style, int);
 425 static int seglock_grabinfo(intptr_t, int);
 426 static int seglock_grabfree(intptr_t, int);
 427 static int seglock_gettimeout(intptr_t, int);
 428 static int seglock_settimeout(intptr_t, int);
 429 static void seglock_dump_all(void);
 430
     /*
      * attach(9E) entry point.
      * Only DDI_ATTACH is handled; any other command fails.  Creates the single
      * "winlock" character minor node (minor number 0) and records the devinfo
      * pointer in winlock_dip for later use by winlock_info() and
      * winlock_devmap().
      * Returns DDI_SUCCESS or DDI_FAILURE.
      */
 431 static  int
 432 winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 433 {
 434         DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
 435             (void *)devi, (int)cmd));
 436         if (cmd != DDI_ATTACH)
 437                 return (DDI_FAILURE);
 438         if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
 439             == DDI_FAILURE) {
 440                 return (DDI_FAILURE);
 441         }
 442         winlock_dip = devi;
 443         ddi_report_dev(devi);
 444         return (DDI_SUCCESS);
 445 }
 446
     /*
      * detach(9E) entry point.
      * Only DDI_DETACH is handled.  The detach is refused (DDI_FAILURE) while
      * any lock is still on lock_list.  Otherwise the trash page and the common
      * lock page are freed, if they were allocated, and winlock_dip is cleared.
      * All of the global state is examined and torn down under winlock_mutex.
      *
      * NOTE(review): the minor node created in winlock_attach() is not removed
      * here; presumably the framework cleans it up after a successful detach -
      * confirm.
      */
 447 /*ARGSUSED*/
 448 static  int
 449 winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
 450 {
 451         DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
 452             (void *)devi, (int)cmd));
 453         if (cmd != DDI_DETACH)
 454                 return (DDI_FAILURE);
 455
 456         mutex_enter(&winlock_mutex);
             /* Locks still in use: cannot detach */
 457         if (lock_list != NULL) {
 458                 mutex_exit(&winlock_mutex);
 459                 return (DDI_FAILURE);
 460         }
 461         ASSERT(lock_free_list == NULL);
 462
 463         DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
 464         /* destroy any common stuff created */
 465         if (trashpage_cookie != NULL) {
 466                 ddi_umem_free(trashpage_cookie);
 467                 trashpage_cookie = NULL;
 468         }
 469         if (lockpage != NULL) {
 470                 ddi_umem_free(lockpage_cookie);
 471                 lockpage = NULL;
 472                 lockpage_cookie = NULL;
 473         }
 474         winlock_dip = NULL;
 475         mutex_exit(&winlock_mutex);
 476         return (DDI_SUCCESS);
 477 }
 478
     /*
      * getinfo(9E) entry point.
      * "arg" carries a dev_t; only minor number 0 is accepted.
      *   DDI_INFO_DEVT2DEVINFO  - returns winlock_dip in *result, or fails if
      *                            the driver is not attached (winlock_dip NULL)
      *   DDI_INFO_DEVT2INSTANCE - returns instance 0 in *result
      * Any other command fails.  *result is always initialized to NULL first.
      * Returns DDI_SUCCESS or DDI_FAILURE.
      */
 479 /*ARGSUSED*/
 480 static  int
 481 winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 482 {
 483         register int error;
 484
 485         /* initialize result */
 486         *result = NULL;
 487
 488         /* only valid instance (i.e., getminor) is 0 */
 489         if (getminor((dev_t)arg) >= 1)
 490                 return (DDI_FAILURE);
 491
 492         switch (infocmd) {
 493         case DDI_INFO_DEVT2DEVINFO:
 494                 if (winlock_dip == NULL)
 495                         error = DDI_FAILURE;
 496                 else {
 497                         *result = (void *)winlock_dip;
 498                         error = DDI_SUCCESS;
 499                 }
 500                 break;
 501         case DDI_INFO_DEVT2INSTANCE:
 502                 *result = (void *)0;
 503                 error = DDI_SUCCESS;
 504                 break;
 505         default:
 506                 error = DDI_FAILURE;
 507         }
 508         return (error);
 509 }
 510
 511
     /*
      * ioctl(9E) entry point.
      * Dispatches each supported command to the matching seglock_* helper,
      * handing it the user argument and the mode flags, and returns whatever
      * the helper returns.  GRABPAGEALLOC and WINLOCKALLOC share one helper and
      * differ only in the lock style requested; GRABPAGEFREE and WINLOCKFREE
      * share the same free helper.
      * Returns EINVAL for the unsupported GRABATTACH and ENOTTY for any
      * unrecognized command.
      */
 512 /*ARGSUSED*/
 513 int
 514 winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
 515         cred_t *cred, int *rval)
 516 {
 517         DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
 518             cmd, (void *)arg));
 519
 520         switch (cmd) {
 521         /*
 522          * ioctls that used to be handled by framebuffers (defined in fbio.h)
 523          * RFE: No code really calls the GRAB* ioctls now. Should EOL.
 524          */
 525
 526         case GRABPAGEALLOC:
 527                 return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
 528         case GRABPAGEFREE:
 529                 return (seglock_grabfree(arg, mode));
 530         case GRABLOCKINFO:
 531                 return (seglock_grabinfo(arg, mode));
 532         case GRABATTACH:
 533                 return (EINVAL); /* GRABATTACH is not supported (never was) */
 534
 535         case WINLOCKALLOC:
 536                 return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
 537         case WINLOCKFREE:
 538                 return (seglock_grabfree(arg, mode));
 539         case WINLOCKSETTIMEOUT:
 540                 return (seglock_settimeout(arg, mode));
 541         case WINLOCKGETTIMEOUT:
 542                 return (seglock_gettimeout(arg, mode));
 543         case WINLOCKDUMP:
 544                 seglock_dump_all();
 545                 return (0);
 546
 547 #ifdef DEBUG
             /* DEBUG-only: arg is taken as the new DEBUGF verbosity threshold */
 548         case (WIOC|255):
 549                 lock_debug = arg;
 550                 return (0);
 551 #endif
 552
 553         default:
 554                 return (ENOTTY);                /* Why is this not EINVAL */
 555         }
 556 }
 557
     /*
      * segmap(9E) entry point.
      * Rejects MAP_PRIVATE requests with EINVAL; everything else is handed
      * unchanged to devmap_setup(), whose result is returned.
      */
 558 int
 559 winlocksegmap(
 560         dev_t   dev,            /* major:minor */
 561         off_t   off,            /* device offset from mmap(2) */
 562         struct as *as,          /* user's address space. */
 563         caddr_t *addr,          /* address from mmap(2) */
 564         off_t   len,            /* length from mmap(2) */
 565         uint_t  prot,           /* user wants this access */
 566         uint_t  maxprot,        /* this is the maximum the user can have */
 567         uint_t  flags,          /* flags from mmap(2) */
 568         cred_t  *cred)
 569 {
 570         DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));
 571
 572         /* Only MAP_SHARED mappings are supported */
 573         if ((flags & MAP_TYPE) == MAP_PRIVATE) {
 574                 return (EINVAL);
 575         }
 576
 577         /* Use devmap_setup to setup the mapping */
 578         return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
 579             maxprot, flags, cred));
 580 }
 581
     /*
      * devmap(9E) entry point.
      * "off" is the sy_ident handed out by the alloc ioctl and selects the
      * lock.  The offset and length are validated and the lock's umem page is
      * then exported with winlockmap_ops as the mapping callbacks.
      *
      * seglock_findlock() returns with lp->mutex held, so every exit path after
      * a successful lookup drops that mutex.
      *
      * On success *maplen is set to PAGESIZE and 0 is returned.  On failure
      * *maplen is left at 0 and the return value is ENXIO (no such lock),
      * EINVAL (offset mismatch or length other than PAGESIZE) or the
      * devmap_umem_setup() result.
      */
 582 /*ARGSUSED*/
 583 int
 584 winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 585     size_t *maplen, uint_t model)
 586 {
 587         SegLock *lp;
 588         int err;
 589
 590         DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
 591             off, len, (void *)dhp));
 592
 593         *maplen = 0;
 594
 595         /* Check if the lock exists, i.e., has been created by alloc */
 596         /* off is the sy_ident returned in the alloc ioctl */
 597         if ((lp = seglock_findlock((uint_t)off)) == NULL) {
 598                 return (ENXIO);
 599         }
 600
 601         /*
 602          * The offset bits in mmap(2) offset has to be same as in lockptr
 603          * OR the offset should be 0 (i.e. masked off)
 604          */
 605         if (((off & PAGEOFFSET) != 0) &&
 606             ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
 607                 DEBUGF(2, (CE_CONT,
 608                     "mmap offset %llx mismatch with lockptr %p\n",
 609                     off, (void *)lp->lockptr));
 610                 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
 611                 return (EINVAL);
 612         }
 613
 614         /* Only supports PAGESIZE length mappings */
 615         if (len != PAGESIZE) {
 616                 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
 617                 return (EINVAL);
 618         }
 619
 620         /*
 621          * Set up devmap to point at page associated with lock
 622          * RFE: At this point we dont know if this is a lockpage or unlockpage
 623          * a lockpage would not need DEVMAP_ALLOW_REMAP setting
 624          * We could have kept track of the mapping order here,
 625          * but devmap framework does not support storing any state in this
 626          * devmap callback as it does not callback for error cleanup if some
 627          * other error happens in the framework.
 628          * RFE: We should modify the winlock mmap interface so that the
 629          * user process marks in the offset passed in whether this is for a
 630          * lock or unlock mapping instead of guessing based on order of maps
 631          * This would cleanup other things (such as in fork)
 632          */
             /*
              * NOTE(review): only a negative result is treated as failure, and
              * that raw value (not an errno) is what gets returned - confirm
              * against the documented devmap_umem_setup(9F) return values.
              */
 633         if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
 634             lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
 635             DEVMAP_ALLOW_REMAP, 0)) < 0) {
 636                 mutex_exit(&lp->mutex); /* held by seglock_findlock */
 637                 return (err);
 638         }
 639         /*
 640          * No mappings are loaded to those segments yet. The correctness
 641          * of the winlock semantics depends on the devmap framework/seg_dev NOT
 642          * loading the translations without calling _access callback.
 643          */
 644
 645         mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
 646         *maplen = PAGESIZE;
 647         return (0);
 648 }
 649
 650 /*
 651  * This routine is called by the devmap framework after the devmap entry point
 652  * above and the mapping is setup in seg_dev.
 653  * We store the pointer to the per-process context in the devmap private data.
      *
      * The first mapping a process makes of a lock becomes its lock segment and
      * the second its unlock segment.
      *
      * Returns DDI_SUCCESS with *pvtp set to the per-process context, ENXIO if
      * the lock identified by "off" can no longer be found, or ENOMEM if the
      * process already has both segments mapped.
 654  */
 655 /*ARGSUSED*/
 656 static int
 657 winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
 658         size_t len, void **pvtp)
 659 {
 660         SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
 661         SegProc *sdp;
 662
 663         ASSERT(len == PAGESIZE);
 664
             /*
              * winlock_devmap() dropped lp->mutex before returning, so the lookup
              * is repeated here and can come back NULL, just as it can there.
              * Fail the mapping rather than dereference a NULL lock; no mutex
              * is held in that case.
              */
             if (lp == NULL)
                     return (ENXIO);

 665         /* Find the per-process context for this lock, alloc one if not found */
 666         sdp = seglock_allocclient(lp);
 667
 668         /*
 669          * RFE: Determining which is a lock vs unlock seg is based on order
 670          * of mmaps, we should change that to be derivable from off
 671          */
 672         if (sdp->lockseg == NULL) {
 673                 sdp->lockseg = dhp;
 674         } else if (sdp->unlockseg == NULL) {
 675                 sdp->unlockseg = dhp;
 676         } else {
 677                 /* attempting to map lock more than twice */
 678                 mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
 679                 return (ENOMEM);
 680         }
 681
 682         *pvtp = sdp;
 683         mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
 684         return (DDI_SUCCESS);
 685 }
 686
 687 /*
 688  * duplicate a segment, as in fork()
 689  * On fork, the child inherits the mappings to the lock
 690  *      lp->alloccount is NOT incremented, so child should not do a free().
 691  *      Semantics same as if done an alloc(), map(), map().
 692  *      This way it would work fine if doing an exec() variant later
 693  *      Child does not inherit any UFLAGS set in parent
 694  * The lock and unlock pages are started off unmapped, i.e., child does not
 695  *      own the lock.
 696  * The code assumes that the child process has a valid pid at this point
 697  * RFE: This semantics depends on fork not duplicating the hat mappings
 698  *      (which is the current implementation). To enforce it would need to
 699  *      call devmap_unload from here - not clear if that is allowed.
      *
      * "oldpvt" is the parent's SegProc; the child's SegProc is returned through
      * "newpvt".  Always returns 0.
 700  */
 701
 702 static int
 703 winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
 704         void **newpvt)
 705 {
 706         SegProc *sdp = (SegProc *)oldpvt;
 707         SegProc *ndp;
 708         SegLock *lp = sdp->lp;
 709
 710         mutex_enter(&lp->mutex);
 711         ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
 712
 713         /*
 714          * Note: At this point, the child process does have a pid, but
 715          * the arguments passed to as_dup and hence to devmap_dup don't pass it
 716          * down. So we cannot use normal seglock_findclient - which finds the
 717          * parent sdp itself!
 718          * Instead we allocate the child's SegProc by using the child as pointer
 719          * RFE: we are using the as structure which means peeking into the
 720          * devmap_cookie. This is not DDI-compliant. Need a compliant way of
 721          * getting at either the as or, better, a way to get the child's new pid
 722          */
 723         ndp = seglock_alloc_specific(lp,
 724             (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
 725         ASSERT(ndp != sdp);
 726
             /* Record new_dhp in the child as the same kind of segment as dhp */
 727         if (sdp->lockseg == dhp) {
 728                 ASSERT(ndp->lockseg == NULL);
 729                 ndp->lockseg = new_dhp;
 730         } else {
 731                 ASSERT(sdp->unlockseg == dhp);
 732                 ASSERT(ndp->unlockseg == NULL);
 733                 ndp->unlockseg = new_dhp;
                     /* The child's unlock segment starts with the parent's TRASHPAGE bit */
 734                 if (sdp->flag & TRASHPAGE) {
 735                         ndp->flag |= TRASHPAGE;
 736                 }
 737         }
 738         mutex_exit(&lp->mutex);
 739         *newpvt = (void *)ndp;
 740         return (0);
 741 }
 742
 743
 744 /*ARGSUSED*/
 745 static void
 746 winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
 747         devmap_cookie_t new_dhp1, void **newpvtp1,
 748         devmap_cookie_t new_dhp2, void **newpvtp2)
 749 {
 750         SegProc *sdp = (SegProc *)pvtp;
 751         SegLock *lp = sdp->lp;
 752
 753         /*
 754          * We always create PAGESIZE length mappings, so there should never
 755          * be a partial unmapping case
 756          */
 757         ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));
 758
 759         mutex_enter(&lp->mutex);
 760         ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
 761         /* make sure this process doesn't own the lock */
 762         if (sdp == lp->owner) {
 763                 /*
 764                  * Not handling errors - i.e., errors in unloading mapping
 765                  * As part of unmapping hat/seg structure get torn down anyway
 766                  */
 767                 (void) lock_giveup(lp, 0);
 768         }
 769
 770         ASSERT(sdp != lp->owner);
 771         if (sdp->lockseg == dhp) {
 772                 sdp->lockseg = NULL;
 773         } else {
 774                 ASSERT(sdp->unlockseg == dhp);
 775                 sdp->unlockseg = NULL;
 776                 sdp->flag &= ~TRASHPAGE;        /* clear flag if set */
 777         }
 778
 779         garbage_collect_lock(lp, sdp);
 780 }
 781
 782 /*ARGSUSED*/
 783 static int
 784 winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
 785         uint_t type, uint_t rw)
 786 {
 787         SegProc *sdp = (SegProc *)pvt;
 788         SegLock *lp = sdp->lp;
 789         int err;
 790
 791         /* Driver handles only DEVMAP_ACCESS type of faults */
 792         if (type != DEVMAP_ACCESS)
 793                 return (-1);
 794
 795         mutex_enter(&lp->mutex);
 796         ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
 797
 798         /* should be using a SegProc that corresponds to current process */
 799         ASSERT(ID(sdp) == CURPROC_ID);
 800
 801         /*
 802          * If process is faulting but does not have both segments mapped
 803          * return error (should cause a segv).
 804          * RFE: could give it a permanent trashpage
 805          */
 806         if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
 807                 err = -1;
 808         } else {
 809                 err = seglock_lockfault(dhp, sdp, lp, rw);
 810         }
 811         mutex_exit(&lp->mutex);
 812         return (err);
 813 }
 814
 815         /* INTERNAL ROUTINES START HERE */
 816
 817
 818
 819 /*
 820  * search the lock_list list for the specified cookie
 821  * The cookie is the sy_ident field returns by ALLOC ioctl.
 822  * This has two parts:
 823  * the pageoffset bits contain offset into the lock page.
 824  * the pagenumber bits contain the lock id.
 825  * The user code is supposed to pass in only the pagenumber portion
 826  *      (i.e. mask off the pageoffset bits). However the code below
 827  *      does the mask in case the users are not diligent
 828  * if found, returns with mutex for SegLock structure held
 829  */
 830 static SegLock *
 831 seglock_findlock(uint_t cookie)
 832 {
 833         SegLock *lp;
 834
 835         cookie &= (uint_t)PAGEMASK;   /* remove pageoffset bits to get cookie */
 836         mutex_enter(&winlock_mutex);
 837         for (lp = lock_list; lp != NULL; lp = lp->next) {
 838                 mutex_enter(&lp->mutex);
 839                 if (cookie == lp->cookie) {
 840                         break;  /* return with lp->mutex held */
 841                 }
 842                 mutex_exit(&lp->mutex);
 843         }
 844         mutex_exit(&winlock_mutex);
 845         return (lp);
 846 }
 847
 848 /*
 849  * search the lock_list list for the specified non-zero key
 850  * if found, returns with lock for SegLock structure held
 851  */
 852 static SegLock *
 853 seglock_findkey(uint_t key)
 854 {
 855         SegLock *lp;
 856
 857         ASSERT(MUTEX_HELD(&winlock_mutex));
 858         /* The driver allows multiple locks with key 0, dont search */
 859         if (key == 0)
 860                 return (NULL);
 861         for (lp = lock_list; lp != NULL; lp = lp->next) {
 862                 mutex_enter(&lp->mutex);
 863                 if (key == lp->key)
 864                         break;
 865                 mutex_exit(&lp->mutex);
 866         }
 867         return (lp);
 868 }
 869
 870 /*
 871  * Create a new lock context.
 872  * Returns with SegLock mutex held
 873  */
 874
 875 static SegLock *
 876 seglock_createlock(enum winlock_style style)
 877 {
 878         SegLock *lp;
 879
 880         DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
 881             (void *)lock_free_list, next_lock));
 882
 883         ASSERT(MUTEX_HELD(&winlock_mutex));
 884         if (lock_free_list != NULL) {
 885                 lp = lock_free_list;
 886                 lock_free_list = lp->next;
 887         } else if (next_lock >= MAX_LOCKS) {
 888                 return (NULL);
 889         } else {
 890                 lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
 891                 lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
 892                 mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
 893                 cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
 894                 ++next_lock;
 895         }
 896
 897         mutex_enter(&lp->mutex);
 898         ASSERT((lp->cookie/PAGESIZE) <= next_lock);
 899
 900         if (style == OLDSTYLE_LOCK) {
 901                 lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
 902                     DDI_UMEM_SLEEP, &(lp->umem_cookie));
 903         } else {
 904                 lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
 905                 lp->umem_cookie = lockpage_cookie;
 906         }
 907
 908         ASSERT(lp->lockptr != NULL);
 909         lp->style = style;
 910         lp->sleepers = 0;
 911         lp->alloccount = 1;
 912         lp->timeout = LOCKTIME*hz;
 913         lp->clients = NULL;
 914         lp->owner = NULL;
 915         LOCK(lp) = 0;
 916         lp->next = lock_list;
 917         lock_list = lp;
 918         return (lp);
 919 }
 920
 921 /*
 922  * Routine to destory a lock structure.
 923  * This routine is called while holding the lp->mutex but not the
 924  * winlock_mutex.
 925  */
 926
 927 static void
 928 seglock_destroylock(SegLock *lp)
 929 {
 930         ASSERT(MUTEX_HELD(&lp->mutex));
 931         ASSERT(!MUTEX_HELD(&winlock_mutex));
 932
 933         DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
 934             lp->cookie, lp->key));
 935
 936         ASSERT(lp->alloccount == 0);
 937         ASSERT(lp->clients == NULL);
 938         ASSERT(lp->owner == NULL);
 939         ASSERT(lp->sleepers == 0);
 940
 941         /* clean up/release fields in lp */
 942         if (lp->style == OLDSTYLE_LOCK) {
 943                 ddi_umem_free(lp->umem_cookie);
 944         }
 945         lp->umem_cookie = NULL;
 946         lp->lockptr = NULL;
 947         lp->key = 0;
 948
 949         /*
 950          * Reduce cookie by 1, makes it non page-aligned and invalid
 951          * This prevents any valid lookup from finding this lock
 952          * so when we drop the lock and regrab it it will still
 953          * be there and nobody else would have attached to it
 954          */
 955         lp->cookie--;
 956
 957         /* Drop and reacquire mutexes in right order */
 958         mutex_exit(&lp->mutex);
 959         mutex_enter(&winlock_mutex);
 960         mutex_enter(&lp->mutex);
 961
 962         /* reincrement the cookie to get the original valid cookie */
 963         lp->cookie++;
 964         ASSERT((lp->cookie & PAGEOFFSET) == 0);
 965         ASSERT(lp->alloccount == 0);
 966         ASSERT(lp->clients == NULL);
 967         ASSERT(lp->owner == NULL);
 968         ASSERT(lp->sleepers == 0);
 969
 970         /* Remove lp from lock_list */
 971         if (lock_list == lp) {
 972                 lock_list = lp->next;
 973         } else {
 974                 SegLock *tmp = lock_list;
 975                 while (tmp->next != lp) {
 976                         tmp = tmp->next;
 977                         ASSERT(tmp != NULL);
 978                 }
 979                 tmp->next = lp->next;
 980         }
 981
 982         /* Add to lock_free_list */
 983         lp->next = lock_free_list;
 984         lock_free_list = lp;
 985         mutex_exit(&lp->mutex);
 986
 987         /* Check if all locks deleted and cleanup */
 988         if (lock_list == NULL) {
 989                 lock_destroyall();
 990         }
 991
 992         mutex_exit(&winlock_mutex);
 993 }
 994
 995 /* Routine to find a SegProc corresponding to the tag */
 996
 997 static SegProc *
 998 seglock_find_specific(SegLock *lp, void *tag)
 999 {
1000         SegProc *sdp;
1001
1002         ASSERT(MUTEX_HELD(&lp->mutex));
1003         ASSERT(tag != NULL);
1004         for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1005                 if (ID(sdp) == tag)
1006                         break;
1007         }
1008         return (sdp);
1009 }
1010
1011 /* Routine to find (and if needed allocate) a SegProc corresponding to tag */
1012
1013 static SegProc *
1014 seglock_alloc_specific(SegLock *lp, void *tag)
1015 {
1016         SegProc *sdp;
1017
1018         ASSERT(MUTEX_HELD(&lp->mutex));
1019         ASSERT(tag != NULL);
1020
1021         /* Search and return if existing one found */
1022         sdp = seglock_find_specific(lp, tag);
1023         if (sdp != NULL)
1024                 return (sdp);
1025
1026         DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
1027             tag, lp->cookie));
1028
1029         /* Allocate a new SegProc */
1030         sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
1031         sdp->next = lp->clients;
1032         lp->clients = sdp;
1033         sdp->lp = lp;
1034         ID(sdp) = tag;
1035         return (sdp);
1036 }
1037
1038 /*
1039  * search a context's client list for the given client and delete
1040  */
1041
1042 static void
1043 seglock_deleteclient(SegLock *lp, SegProc *sdp)
1044 {
1045         ASSERT(MUTEX_HELD(&lp->mutex));
1046         ASSERT(lp->owner != sdp);       /* Not current owner of lock */
1047         ASSERT(sdp->lockseg == NULL);   /* Mappings torn down */
1048         ASSERT(sdp->unlockseg == NULL);
1049
1050         DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
1051             ddi_get_pid(), lp->cookie));
1052         if (lp->clients == sdp) {
1053                 lp->clients = sdp->next;
1054         } else {
1055                 SegProc *tmp = lp->clients;
1056                 while (tmp->next != sdp) {
1057                         tmp = tmp->next;
1058                         ASSERT(tmp != NULL);
1059                 }
1060                 tmp->next = sdp->next;
1061         }
1062         kmem_free(sdp, sizeof (SegProc));
1063 }
1064
1065 /*
1066  * Routine to verify if a SegProc and SegLock
1067  * structures are empty/idle.
1068  * Destroys the structures if they are ready
1069  * Can be called with sdp == NULL if want to verify only the lock state
1070  * caller should hold the lp->mutex
1071  * and this routine drops the mutex
1072  */
1073 static void
1074 garbage_collect_lock(SegLock *lp, SegProc *sdp)
1075 {
1076         ASSERT(MUTEX_HELD(&lp->mutex));
1077         /* see if both segments unmapped from client structure */
1078         if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
1079                 seglock_deleteclient(lp, sdp);
1080
1081         /* see if this is last client in the entire lock context */
1082         if ((lp->clients == NULL) && (lp->alloccount == 0)) {
1083                 seglock_destroylock(lp);
1084         } else {
1085                 mutex_exit(&lp->mutex);
1086         }
1087 }
1088
1089
1090 /* IOCTLS START HERE */
1091
1092 static int
1093 seglock_grabinfo(intptr_t arg, int mode)
1094 {
1095         int i = 1;
1096
1097         /* multiple clients per lock supported - see comments up top */
1098         if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
1099                 return (EFAULT);
1100         return (0);
1101 }
1102
1103 static int
1104 seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
1105 {
1106         struct seglock  *lp;
1107         uint_t          key;
1108         struct          winlockalloc wla;
1109         int             err;
1110
1111         if (style == OLDSTYLE_LOCK) {
1112                 key = 0;
1113         } else {
1114                 if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
1115                     mode)) {
1116                         return (EFAULT);
1117                 }
1118                 key = wla.sy_key;
1119         }
1120
1121         DEBUGF(3, (CE_CONT,
1122             "seglock_graballoc: key=%u, style=%d\n", key, style));
1123
1124         mutex_enter(&winlock_mutex);
1125         /* Allocate lockpage on first new style alloc */
1126         if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
1127                 lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
1128                     &lockpage_cookie);
1129         }
1130
1131         /* Allocate trashpage on first alloc (any style) */
1132         if (trashpage_cookie == NULL) {
1133                 (void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
1134                     &trashpage_cookie);
1135         }
1136
1137         if ((lp = seglock_findkey(key)) != NULL) {
1138                 DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
1139                     key, lp->cookie));
1140                 ++lp->alloccount;
1141         } else if ((lp = seglock_createlock(style)) != NULL) {
1142                 DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
1143                     key, lp->cookie));
1144                 lp->key = key;
1145         } else {
1146                 DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
1147                 mutex_exit(&winlock_mutex);
1148                 return (ENOMEM);
1149         }
1150         ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));
1151
1152         mutex_exit(&winlock_mutex);
1153
1154         if (style == OLDSTYLE_LOCK) {
1155                 err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
1156                     sizeof (lp->cookie), mode);
1157         } else {
1158                 wla.sy_ident = lp->cookie +
1159                     (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
1160                 err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
1161                     sizeof (wla), mode);
1162         }
1163
1164         if (err) {
1165                 /* On error, should undo allocation */
1166                 lp->alloccount--;
1167
1168                 /* Verify and delete if lock is unused now */
1169                 garbage_collect_lock(lp, NULL);
1170                 return (EFAULT);
1171         }
1172
1173         mutex_exit(&lp->mutex);
1174         return (0);
1175 }
1176
1177 static int
1178 seglock_grabfree(intptr_t arg, int mode)        /* IOCTL */
1179 {
1180         struct seglock  *lp;
1181         uint_t  offset;
1182
1183         if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
1184             != 0) {
1185                 return (EFAULT);
1186         }
1187         DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));
1188
1189         if ((lp = seglock_findlock(offset)) == NULL) {
1190                 DEBUGF(2, (CE_CONT, "did not find lock\n"));
1191                 return (EINVAL);
1192         }
1193         DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
1194             lp->key, lp->cookie, lp->alloccount));
1195
1196         if (lp->alloccount > 0)
1197                 lp->alloccount--;
1198
1199         /* Verify and delete if lock is unused now */
1200         garbage_collect_lock(lp, NULL);
1201         return (0);
1202 }
1203
1204
1205 /*
1206  * Sets timeout in lock and UFLAGS in client
1207  *      the UFLAGS are stored in the client structure and persistent only
1208  *      till the unmap of the lock pages. If the process sets UFLAGS
1209  *      does a map of the lock/unlock pages and unmaps them, the client
1210  *      structure will get deleted and the UFLAGS will be lost. The process
1211  *      will need to resetup the flags.
1212  */
1213 static int
1214 seglock_settimeout(intptr_t arg, int mode)      /* IOCTL */
1215 {
1216         SegLock         *lp;
1217         SegProc         *sdp;
1218         struct winlocktimeout           wlt;
1219
1220         if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
1221                 return (EFAULT);
1222         }
1223
1224         if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1225                 return (EINVAL);
1226
1227         lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
1228         /* if timeout modified, wake up any sleepers */
1229         if (lp->sleepers > 0) {
1230                 cv_broadcast(&lp->locksleep);
1231         }
1232
1233         /*
1234          * If the process is trying to set UFLAGS,
1235          *      Find the client segproc and allocate one if needed
1236          *      Set the flags preserving the kernel flags
1237          * If the process is clearing UFLAGS
1238          *      Find the client segproc but dont allocate one if does not exist
1239          */
1240         if (wlt.sy_flags & UFLAGS) {
1241                 sdp = seglock_allocclient(lp);
1242                 sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
1243         } else if ((sdp = seglock_findclient(lp)) != NULL) {
1244                 sdp->flag = sdp->flag & KFLAGS;
1245                 /* If clearing UFLAGS leaves the segment or lock idle, delete */
1246                 garbage_collect_lock(lp, sdp);
1247                 return (0);
1248         }
1249         mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
1250         return (0);
1251 }
1252
1253 static int
1254 seglock_gettimeout(intptr_t arg, int mode)
1255 {
1256         SegLock         *lp;
1257         SegProc         *sdp;
1258         struct winlocktimeout           wlt;
1259
1260         if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
1261                 return (EFAULT);
1262
1263         if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
1264                 return (EINVAL);
1265
1266         wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
1267         /*
1268          * If this process has an active allocated lock return those flags
1269          *      Dont allocate a client structure on gettimeout
1270          * If not, return 0.
1271          */
1272         if ((sdp = seglock_findclient(lp)) != NULL) {
1273                 wlt.sy_flags = sdp->flag & UFLAGS;
1274         } else {
1275                 wlt.sy_flags = 0;
1276         }
1277         mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
1278
1279         if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
1280                 return (EFAULT);
1281
1282         return (0);
1283 }
1284
1285 /*
1286  * Handle lock segment faults here...
1287  *
1288  * This is where the magic happens.
1289  */
1290
1291 /* ARGSUSED */
1292 static  int
1293 seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
1294 {
1295         SegProc *owner = lp->owner;
1296         int err;
1297
1298         ASSERT(MUTEX_HELD(&lp->mutex));
1299         DEBUGF(3, (CE_CONT,
1300             "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
1301             (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));
1302
1303         /* lockfault is always called with sdp in current process context */
1304         ASSERT(ID(sdp) == CURPROC_ID);
1305
1306         /* If Lock has no current owner, give the mapping to new owner */
1307         if (owner == NULL) {
1308                 DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
1309                 return (give_mapping(lp, sdp, rw));
1310         }
1311
1312         if (owner == sdp) {
1313                 /*
1314                  * Current owner is faulting on owned lock segment OR
1315                  * Current owner is faulting on unlock page and has no waiters
1316                  * Then can give the mapping to current owner
1317                  */
1318                 if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
1319                 DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
1320                 return (give_mapping(lp, sdp, rw));
1321                 } else {
1322                 /*
1323                  * Owner must be writing to unlock page and there are waiters.
1324                  * other cases have been checked earlier.
1325                  * Release the lock, owner, and owners mappings
1326                  * As the owner is trying to write to the unlock page, leave
1327                  * it with a trashpage mapping and wake up the sleepers
1328                  */
1329                 ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
1330                 DEBUGF(4, (CE_CONT, " owner fault on unlock seg w/ sleeper\n"));
1331                 return (lock_giveup(lp, 1));
1332                 }
1333         }
1334
1335         ASSERT(owner != sdp);
1336
1337         /*
1338          * If old owner faulting on trash unlock mapping,
1339          * load hat mappings to trash page
1340          * RFE: non-owners should NOT be faulting on unlock mapping as they
1341          * as first supposed to fault on the lock seg. We could give them
1342          * a trash page or return error.
1343          */
1344         if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
1345                 DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
1346                 return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1347                     DEVMAP_ACCESS, rw));
1348         }
1349
1350         /*
1351          * Non-owner faulting. Need to check current LOCK state.
1352          *
1353          * Before reading lock value in LOCK(lp), we must make sure that
1354          * the owner cannot change its value before we change mappings
1355          * or else we could end up either with a hung process
1356          * or more than one process thinking they have the lock.
1357          * We do that by unloading the owner's mappings
1358          */
1359         DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
1360         err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1361         err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1362         if (err != 0)
1363                 return (err);   /* unable to remove owner mapping */
1364
1365         /*
1366          * If lock is not held, then current owner mappings were
1367          * unloaded above and we can give the lock to the new owner
1368          */
1369         if (LOCK(lp) == 0) {
1370                 DEBUGF(4, (CE_CONT,
1371                     "Free lock (%p): Giving mapping to new owner %d\n",
1372                     (void *)lp, ddi_get_pid()));
1373                 return (give_mapping(lp, sdp, rw));
1374         }
1375
1376         DEBUGF(4, (CE_CONT, "  lock held, sleeping\n"));
1377
1378         /*
1379          * A non-owning process tried to write (presumably to the lockpage,
1380          * but it doesn't matter) but the lock is held; we need to sleep for
1381          * the lock while there is an owner.
1382          */
1383
1384         lp->sleepers++;
1385         while ((owner = lp->owner) != NULL) {
1386                 int rval;
1387
1388                 if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
1389                         /*
1390                          * No timeout has been specified for this lock;
1391                          * we'll simply sleep on the condition variable.
1392                          */
1393                         rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
1394                 } else {
1395                         /*
1396                          * A timeout _has_ been specified for this lock. We need
1397                          * to wake up and possibly steal this lock if the owner
1398                          * does not let it go. Note that all sleepers on a lock
1399                          * with a timeout wait; the sleeper with the earliest
1400                          * timeout will wakeup, and potentially steal the lock
1401                          * Stealing the lock will cause a broadcast on the
1402                          * locksleep cv and thus kick the other timed waiters
1403                          * and cause everyone to restart in a new timedwait
1404                          */
1405                         rval = cv_reltimedwait_sig(&lp->locksleep,
1406                             &lp->mutex, lp->timeout, TR_CLOCK_TICK);
1407                 }
1408
1409                 /*
1410                  * Timeout and still old owner - steal lock
1411                  * Force-Release lock and give old owner a trashpage mapping
1412                  */
1413                 if ((rval == -1) && (lp->owner == owner)) {
1414                         /*
1415                          * if any errors in lock_giveup, go back and sleep/retry
1416                          * If successful, will break out of loop
1417                          */
1418                         cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
1419                             ddi_get_pid(), lp->cookie);
1420                         (void) lock_giveup(lp, 1);
1421                 } else if (rval == 0) { /* signal pending */
1422                         cmn_err(CE_NOTE,
1423                             "Process %d signalled while waiting on lock %d\n",
1424                             ddi_get_pid(), lp->cookie);
1425                         lp->sleepers--;
1426                         return (FC_MAKE_ERR(EINTR));
1427                 }
1428         }
1429
1430         lp->sleepers--;
1431         /*
1432          * Give mapping to this process and save a fault later
1433          */
1434         return (give_mapping(lp, sdp, rw));
1435 }
1436
1437 /*
1438  * Utility: give a valid mapping to lock and unlock pages to current process.
1439  * Caller responsible for unloading old owner's mappings
1440  */
1441
1442 static int
1443 give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
1444 {
1445         int err = 0;
1446
1447         ASSERT(MUTEX_HELD(&lp->mutex));
1448         ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
1449         /* give_mapping is always called with sdp in current process context */
1450         ASSERT(ID(sdp) == CURPROC_ID);
1451
1452         /* remap any old trash mappings */
1453         if (sdp->flag & TRASHPAGE) {
1454                 /* current owner should not have a trash mapping */
1455                 ASSERT(sdp != lp->owner);
1456
1457                 DEBUGF(4, (CE_CONT,
1458                     "new owner %d remapping old trash mapping\n",
1459                     ddi_get_pid()));
1460                 if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
1461                     lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1462                         /*
1463                          * unable to remap old trash page,
1464                          * abort before changing owner
1465                          */
1466                         DEBUGF(4, (CE_CONT,
1467                             "aborting: error in umem_remap %d\n", err));
1468                         return (err);
1469                 }
1470                 sdp->flag &= ~TRASHPAGE;
1471         }
1472
1473         /* we have a new owner now */
1474         lp->owner = sdp;
1475
1476         if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
1477             DEVMAP_ACCESS, rw)) != 0) {
1478                 return (err);
1479         }
1480         DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));
1481
1482         if (lp->sleepers) {
1483                 /* Force unload unlock mapping if there are waiters */
1484                 DEBUGF(4, (CE_CONT,
1485                     " lock has %d sleepers => remove unlock mapping\n",
1486                     lp->sleepers));
1487                 err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
1488         } else {
1489                 /*
1490                  * while here, give new owner a valid mapping to unlock
1491                  * page so we don't get called again.
1492                  */
1493                 DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
1494                 err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
1495                     DEVMAP_ACCESS, PROT_WRITE);
1496         }
1497         return (err);
1498 }
1499
1500 /*
1501  * Unload owner's mappings, release the lock and wakeup any sleepers
1502  * If trash, then the old owner is given a trash mapping
1503  *      => old owner held lock too long and caused a timeout
1504  */
1505 static int
1506 lock_giveup(SegLock *lp, int trash)
1507 {
1508         SegProc *owner = lp->owner;
1509
1510         DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
1511             (void *)lp, (void *)ID(lp->owner), trash));
1512
1513         ASSERT(MUTEX_HELD(&lp->mutex));
1514         ASSERT(owner != NULL);
1515
1516         /*
1517          * owner loses lockpage/unlockpage mappings and gains a
1518          * trashpage mapping, if needed.
1519          */
1520         if (!trash) {
1521                 /*
1522                  * We do not handle errors in devmap_unload in the !trash case,
1523                  * as the process is attempting to unmap/exit or otherwise
1524                  * release the lock. Errors in unloading the mapping are not
1525                  * going to affect that (unmap does not take error return).
1526                  */
1527                 (void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
1528                 (void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
1529         } else {
1530                 int err;
1531
1532                 if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
1533                         /* error unloading lockseg mapping. abort giveup */
1534                         return (err);
1535                 }
1536
1537                 /*
1538                  * old owner gets mapping to trash page so it can continue
1539                  * devmap_umem_remap does a hat_unload (and does it holding
1540                  * the right locks), so no need to devmap_unload on unlockseg
1541                  */
1542                 if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
1543                     trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
1544                         /* error remapping to trash page, abort giveup */
1545                         return (err);
1546                 }
1547                 owner->flag |= TRASHPAGE;
1548                 /*
1549                  * Preload mapping to trash page by calling devmap_load
1550                  * However, devmap_load can only be called on the faulting
1551                  * process context and not on the owner's process context
1552                  * we preload only if we happen to be in owner process context
1553                  * Other processes will fault on the unlock mapping
1554                  * and be given a trash mapping at that time.
1555                  */
1556                 if (ID(owner) == CURPROC_ID) {
1557                         (void) devmap_load(owner->unlockseg, lp->cookie,
1558                             PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
1559                 }
1560         }
1561
1562         lp->owner = NULL;
1563
1564         /* Clear the lock value in underlying page so new owner can grab it */
1565         LOCK(lp) = 0;
1566
1567         if (lp->sleepers) {
1568                 DEBUGF(4, (CE_CONT, "  waking up, lp=%p\n", (void *)lp));
1569                 cv_broadcast(&lp->locksleep);
1570         }
1571         return (0);
1572 }
1573
1574 /*
1575  * destroy all allocated memory.
1576  */
1577
1578 static void
1579 lock_destroyall(void)
1580 {
1581         SegLock *lp, *lpnext;
1582
1583         ASSERT(MUTEX_HELD(&winlock_mutex));
1584         ASSERT(lock_list == NULL);
1585
1586         DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
1587         for (lp = lock_free_list; lp != NULL; lp = lpnext) {
1588                 mutex_enter(&lp->mutex);
1589                 lpnext =  lp->next;
1590                 ASSERT(lp->clients == NULL);
1591                 ASSERT(lp->owner == NULL);
1592                 ASSERT(lp->alloccount == 0);
1593                 mutex_destroy(&lp->mutex);
1594                 cv_destroy(&lp->locksleep);
1595                 kmem_free(lp, sizeof (SegLock));
1596         }
1597         lock_free_list = NULL;
1598         next_lock = 0;
1599 }
1600
1601
1602 /* RFE: create mdb walkers instead of dump routines? */
1603 static void
1604 seglock_dump_all(void)
1605 {
1606         SegLock *lp;
1607
1608         mutex_enter(&winlock_mutex);
1609         cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");
1610
1611         cmn_err(CE_CONT, "Lock List:\n");
1612         for (lp = lock_list; lp != NULL; lp = lp->next) {
1613                 mutex_enter(&lp->mutex);
1614                 cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1615                     lp->cookie, lp->key, lp->alloccount,
1616                     lp->clients ? 'Y' : 'N',
1617                     lp->owner ? 'Y' : 'N',
1618                     lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1619                     lp->sleepers);
1620                 mutex_exit(&lp->mutex);
1621         }
1622         cmn_err(CE_CONT, "Free Lock List:\n");
1623         for (lp = lock_free_list; lp != NULL; lp = lp->next) {
1624                 mutex_enter(&lp->mutex);
1625                 cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
1626                     lp->cookie, lp->key, lp->alloccount,
1627                     lp->clients ? 'Y' : 'N',
1628                     lp->owner ? 'Y' : 'N',
1629                     lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
1630                     lp->sleepers);
1631                 mutex_exit(&lp->mutex);
1632         }
1633
1634 #ifdef DEBUG
1635         if (lock_debug < 3) {
1636                 mutex_exit(&winlock_mutex);
1637                 return;
1638         }
1639
1640         for (lp = lock_list; lp != NULL; lp = lp->next) {
1641                 SegProc *sdp;
1642
1643                 mutex_enter(&lp->mutex);
1644                 cmn_err(CE_CONT,
1645                     "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
1646                     (void *)lp, lp->key, lp->cookie, lp->alloccount,
1647                     lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);
1648
1649                 cmn_err(CE_CONT,
1650                     "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
1651                     lp->style, (void *)lp->lockptr, lp->timeout,
1652                     (void *)lp->clients, (void *)lp->owner);
1653
1654
1655                 for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
1656                         cmn_err(CE_CONT, "  client %p%s, lp=%p, flag=%x, "
1657                             "process tag=%p, lockseg=%p, unlockseg=%p\n",
1658                             (void *)sdp, sdp == lp->owner ? " (owner)" : "",
1659                             (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
1660                             (void *)sdp->lockseg, (void *)sdp->unlockseg);
1661                 }
1662                 mutex_exit(&lp->mutex);
1663         }
1664 #endif
1665         mutex_exit(&winlock_mutex);
1666 }
1667
1668 #include <sys/modctl.h>
1669
/*
 * Driver linkage record for this module; modlinkage below points at it.
 */
static struct modldrv modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        "Winlock Driver",       /* Name of the module */
        &winlock_ops,           /* driver ops */
};
1675
/*
 * Module linkage passed to mod_install(), mod_info() and mod_remove()
 * by _init(), _info() and _fini(). Its only linkage entry is modldrv.
 * NOTE(review): the trailing zeros presumably terminate/pad the linkage
 * array -- confirm against the struct modlinkage definition.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        0,
        0,
        0
};
1683
1684 int
1685 _init(void)
1686 {
1687         int e;
1688
1689         mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
1690         e = mod_install(&modlinkage);
1691         if (e) {
1692                 mutex_destroy(&winlock_mutex);
1693         }
1694         return (e);
1695 }
1696
1697
1698 int
1699 _info(struct modinfo *modinfop)
1700 {
1701         return (mod_info(&modlinkage, modinfop));
1702 }
1703
1704 int
1705 _fini(void)
1706 {
1707         int     e;
1708
1709         e = mod_remove(&modlinkage);
1710         if (e == 0) {
1711                 mutex_destroy(&winlock_mutex);
1712         }
1713         return (e);
1714 }