sys/kern/vfs_quota.c

   1 /*
   2  * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in
  13  *    the documentation and/or other materials provided with the
  14  *    distribution.
  15  * 3. Neither the name of The DragonFly Project nor the names of its
  16  *    contributors may be used to endorse or promote products derived
  17  *    from this software without specific, prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  */
  32
  33 #include <sys/sysctl.h>
  34 #include <sys/mount.h>
  35 #include <sys/systm.h>
  36 #include <sys/nlookup.h>
  37 #include <sys/vnode.h>
  38 #include <sys/stat.h>
  39 #include <sys/vfs_quota.h>
  40 #include <sys/spinlock.h>
  41 #include <sys/spinlock2.h>
  42
  43 #include <sys/sysproto.h>
  44 #include <libprop/proplib.h>
  45 #include <libprop/prop_dictionary.h>
  46
  47 /* in-memory accounting, red-black tree based */
  48 /* FIXME: code duplication caused by uid_t / gid_t differences */
  49 RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
  50 RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
  51
  52 static int
  53 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b);
  54 static int
  55 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b);
  56
  57 RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
  58 RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
  59
  60 struct ac_unode* unode_insert(struct mount*, uid_t);
  61 struct ac_gnode* gnode_insert(struct mount*, gid_t);
  62
  63 static int
  64 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b)
  65 {
  66         if (a->left_bits < b->left_bits)
  67                 return(-1);
  68         else if (a->left_bits > b->left_bits)
  69                 return(1);
  70         return(0);
  71 }
  72
  73 static int
  74 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b)
  75 {
  76         if (a->left_bits < b->left_bits)
  77                 return(-1);
  78         else if (a->left_bits > b->left_bits)
  79                 return(1);
  80         return(0);
  81 }
  82
  83 struct ac_unode*
  84 unode_insert(struct mount *mp, uid_t uid)
  85 {
  86         struct ac_unode *unp, *res;
  87
  88         unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK);
  89
  90         unp->left_bits = (uid >> ACCT_CHUNK_BITS);
  91         res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp);
  92         KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL"));
  93
  94         return unp;
  95 }
  96
  97 struct ac_gnode*
  98 gnode_insert(struct mount *mp, gid_t gid)
  99 {
 100         struct ac_gnode *gnp, *res;
 101
 102         gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK);
 103
 104         gnp->left_bits = (gid >> ACCT_CHUNK_BITS);
 105         res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp);
 106         KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL"));
 107
 108         return gnp;
 109 }
 110
 111 int vfs_quota_enabled = 0;
 112 TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled);
 113 SYSCTL_INT(_vfs, OID_AUTO, quota_enabled, CTLFLAG_RD,
 114                  &vfs_quota_enabled, 0, "Enable VFS quota");
 115
 116 /* initializes per mount-point data structures */
 117 void
 118 vq_init(struct mount *mp)
 119 {
 120
 121         if (!vfs_quota_enabled)
 122                 return;
 123
 124         /* initialize the rb trees */
 125         RB_INIT(&mp->mnt_acct.ac_uroot);
 126         RB_INIT(&mp->mnt_acct.ac_groot);
 127         spin_init(&mp->mnt_acct.ac_spin, "vqinit");
 128
 129         mp->mnt_acct.ac_bytes = 0;
 130
 131         /* enable data collection */
 132         mp->mnt_op->vfs_account = vfs_stdaccount;
 133         /* mark this filesystem quota enabled */
 134         mp->mnt_flag |= MNT_QUOTA;
 135         if (bootverbose)
 136                 kprintf("vfs accounting enabled for %s\n",
 137                     mp->mnt_stat.f_mntonname);
 138 }
 139
 140
 141 void
 142 vq_done(struct mount *mp)
 143 {
 144         /* TODO: remove the rb trees here */
 145 }
 146
 147 void
 148 vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta)
 149 {
 150         struct ac_unode ufind, *unp;
 151         struct ac_gnode gfind, *gnp;
 152
 153         /* find or create address of chunk */
 154         ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
 155         gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
 156
 157         spin_lock(&mp->mnt_acct.ac_spin);
 158
 159         mp->mnt_acct.ac_bytes += delta;
 160
 161         if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
 162                 unp = unode_insert(mp, uid);
 163         if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
 164                 gnp = gnode_insert(mp, gid);
 165
 166         /* update existing chunk */
 167         unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space += delta;
 168         gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space += delta;
 169
 170         spin_unlock(&mp->mnt_acct.ac_spin);
 171 }
 172
 173 static void
 174 cmd_get_usage_all(struct mount *mp, prop_array_t dict_out)
 175 {
 176         struct ac_unode *unp;
 177         struct ac_gnode *gnp;
 178         int i;
 179         prop_dictionary_t item;
 180
 181         item = prop_dictionary_create();
 182         (void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes);
 183         (void) prop_dictionary_set_uint64(item, "limit", mp->mnt_acct.ac_limit);
 184         prop_array_add_and_rel(dict_out, item);
 185
 186         RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
 187                 for (i=0; i<ACCT_CHUNK_NIDS; i++) {
 188                         if (unp->uid_chunk[i].space != 0) {
 189                                 item = prop_dictionary_create();
 190                                 (void) prop_dictionary_set_uint32(item, "uid",
 191                                         (unp->left_bits << ACCT_CHUNK_BITS) + i);
 192                                 (void) prop_dictionary_set_uint64(item, "space used",
 193                                         unp->uid_chunk[i].space);
 194                                 (void) prop_dictionary_set_uint64(item, "limit",
 195                                         unp->uid_chunk[i].limit);
 196                                 prop_array_add_and_rel(dict_out, item);
 197                         }
 198                 }
 199         }
 200
 201         RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
 202                 for (i=0; i<ACCT_CHUNK_NIDS; i++) {
 203                         if (gnp->gid_chunk[i].space != 0) {
 204                                 item = prop_dictionary_create();
 205                                 (void) prop_dictionary_set_uint32(item, "gid",
 206                                         (gnp->left_bits << ACCT_CHUNK_BITS) + i);
 207                                 (void) prop_dictionary_set_uint64(item, "space used",
 208                                         gnp->gid_chunk[i].space);
 209                                 (void) prop_dictionary_set_uint64(item, "limit",
 210                                         gnp->gid_chunk[i].limit);
 211                                 prop_array_add_and_rel(dict_out, item);
 212                         }
 213                 }
 214         }
 215 }
 216
 217 static int
 218 cmd_set_usage_all(struct mount *mp, prop_array_t args)
 219 {
 220         struct ac_unode ufind, *unp;
 221         struct ac_gnode gfind, *gnp;
 222         prop_dictionary_t item;
 223         prop_object_iterator_t iter;
 224         uint32_t id;
 225         uint64_t space;
 226
 227         spin_lock(&mp->mnt_acct.ac_spin);
 228         /* 0. zero all statistics */
 229         /* we don't bother to free up memory, most of it would probably be
 230          * re-allocated immediately anyway. just bzeroing the existing nodes
 231          * is fine */
 232         mp->mnt_acct.ac_bytes = 0;
 233         RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
 234                 bzero(&unp->uid_chunk, sizeof(unp->uid_chunk));
 235         }
 236         RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
 237                 bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk));
 238         }
 239
 240         /* args contains an array of dict */
 241         iter = prop_array_iterator(args);
 242         if (iter == NULL) {
 243                 kprintf("cmd_set_usage_all(): failed to create iterator\n");
 244                 spin_unlock(&mp->mnt_acct.ac_spin);
 245                 return 1;
 246         }
 247         while ((item = prop_object_iterator_next(iter)) != NULL) {
 248                 prop_dictionary_get_uint64(item, "space used", &space);
 249                 if (prop_dictionary_get_uint32(item, "uid", &id)) {
 250                         ufind.left_bits = (id >> ACCT_CHUNK_BITS);
 251                         unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind);
 252                         if (unp == NULL)
 253                                 unp = unode_insert(mp, id);
 254                         unp->uid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
 255                 } else if (prop_dictionary_get_uint32(item, "gid", &id)) {
 256                         gfind.left_bits = (id >> ACCT_CHUNK_BITS);
 257                         gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind);
 258                         if (gnp == NULL)
 259                                 gnp = gnode_insert(mp, id);
 260                         gnp->gid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
 261                 } else {
 262                         mp->mnt_acct.ac_bytes = space;
 263                 }
 264         }
 265         prop_object_iterator_release(iter);
 266
 267         spin_unlock(&mp->mnt_acct.ac_spin);
 268         return 0;
 269 }
 270
 271 static int
 272 cmd_set_limit(struct mount *mp, prop_dictionary_t args)
 273 {
 274         uint64_t limit;
 275
 276         prop_dictionary_get_uint64(args, "limit", &limit);
 277
 278         spin_lock(&mp->mnt_acct.ac_spin);
 279         mp->mnt_acct.ac_limit = limit;
 280         spin_unlock(&mp->mnt_acct.ac_spin);
 281
 282         return 0;
 283 }
 284
 285 static int
 286 cmd_set_limit_uid(struct mount *mp, prop_dictionary_t args)
 287 {
 288         uint64_t limit;
 289         uid_t uid;
 290         struct ac_unode ufind, *unp;
 291
 292         prop_dictionary_get_uint32(args, "uid", &uid);
 293         prop_dictionary_get_uint64(args, "limit", &limit);
 294
 295         ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
 296
 297         spin_lock(&mp->mnt_acct.ac_spin);
 298         if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
 299                 unp = unode_insert(mp, uid);
 300         unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit = limit;
 301         spin_unlock(&mp->mnt_acct.ac_spin);
 302
 303         return 0;
 304 }
 305
 306 static int
 307 cmd_set_limit_gid(struct mount *mp, prop_dictionary_t args)
 308 {
 309         uint64_t limit;
 310         gid_t gid;
 311         struct ac_gnode gfind, *gnp;
 312
 313         prop_dictionary_get_uint32(args, "gid", &gid);
 314         prop_dictionary_get_uint64(args, "limit", &limit);
 315
 316         gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
 317
 318         spin_lock(&mp->mnt_acct.ac_spin);
 319         if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
 320                 gnp = gnode_insert(mp, gid);
 321         gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit = limit;
 322         spin_unlock(&mp->mnt_acct.ac_spin);
 323
 324         return 0;
 325 }
 326
 327 int
 328 sys_vquotactl(struct vquotactl_args *vqa)
 329 /* const char *path, struct plistref *pref */
 330 {
 331         struct nchandle nch;
 332         const char *path;
 333         struct plistref pref;
 334         prop_dictionary_t dict;
 335         prop_object_t args;
 336         char *cmd;
 337         prop_array_t pa_out;
 338         struct nlookupdata nd;
 339         int error;
 340
 341         if (!vfs_quota_enabled)
 342                 return EOPNOTSUPP;
 343         path = vqa->path;
 344         error = copyin(vqa->pref, &pref, sizeof(pref));
 345         error = prop_dictionary_copyin(&pref, &dict);
 346         if (error)
 347                 return(error);
 348
 349         /* we have a path, get its mount point */
 350         error = nlookup_init(&nd, path, UIO_USERSPACE, 0);
 351         if (error)
 352                 return (error);
 353         error = nlookup(&nd);
 354         if (error)
 355                 return (error);
 356         nch = nd.nl_nch;
 357         cache_zero(&nd.nl_nch);
 358         nlookup_done(&nd);
 359
 360         /* get the command */
 361         if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) {
 362                 kprintf("sys_vquotactl(): couldn't get command\n");
 363                 cache_put(&nch);
 364                 return EINVAL;
 365         }
 366         args = prop_dictionary_get(dict, "arguments");
 367         if (args == NULL) {
 368                 kprintf("couldn't get arguments\n");
 369                 cache_put(&nch);
 370                 return EINVAL;
 371         }
 372
 373         pa_out = prop_array_create();
 374         if (pa_out == NULL) {
 375                 cache_put(&nch);
 376                 return ENOMEM;
 377         }
 378
 379         if (strcmp(cmd, "get usage all") == 0) {
 380                 cmd_get_usage_all(nch.mount, pa_out);
 381                 goto done;
 382         }
 383         if (strcmp(cmd, "set usage all") == 0) {
 384                 error = cmd_set_usage_all(nch.mount, args);
 385                 goto done;
 386         }
 387         if (strcmp(cmd, "set limit") == 0) {
 388                 error = cmd_set_limit(nch.mount, args);
 389                 goto done;
 390         }
 391         if (strcmp(cmd, "set limit uid") == 0) {
 392                 error = cmd_set_limit_uid(nch.mount, args);
 393                 goto done;
 394         }
 395         if (strcmp(cmd, "set limit gid") == 0) {
 396                 error = cmd_set_limit_gid(nch.mount, args);
 397                 goto done;
 398         }
 399         cache_put(&nch);
 400         return EINVAL;
 401
 402 done:
 403         /* kernel to userland */
 404         dict = prop_dictionary_create();
 405         error = prop_dictionary_set(dict, "returned data", pa_out);
 406
 407         error = prop_dictionary_copyout(&pref, dict);
 408         error = copyout(&pref, vqa->pref, sizeof(pref));
 409         cache_put(&nch);
 410
 411         return error;
 412 }
 413
 414 /*
 415  * Returns a valid mount point for accounting purposes
 416  * We cannot simply use vp->v_mount if the vnode belongs
 417  * to a PFS mount point
 418  */
 419 struct mount*
 420 vq_vptomp(struct vnode *vp)
 421 {
 422         /* XXX: vp->v_pfsmp may point to a freed structure
 423         * we use mountlist_exists() to check if it is valid
 424         * before using it */
 425         if ((vp->v_pfsmp != NULL) && (mountlist_exists(vp->v_pfsmp))) {
 426                 /* This is a PFS, use a copy of the real mp */
 427                 return vp->v_pfsmp;
 428         } else {
 429                 /* Not a PFS or a PFS beeing unmounted */
 430                 return vp->v_mount;
 431         }
 432 }
 433
 434 int
 435 vq_write_ok(struct mount *mp, uid_t uid, gid_t gid, uint64_t delta)
 436 {
 437         int rv = 1;
 438         struct ac_unode ufind, *unp;
 439         struct ac_gnode gfind, *gnp;
 440         uint64_t space, limit;
 441
 442         spin_lock(&mp->mnt_acct.ac_spin);
 443
 444         if (mp->mnt_acct.ac_limit == 0)
 445                 goto check_uid;
 446         if ((mp->mnt_acct.ac_bytes + delta) > mp->mnt_acct.ac_limit) {
 447                 rv = 0;
 448                 goto done;
 449         }
 450
 451 check_uid:
 452         ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
 453         if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) {
 454                 space = 0;
 455                 limit = 0;
 456         } else {
 457                 space = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space;
 458                 limit = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit;
 459         }
 460         if (limit == 0)
 461                 goto check_gid;
 462         if ((space + delta) > limit) {
 463                 rv = 0;
 464                 goto done;
 465         }
 466
 467 check_gid:
 468         gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
 469         if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) {
 470                 space = 0;
 471                 limit = 0;
 472         } else {
 473                 space = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space;
 474                 limit = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit;
 475         }
 476         if (limit == 0)
 477                 goto done;
 478         if ((space + delta) > limit)
 479                 rv = 0;
 480
 481 done:
 482         spin_unlock(&mp->mnt_acct.ac_spin);
 483         return rv;
 484 }