usr.sbin/makefs/ffs: Remove m_buf::b_is_hammer2
[dragonfly.git] / sys / kern / vfs_quota.c
blob70103ed864c7baa6ffb68e5ae6a7a8cf0d7cf1ab
1 /*
2 * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org>
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <sys/sysctl.h>
34 #include <sys/malloc.h>
35 #include <sys/mount.h>
36 #include <sys/systm.h>
37 #include <sys/nlookup.h>
38 #include <sys/vnode.h>
39 #include <sys/stat.h>
40 #include <sys/vfs_quota.h>
41 #include <sys/spinlock.h>
42 #include <sys/spinlock2.h>
44 #include <sys/sysmsg.h>
45 #include <libprop/proplib.h>
46 #include <libprop/prop_dictionary.h>
48 /* in-memory accounting, red-black tree based */
49 /* FIXME: code duplication caused by uid_t / gid_t differences */
50 RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
51 RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
53 static int
54 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b);
55 static int
56 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b);
58 RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
59 RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
61 struct ac_unode* unode_insert(struct mount*, uid_t);
62 struct ac_gnode* gnode_insert(struct mount*, gid_t);
64 static int
65 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b)
67 if (a->left_bits < b->left_bits)
68 return(-1);
69 else if (a->left_bits > b->left_bits)
70 return(1);
71 return(0);
74 static int
75 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b)
77 if (a->left_bits < b->left_bits)
78 return(-1);
79 else if (a->left_bits > b->left_bits)
80 return(1);
81 return(0);
84 struct ac_unode*
85 unode_insert(struct mount *mp, uid_t uid)
87 struct ac_unode *unp, *res;
89 unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK);
91 unp->left_bits = (uid >> ACCT_CHUNK_BITS);
92 res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp);
93 KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL"));
95 return unp;
98 struct ac_gnode*
99 gnode_insert(struct mount *mp, gid_t gid)
101 struct ac_gnode *gnp, *res;
103 gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK);
105 gnp->left_bits = (gid >> ACCT_CHUNK_BITS);
106 res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp);
107 KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL"));
109 return gnp;
112 int vfs_quota_enabled = 0;
113 TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled);
114 SYSCTL_INT(_vfs, OID_AUTO, quota_enabled, CTLFLAG_RD,
115 &vfs_quota_enabled, 0, "Enable VFS quota");
117 /* initializes per mount-point data structures */
118 void
119 vq_init(struct mount *mp)
122 if (!vfs_quota_enabled)
123 return;
125 /* initialize the rb trees */
126 RB_INIT(&mp->mnt_acct.ac_uroot);
127 RB_INIT(&mp->mnt_acct.ac_groot);
128 spin_init(&mp->mnt_acct.ac_spin, "vqinit");
130 mp->mnt_acct.ac_bytes = 0;
132 /* enable data collection */
133 mp->mnt_op->vfs_account = vfs_stdaccount;
134 /* mark this filesystem quota enabled */
135 mp->mnt_flag |= MNT_QUOTA;
136 if (bootverbose)
137 kprintf("vfs accounting enabled for %s\n",
138 mp->mnt_stat.f_mntonname);
/*
 * Tear down per mount-point accounting state at unmount time.
 * Currently a stub: the rb-tree nodes are not freed yet.
 */
void
vq_done(struct mount *mp)
{
	/* TODO: remove the rb trees here */
}
148 void
149 vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta)
151 struct ac_unode ufind, *unp;
152 struct ac_gnode gfind, *gnp;
154 /* find or create address of chunk */
155 ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
156 gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
158 spin_lock(&mp->mnt_acct.ac_spin);
160 mp->mnt_acct.ac_bytes += delta;
162 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
163 unp = unode_insert(mp, uid);
164 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
165 gnp = gnode_insert(mp, gid);
167 /* update existing chunk */
168 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space += delta;
169 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space += delta;
171 spin_unlock(&mp->mnt_acct.ac_spin);
174 static void
175 cmd_get_usage_all(struct mount *mp, prop_array_t dict_out)
177 struct ac_unode *unp;
178 struct ac_gnode *gnp;
179 int i;
180 prop_dictionary_t item;
182 item = prop_dictionary_create();
183 (void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes);
184 (void) prop_dictionary_set_uint64(item, "limit", mp->mnt_acct.ac_limit);
185 prop_array_add_and_rel(dict_out, item);
187 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
188 for (i=0; i<ACCT_CHUNK_NIDS; i++) {
189 if (unp->uid_chunk[i].space != 0) {
190 item = prop_dictionary_create();
191 (void) prop_dictionary_set_uint32(item, "uid",
192 (unp->left_bits << ACCT_CHUNK_BITS) + i);
193 (void) prop_dictionary_set_uint64(item, "space used",
194 unp->uid_chunk[i].space);
195 (void) prop_dictionary_set_uint64(item, "limit",
196 unp->uid_chunk[i].limit);
197 prop_array_add_and_rel(dict_out, item);
202 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
203 for (i=0; i<ACCT_CHUNK_NIDS; i++) {
204 if (gnp->gid_chunk[i].space != 0) {
205 item = prop_dictionary_create();
206 (void) prop_dictionary_set_uint32(item, "gid",
207 (gnp->left_bits << ACCT_CHUNK_BITS) + i);
208 (void) prop_dictionary_set_uint64(item, "space used",
209 gnp->gid_chunk[i].space);
210 (void) prop_dictionary_set_uint64(item, "limit",
211 gnp->gid_chunk[i].limit);
212 prop_array_add_and_rel(dict_out, item);
218 static int
219 cmd_set_usage_all(struct mount *mp, prop_array_t args)
221 struct ac_unode ufind, *unp;
222 struct ac_gnode gfind, *gnp;
223 prop_dictionary_t item;
224 prop_object_iterator_t iter;
225 uint32_t id;
226 uint64_t space;
228 spin_lock(&mp->mnt_acct.ac_spin);
229 /* 0. zero all statistics */
230 /* we don't bother to free up memory, most of it would probably be
231 * re-allocated immediately anyway. just bzeroing the existing nodes
232 * is fine */
233 mp->mnt_acct.ac_bytes = 0;
234 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
235 bzero(&unp->uid_chunk, sizeof(unp->uid_chunk));
237 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
238 bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk));
241 /* args contains an array of dict */
242 iter = prop_array_iterator(args);
243 if (iter == NULL) {
244 kprintf("cmd_set_usage_all(): failed to create iterator\n");
245 spin_unlock(&mp->mnt_acct.ac_spin);
246 return 1;
248 while ((item = prop_object_iterator_next(iter)) != NULL) {
249 prop_dictionary_get_uint64(item, "space used", &space);
250 if (prop_dictionary_get_uint32(item, "uid", &id)) {
251 ufind.left_bits = (id >> ACCT_CHUNK_BITS);
252 unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind);
253 if (unp == NULL)
254 unp = unode_insert(mp, id);
255 unp->uid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
256 } else if (prop_dictionary_get_uint32(item, "gid", &id)) {
257 gfind.left_bits = (id >> ACCT_CHUNK_BITS);
258 gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind);
259 if (gnp == NULL)
260 gnp = gnode_insert(mp, id);
261 gnp->gid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
262 } else {
263 mp->mnt_acct.ac_bytes = space;
266 prop_object_iterator_release(iter);
268 spin_unlock(&mp->mnt_acct.ac_spin);
269 return 0;
272 static int
273 cmd_set_limit(struct mount *mp, prop_dictionary_t args)
275 uint64_t limit;
277 prop_dictionary_get_uint64(args, "limit", &limit);
279 spin_lock(&mp->mnt_acct.ac_spin);
280 mp->mnt_acct.ac_limit = limit;
281 spin_unlock(&mp->mnt_acct.ac_spin);
283 return 0;
286 static int
287 cmd_set_limit_uid(struct mount *mp, prop_dictionary_t args)
289 uint64_t limit;
290 uid_t uid;
291 struct ac_unode ufind, *unp;
293 prop_dictionary_get_uint32(args, "uid", &uid);
294 prop_dictionary_get_uint64(args, "limit", &limit);
296 ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
298 spin_lock(&mp->mnt_acct.ac_spin);
299 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
300 unp = unode_insert(mp, uid);
301 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit = limit;
302 spin_unlock(&mp->mnt_acct.ac_spin);
304 return 0;
307 static int
308 cmd_set_limit_gid(struct mount *mp, prop_dictionary_t args)
310 uint64_t limit;
311 gid_t gid;
312 struct ac_gnode gfind, *gnp;
314 prop_dictionary_get_uint32(args, "gid", &gid);
315 prop_dictionary_get_uint64(args, "limit", &limit);
317 gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
319 spin_lock(&mp->mnt_acct.ac_spin);
320 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
321 gnp = gnode_insert(mp, gid);
322 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit = limit;
323 spin_unlock(&mp->mnt_acct.ac_spin);
325 return 0;
329 sys_vquotactl(struct sysmsg *sysmsg, const struct vquotactl_args *vqa)
330 /* const char *path, struct plistref *pref */
332 struct nchandle nch;
333 const char *path;
334 struct plistref pref;
335 prop_dictionary_t dict;
336 prop_object_t args;
337 char *cmd;
338 prop_array_t pa_out;
339 struct nlookupdata nd;
340 int error;
342 if (!vfs_quota_enabled)
343 return EOPNOTSUPP;
344 path = vqa->path;
345 error = copyin(vqa->pref, &pref, sizeof(pref));
346 error = prop_dictionary_copyin(&pref, &dict);
347 if (error)
348 return(error);
350 /* we have a path, get its mount point */
351 error = nlookup_init(&nd, path, UIO_USERSPACE, 0);
352 if (error)
353 return (error);
354 error = nlookup(&nd);
355 if (error)
356 return (error);
357 nch = nd.nl_nch;
358 cache_zero(&nd.nl_nch);
359 nlookup_done(&nd);
361 /* get the command */
362 if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) {
363 kprintf("sys_vquotactl(): couldn't get command\n");
364 cache_put(&nch);
365 return EINVAL;
367 args = prop_dictionary_get(dict, "arguments");
368 if (args == NULL) {
369 kprintf("couldn't get arguments\n");
370 cache_put(&nch);
371 return EINVAL;
374 pa_out = prop_array_create();
375 if (pa_out == NULL) {
376 cache_put(&nch);
377 return ENOMEM;
380 if (strcmp(cmd, "get usage all") == 0) {
381 cmd_get_usage_all(nch.mount, pa_out);
382 goto done;
384 if (strcmp(cmd, "set usage all") == 0) {
385 error = cmd_set_usage_all(nch.mount, args);
386 goto done;
388 if (strcmp(cmd, "set limit") == 0) {
389 error = cmd_set_limit(nch.mount, args);
390 goto done;
392 if (strcmp(cmd, "set limit uid") == 0) {
393 error = cmd_set_limit_uid(nch.mount, args);
394 goto done;
396 if (strcmp(cmd, "set limit gid") == 0) {
397 error = cmd_set_limit_gid(nch.mount, args);
398 goto done;
400 cache_put(&nch);
401 return EINVAL;
403 done:
404 /* kernel to userland */
405 dict = prop_dictionary_create();
406 error = prop_dictionary_set(dict, "returned data", pa_out);
408 error = prop_dictionary_copyout(&pref, dict);
409 error = copyout(&pref, vqa->pref, sizeof(pref));
410 cache_put(&nch);
412 return error;
416 * Returns a valid mount point for accounting purposes
417 * We cannot simply use vp->v_mount if the vnode belongs
418 * to a PFS mount point
420 struct mount*
421 vq_vptomp(struct vnode *vp)
423 /* XXX: vp->v_pfsmp may point to a freed structure
424 * we use mountlist_exists() to check if it is valid
425 * before using it */
426 if ((vp->v_pfsmp != NULL) && (mountlist_exists(vp->v_pfsmp))) {
427 /* This is a PFS, use a copy of the real mp */
428 return vp->v_pfsmp;
429 } else {
430 /* Not a PFS or a PFS beeing unmounted */
431 return vp->v_mount;
436 vq_write_ok(struct mount *mp, uid_t uid, gid_t gid, uint64_t delta)
438 int rv = 1;
439 struct ac_unode ufind, *unp;
440 struct ac_gnode gfind, *gnp;
441 uint64_t space, limit;
443 spin_lock(&mp->mnt_acct.ac_spin);
445 if (mp->mnt_acct.ac_limit == 0)
446 goto check_uid;
447 if ((mp->mnt_acct.ac_bytes + delta) > mp->mnt_acct.ac_limit) {
448 rv = 0;
449 goto done;
452 check_uid:
453 ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
454 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) {
455 space = 0;
456 limit = 0;
457 } else {
458 space = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space;
459 limit = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit;
461 if (limit == 0)
462 goto check_gid;
463 if ((space + delta) > limit) {
464 rv = 0;
465 goto done;
468 check_gid:
469 gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
470 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) {
471 space = 0;
472 limit = 0;
473 } else {
474 space = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space;
475 limit = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit;
477 if (limit == 0)
478 goto done;
479 if ((space + delta) > limit)
480 rv = 0;
482 done:
483 spin_unlock(&mp->mnt_acct.ac_spin);
484 return rv;