2 * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/sysctl.h>
34 #include <sys/mount.h>
35 #include <sys/systm.h>
36 #include <sys/nlookup.h>
37 #include <sys/vnode.h>
39 #include <sys/vfs_quota.h>
40 #include <sys/spinlock.h>
41 #include <sys/spinlock2.h>
43 #include <sys/sysproto.h>
44 #include <libprop/proplib.h>
45 #include <libprop/prop_dictionary.h>
47 /* in-memory accounting, red-black tree based */
48 /* FIXME: code duplication caused by uid_t / gid_t differences */
49 RB_PROTOTYPE(ac_utree
, ac_unode
, rb_entry
, rb_ac_unode_cmp
);
50 RB_PROTOTYPE(ac_gtree
, ac_gnode
, rb_entry
, rb_ac_gnode_cmp
);
53 rb_ac_unode_cmp(struct ac_unode
*a
, struct ac_unode
*b
);
55 rb_ac_gnode_cmp(struct ac_gnode
*a
, struct ac_gnode
*b
);
57 RB_GENERATE(ac_utree
, ac_unode
, rb_entry
, rb_ac_unode_cmp
);
58 RB_GENERATE(ac_gtree
, ac_gnode
, rb_entry
, rb_ac_gnode_cmp
);
60 struct ac_unode
* unode_insert(struct mount
*, uid_t
);
61 struct ac_gnode
* gnode_insert(struct mount
*, gid_t
);
64 rb_ac_unode_cmp(struct ac_unode
*a
, struct ac_unode
*b
)
66 if (a
->left_bits
< b
->left_bits
)
68 else if (a
->left_bits
> b
->left_bits
)
74 rb_ac_gnode_cmp(struct ac_gnode
*a
, struct ac_gnode
*b
)
76 if (a
->left_bits
< b
->left_bits
)
78 else if (a
->left_bits
> b
->left_bits
)
84 unode_insert(struct mount
*mp
, uid_t uid
)
86 struct ac_unode
*unp
, *res
;
88 unp
= kmalloc(sizeof(struct ac_unode
), M_MOUNT
, M_ZERO
| M_WAITOK
);
90 unp
->left_bits
= (uid
>> ACCT_CHUNK_BITS
);
91 res
= RB_INSERT(ac_utree
, &mp
->mnt_acct
.ac_uroot
, unp
);
92 KASSERT(res
== NULL
, ("unode_insert(): RB_INSERT didn't return NULL"));
98 gnode_insert(struct mount
*mp
, gid_t gid
)
100 struct ac_gnode
*gnp
, *res
;
102 gnp
= kmalloc(sizeof(struct ac_gnode
), M_MOUNT
, M_ZERO
| M_WAITOK
);
104 gnp
->left_bits
= (gid
>> ACCT_CHUNK_BITS
);
105 res
= RB_INSERT(ac_gtree
, &mp
->mnt_acct
.ac_groot
, gnp
);
106 KASSERT(res
== NULL
, ("gnode_insert(): RB_INSERT didn't return NULL"));
111 int vfs_quota_enabled
= 0;
112 TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled
);
113 SYSCTL_INT(_vfs
, OID_AUTO
, quota_enabled
, CTLFLAG_RD
,
114 &vfs_quota_enabled
, 0, "Enable VFS quota");
116 /* initializes per mount-point data structures */
118 vq_init(struct mount
*mp
)
121 if (!vfs_quota_enabled
)
124 /* initialize the rb trees */
125 RB_INIT(&mp
->mnt_acct
.ac_uroot
);
126 RB_INIT(&mp
->mnt_acct
.ac_groot
);
127 spin_init(&mp
->mnt_acct
.ac_spin
, "vqinit");
129 mp
->mnt_acct
.ac_bytes
= 0;
131 /* enable data collection */
132 mp
->mnt_op
->vfs_account
= vfs_stdaccount
;
133 /* mark this filesystem quota enabled */
134 mp
->mnt_flag
|= MNT_QUOTA
;
136 kprintf("vfs accounting enabled for %s\n",
137 mp
->mnt_stat
.f_mntonname
);
/*
 * Per mount-point accounting teardown hook.
 *
 * Currently a no-op: the nodes allocated by unode_insert() and
 * gnode_insert() are not released here yet.
 */
void
vq_done(struct mount *mp)
{
	/* TODO: remove the rb trees here */
}
148 vfs_stdaccount(struct mount
*mp
, uid_t uid
, gid_t gid
, int64_t delta
)
150 struct ac_unode ufind
, *unp
;
151 struct ac_gnode gfind
, *gnp
;
153 /* find or create address of chunk */
154 ufind
.left_bits
= (uid
>> ACCT_CHUNK_BITS
);
155 gfind
.left_bits
= (gid
>> ACCT_CHUNK_BITS
);
157 spin_lock(&mp
->mnt_acct
.ac_spin
);
159 mp
->mnt_acct
.ac_bytes
+= delta
;
161 if ((unp
= RB_FIND(ac_utree
, &mp
->mnt_acct
.ac_uroot
, &ufind
)) == NULL
)
162 unp
= unode_insert(mp
, uid
);
163 if ((gnp
= RB_FIND(ac_gtree
, &mp
->mnt_acct
.ac_groot
, &gfind
)) == NULL
)
164 gnp
= gnode_insert(mp
, gid
);
166 /* update existing chunk */
167 unp
->uid_chunk
[(uid
& ACCT_CHUNK_MASK
)].space
+= delta
;
168 gnp
->gid_chunk
[(gid
& ACCT_CHUNK_MASK
)].space
+= delta
;
170 spin_unlock(&mp
->mnt_acct
.ac_spin
);
174 cmd_get_usage_all(struct mount
*mp
, prop_array_t dict_out
)
176 struct ac_unode
*unp
;
177 struct ac_gnode
*gnp
;
179 prop_dictionary_t item
;
181 item
= prop_dictionary_create();
182 (void) prop_dictionary_set_uint64(item
, "space used", mp
->mnt_acct
.ac_bytes
);
183 (void) prop_dictionary_set_uint64(item
, "limit", mp
->mnt_acct
.ac_limit
);
184 prop_array_add_and_rel(dict_out
, item
);
186 RB_FOREACH(unp
, ac_utree
, &mp
->mnt_acct
.ac_uroot
) {
187 for (i
=0; i
<ACCT_CHUNK_NIDS
; i
++) {
188 if (unp
->uid_chunk
[i
].space
!= 0) {
189 item
= prop_dictionary_create();
190 (void) prop_dictionary_set_uint32(item
, "uid",
191 (unp
->left_bits
<< ACCT_CHUNK_BITS
) + i
);
192 (void) prop_dictionary_set_uint64(item
, "space used",
193 unp
->uid_chunk
[i
].space
);
194 (void) prop_dictionary_set_uint64(item
, "limit",
195 unp
->uid_chunk
[i
].limit
);
196 prop_array_add_and_rel(dict_out
, item
);
201 RB_FOREACH(gnp
, ac_gtree
, &mp
->mnt_acct
.ac_groot
) {
202 for (i
=0; i
<ACCT_CHUNK_NIDS
; i
++) {
203 if (gnp
->gid_chunk
[i
].space
!= 0) {
204 item
= prop_dictionary_create();
205 (void) prop_dictionary_set_uint32(item
, "gid",
206 (gnp
->left_bits
<< ACCT_CHUNK_BITS
) + i
);
207 (void) prop_dictionary_set_uint64(item
, "space used",
208 gnp
->gid_chunk
[i
].space
);
209 (void) prop_dictionary_set_uint64(item
, "limit",
210 gnp
->gid_chunk
[i
].limit
);
211 prop_array_add_and_rel(dict_out
, item
);
218 cmd_set_usage_all(struct mount
*mp
, prop_array_t args
)
220 struct ac_unode ufind
, *unp
;
221 struct ac_gnode gfind
, *gnp
;
222 prop_dictionary_t item
;
223 prop_object_iterator_t iter
;
227 spin_lock(&mp
->mnt_acct
.ac_spin
);
228 /* 0. zero all statistics */
229 /* we don't bother to free up memory, most of it would probably be
230 * re-allocated immediately anyway. just bzeroing the existing nodes
232 mp
->mnt_acct
.ac_bytes
= 0;
233 RB_FOREACH(unp
, ac_utree
, &mp
->mnt_acct
.ac_uroot
) {
234 bzero(&unp
->uid_chunk
, sizeof(unp
->uid_chunk
));
236 RB_FOREACH(gnp
, ac_gtree
, &mp
->mnt_acct
.ac_groot
) {
237 bzero(&gnp
->gid_chunk
, sizeof(gnp
->gid_chunk
));
240 /* args contains an array of dict */
241 iter
= prop_array_iterator(args
);
243 kprintf("cmd_set_usage_all(): failed to create iterator\n");
244 spin_unlock(&mp
->mnt_acct
.ac_spin
);
247 while ((item
= prop_object_iterator_next(iter
)) != NULL
) {
248 prop_dictionary_get_uint64(item
, "space used", &space
);
249 if (prop_dictionary_get_uint32(item
, "uid", &id
)) {
250 ufind
.left_bits
= (id
>> ACCT_CHUNK_BITS
);
251 unp
= RB_FIND(ac_utree
, &mp
->mnt_acct
.ac_uroot
, &ufind
);
253 unp
= unode_insert(mp
, id
);
254 unp
->uid_chunk
[(id
& ACCT_CHUNK_MASK
)].space
= space
;
255 } else if (prop_dictionary_get_uint32(item
, "gid", &id
)) {
256 gfind
.left_bits
= (id
>> ACCT_CHUNK_BITS
);
257 gnp
= RB_FIND(ac_gtree
, &mp
->mnt_acct
.ac_groot
, &gfind
);
259 gnp
= gnode_insert(mp
, id
);
260 gnp
->gid_chunk
[(id
& ACCT_CHUNK_MASK
)].space
= space
;
262 mp
->mnt_acct
.ac_bytes
= space
;
265 prop_object_iterator_release(iter
);
267 spin_unlock(&mp
->mnt_acct
.ac_spin
);
272 cmd_set_limit(struct mount
*mp
, prop_dictionary_t args
)
276 prop_dictionary_get_uint64(args
, "limit", &limit
);
278 spin_lock(&mp
->mnt_acct
.ac_spin
);
279 mp
->mnt_acct
.ac_limit
= limit
;
280 spin_unlock(&mp
->mnt_acct
.ac_spin
);
286 cmd_set_limit_uid(struct mount
*mp
, prop_dictionary_t args
)
290 struct ac_unode ufind
, *unp
;
292 prop_dictionary_get_uint32(args
, "uid", &uid
);
293 prop_dictionary_get_uint64(args
, "limit", &limit
);
295 ufind
.left_bits
= (uid
>> ACCT_CHUNK_BITS
);
297 spin_lock(&mp
->mnt_acct
.ac_spin
);
298 if ((unp
= RB_FIND(ac_utree
, &mp
->mnt_acct
.ac_uroot
, &ufind
)) == NULL
)
299 unp
= unode_insert(mp
, uid
);
300 unp
->uid_chunk
[(uid
& ACCT_CHUNK_MASK
)].limit
= limit
;
301 spin_unlock(&mp
->mnt_acct
.ac_spin
);
307 cmd_set_limit_gid(struct mount
*mp
, prop_dictionary_t args
)
311 struct ac_gnode gfind
, *gnp
;
313 prop_dictionary_get_uint32(args
, "gid", &gid
);
314 prop_dictionary_get_uint64(args
, "limit", &limit
);
316 gfind
.left_bits
= (gid
>> ACCT_CHUNK_BITS
);
318 spin_lock(&mp
->mnt_acct
.ac_spin
);
319 if ((gnp
= RB_FIND(ac_gtree
, &mp
->mnt_acct
.ac_groot
, &gfind
)) == NULL
)
320 gnp
= gnode_insert(mp
, gid
);
321 gnp
->gid_chunk
[(gid
& ACCT_CHUNK_MASK
)].limit
= limit
;
322 spin_unlock(&mp
->mnt_acct
.ac_spin
);
/*
 * vquotactl system call entry point.
 *
 * Visible flow: copy a plistref in from vqa->pref and internalize it into
 * a dictionary, resolve a path to a namecache handle with nlookup, read the
 * "command" string and the "arguments" object from the dictionary, dispatch
 * to the matching cmd_* handler using nch.mount, then copy a dictionary
 * holding "returned data" back out through vqa->pref.
 *
 * NOTE(review): several statements of this function (some declarations,
 * the error returns, the release of the looked-up handle) are not visible
 * in this rendering; the comments below only describe what is shown.
 */
328 sys_vquotactl(struct vquotactl_args
*vqa
)
329 /* const char *path, struct plistref *pref */
333 struct plistref pref
;
334 prop_dictionary_t dict
;
338 struct nlookupdata nd
;
/* the syscall is only serviced when quota accounting is enabled */
341 if (!vfs_quota_enabled
)
/*
 * NOTE(review): the status of this copyin is overwritten by the
 * prop_dictionary_copyin() assignment that follows -- confirm that a
 * failed copyin cannot go unnoticed.
 */
344 error
= copyin(vqa
->pref
, &pref
, sizeof(pref
));
345 error
= prop_dictionary_copyin(&pref
, &dict
);
349 /* we have a path, get its mount point */
350 error
= nlookup_init(&nd
, path
, UIO_USERSPACE
, 0);
353 error
= nlookup(&nd
);
/* clear the handle stored in the lookup data */
357 cache_zero(&nd
.nl_nch
);
360 /* get the command */
361 if (prop_dictionary_get_cstring(dict
, "command", &cmd
) == 0) {
362 kprintf("sys_vquotactl(): couldn't get command\n");
/* command arguments; their expected type depends on the command */
366 args
= prop_dictionary_get(dict
, "arguments");
368 kprintf("couldn't get arguments\n");
/* array handed to "get usage all" and sent back as "returned data" */
373 pa_out
= prop_array_create();
374 if (pa_out
== NULL
) {
/* command dispatch: exact string match on the command name */
379 if (strcmp(cmd
, "get usage all") == 0) {
380 cmd_get_usage_all(nch
.mount
, pa_out
);
383 if (strcmp(cmd
, "set usage all") == 0) {
384 error
= cmd_set_usage_all(nch
.mount
, args
);
387 if (strcmp(cmd
, "set limit") == 0) {
388 error
= cmd_set_limit(nch
.mount
, args
);
391 if (strcmp(cmd
, "set limit uid") == 0) {
392 error
= cmd_set_limit_uid(nch
.mount
, args
);
395 if (strcmp(cmd
, "set limit gid") == 0) {
396 error
= cmd_set_limit_gid(nch
.mount
, args
);
/*
 * NOTE(review): `dict` is reused for the reply, and `error` is reassigned
 * by each of the three calls below, so a status returned by a cmd_*
 * handler above appears to be overwritten -- confirm.
 */
403 /* kernel to userland */
404 dict
= prop_dictionary_create();
405 error
= prop_dictionary_set(dict
, "returned data", pa_out
);
407 error
= prop_dictionary_copyout(&pref
, dict
);
408 error
= copyout(&pref
, vqa
->pref
, sizeof(pref
));
415 * Returns a valid mount point for accounting purposes
416 * We cannot simply use vp->v_mount if the vnode belongs
417 * to a PFS mount point
420 vq_vptomp(struct vnode
*vp
)
422 /* XXX: vp->v_pfsmp may point to a freed structure
423 * we use mountlist_exists() to check if it is valid
425 if ((vp
->v_pfsmp
!= NULL
) && (mountlist_exists(vp
->v_pfsmp
))) {
426 /* This is a PFS, use a copy of the real mp */
429 /* Not a PFS or a PFS beeing unmounted */
435 vq_write_ok(struct mount
*mp
, uid_t uid
, gid_t gid
, uint64_t delta
)
438 struct ac_unode ufind
, *unp
;
439 struct ac_gnode gfind
, *gnp
;
440 uint64_t space
, limit
;
442 spin_lock(&mp
->mnt_acct
.ac_spin
);
444 if (mp
->mnt_acct
.ac_limit
== 0)
446 if ((mp
->mnt_acct
.ac_bytes
+ delta
) > mp
->mnt_acct
.ac_limit
) {
452 ufind
.left_bits
= (uid
>> ACCT_CHUNK_BITS
);
453 if ((unp
= RB_FIND(ac_utree
, &mp
->mnt_acct
.ac_uroot
, &ufind
)) == NULL
) {
457 space
= unp
->uid_chunk
[(uid
& ACCT_CHUNK_MASK
)].space
;
458 limit
= unp
->uid_chunk
[(uid
& ACCT_CHUNK_MASK
)].limit
;
462 if ((space
+ delta
) > limit
) {
468 gfind
.left_bits
= (gid
>> ACCT_CHUNK_BITS
);
469 if ((gnp
= RB_FIND(ac_gtree
, &mp
->mnt_acct
.ac_groot
, &gfind
)) == NULL
) {
473 space
= gnp
->gid_chunk
[(gid
& ACCT_CHUNK_MASK
)].space
;
474 limit
= gnp
->gid_chunk
[(gid
& ACCT_CHUNK_MASK
)].limit
;
478 if ((space
+ delta
) > limit
)
482 spin_unlock(&mp
->mnt_acct
.ac_spin
);