2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.2 2008/02/05 07:58:43 dillon Exp $
39 static int hammer_ioc_prune(hammer_inode_t ip
,
40 struct hammer_ioc_prune
*prune
);
41 static int hammer_ioc_gethistory(hammer_inode_t ip
,
42 struct hammer_ioc_history
*hist
);
45 hammer_ioctl(hammer_inode_t ip
, u_long com
, caddr_t data
, int fflag
,
50 error
= suser_cred(cred
, PRISON_ROOT
);
55 error
= hammer_ioc_prune(ip
,
56 (struct hammer_ioc_prune
*)data
);
59 case HAMMERIOC_GETHISTORY
:
60 error
= hammer_ioc_gethistory(ip
,
61 (struct hammer_ioc_history
*)data
);
71 * Iterate through the specified range of object ids and remove any
72 * deleted records that fall entirely within a prune modulo.
74 * A reverse iteration is used to prevent overlapping records from being
75 * created during the iteration due to alignments. This also allows us
76 * to adjust alignments without blowing up the B-Tree.
78 static int check_prune(struct hammer_ioc_prune
*prune
, hammer_btree_elm_t elm
,
79 int *realign_cre
, int *realign_del
);
80 static int realign_prune(struct hammer_ioc_prune
*prune
, hammer_cursor_t cursor
,
81 int realign_cre
, int realign_del
);
84 hammer_ioc_prune(hammer_inode_t ip
, struct hammer_ioc_prune
*prune
)
86 struct hammer_cursor cursor
;
87 hammer_btree_elm_t elm
;
93 if (prune
->nelms
< 0 || prune
->nelms
> HAMMER_MAX_PRUNE_ELMS
) {
96 if (prune
->beg_obj_id
>= prune
->end_obj_id
) {
101 error
= hammer_init_cursor_hmp(&cursor
, NULL
, ip
->hmp
);
103 hammer_done_cursor(&cursor
);
106 cursor
.key_beg
.obj_id
= prune
->beg_obj_id
;
107 cursor
.key_beg
.key
= HAMMER_MIN_KEY
;
108 cursor
.key_beg
.create_tid
= 1;
109 cursor
.key_beg
.delete_tid
= 0;
110 cursor
.key_beg
.rec_type
= HAMMER_MIN_RECTYPE
;
111 cursor
.key_beg
.obj_type
= 0;
113 cursor
.key_end
.obj_id
= prune
->cur_obj_id
;
114 cursor
.key_end
.key
= prune
->cur_key
;
115 cursor
.key_end
.create_tid
= HAMMER_MAX_TID
- 1;
116 cursor
.key_end
.delete_tid
= 0;
117 cursor
.key_end
.rec_type
= HAMMER_MAX_RECTYPE
;
118 cursor
.key_end
.obj_type
= 0;
120 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
122 error
= hammer_btree_last(&cursor
);
124 elm
= &cursor
.node
->ondisk
->elms
[cursor
.index
];
125 prune
->cur_obj_id
= elm
->base
.obj_id
;
126 prune
->cur_key
= elm
->base
.key
;
127 if (check_prune(prune
, elm
, &realign_cre
, &realign_del
) == 0) {
128 if (hammer_debug_general
& 0x0200) {
129 kprintf("check %016llx %016llx: DELETE\n",
130 elm
->base
.obj_id
, elm
->base
.key
);
134 * NOTE: This can return EDEADLK
136 isdir
= (elm
->base
.rec_type
== HAMMER_RECTYPE_DIRENTRY
);
138 error
= hammer_delete_at_cursor(&cursor
,
144 ++prune
->stat_dirrecords
;
146 ++prune
->stat_rawrecords
;
147 } else if (realign_cre
>= 0 || realign_del
>= 0) {
148 error
= realign_prune(prune
, &cursor
,
149 realign_cre
, realign_del
);
151 cursor
.flags
|= HAMMER_CURSOR_ATEDISK
;
152 if (hammer_debug_general
& 0x0200) {
153 kprintf("check %016llx %016llx: "
160 cursor
.flags
|= HAMMER_CURSOR_ATEDISK
;
161 if (hammer_debug_general
& 0x0100) {
162 kprintf("check %016llx %016llx: SKIP\n",
163 elm
->base
.obj_id
, elm
->base
.key
);
167 error
= hammer_btree_iterate_reverse(&cursor
);
171 hammer_done_cursor(&cursor
);
172 if (error
== EDEADLK
)
178 * Check pruning list. The list must be sorted in descending order.
181 check_prune(struct hammer_ioc_prune
*prune
, hammer_btree_elm_t elm
,
182 int *realign_cre
, int *realign_del
)
184 struct hammer_ioc_prune_elm
*scan
;
190 for (i
= 0; i
< prune
->nelms
; ++i
) {
191 scan
= &prune
->elms
[i
];
194 * Locate the scan index covering the create and delete TIDs.
196 if (*realign_cre
< 0 &&
197 elm
->base
.create_tid
>= scan
->beg_tid
&&
198 elm
->base
.create_tid
< scan
->end_tid
) {
201 if (*realign_del
< 0 && elm
->base
.delete_tid
&&
202 elm
->base
.delete_tid
> scan
->beg_tid
&&
203 elm
->base
.delete_tid
<= scan
->end_tid
) {
208 * Now check for loop termination.
210 if (elm
->base
.create_tid
>= scan
->end_tid
||
211 elm
->base
.delete_tid
> scan
->end_tid
) {
216 * Now determine if we can delete the record.
218 if (elm
->base
.delete_tid
&&
219 elm
->base
.create_tid
>= scan
->beg_tid
&&
220 elm
->base
.delete_tid
<= scan
->end_tid
&&
221 elm
->base
.create_tid
/ scan
->mod_tid
==
222 elm
->base
.delete_tid
/ scan
->mod_tid
) {
230 * Align the record to cover any gaps created through the deletion of
231 * records within the pruning space. If we were to just delete the records
232 * there would be gaps which in turn would cause a snapshot that is NOT on
233 * a pruning boundary to appear corrupt to the user. Forcing alignment
234 * of the create_tid and delete_tid for retained records 'reconnects'
235 * the previously contiguous space, making it contiguous again after the
238 * The use of a reverse iteration allows us to safely align the records and
239 * related elements without creating temporary overlaps. XXX we should
240 * add ordering dependancies for record buffers to guarantee consistency
244 realign_prune(struct hammer_ioc_prune
*prune
,
245 hammer_cursor_t cursor
, int realign_cre
, int realign_del
)
247 hammer_btree_elm_t elm
;
253 hammer_cursor_downgrade(cursor
);
255 elm
= &cursor
->node
->ondisk
->elms
[cursor
->index
];
256 ++prune
->stat_realignments
;
259 * Align the create_tid. By doing a reverse iteration we guarantee
260 * that all records after our current record have already been
261 * aligned, allowing us to safely correct the right-hand-boundary
262 * (because no record to our right if otherwise exactly matching
263 * will have a create_tid to the left of our aligned create_tid).
265 * Ordering is important here XXX but disk write ordering for
266 * inter-cluster corrections is not currently guaranteed.
269 if (realign_cre
>= 0) {
270 mod
= prune
->elms
[realign_cre
].mod_tid
;
271 delta
= elm
->leaf
.base
.create_tid
% mod
;
273 tid
= elm
->leaf
.base
.create_tid
- delta
+ mod
;
276 error
= hammer_btree_correct_rhb(cursor
, tid
+ 1);
278 error
= hammer_btree_extract(cursor
,
279 HAMMER_CURSOR_GET_RECORD
);
283 error
= hammer_cursor_upgrade(cursor
);
286 hammer_modify_buffer(cursor
->record_buffer
);
287 cursor
->record
->base
.base
.create_tid
= tid
;
288 hammer_modify_node(cursor
->node
);
289 elm
->leaf
.base
.create_tid
= tid
;
295 * Align the delete_tid. This only occurs if the record is historical
296 * was deleted at some point. Realigning the delete_tid does not
297 * move the record within the B-Tree but may cause it to temporarily
298 * overlap a record that has not yet been pruned.
300 if (error
== 0 && realign_del
>= 0) {
301 mod
= prune
->elms
[realign_del
].mod_tid
;
302 delta
= elm
->leaf
.base
.delete_tid
% mod
;
303 hammer_modify_node(cursor
->node
);
305 error
= hammer_btree_extract(cursor
,
306 HAMMER_CURSOR_GET_RECORD
);
308 elm
->leaf
.base
.delete_tid
=
309 elm
->leaf
.base
.delete_tid
-
311 hammer_modify_buffer(cursor
->record_buffer
);
312 cursor
->record
->base
.base
.delete_tid
=
313 elm
->leaf
.base
.delete_tid
;
321 * Iterate through an object's inode or an object's records and record
324 static void add_history(hammer_inode_t ip
, struct hammer_ioc_history
*hist
,
325 hammer_btree_elm_t elm
);
329 hammer_ioc_gethistory(hammer_inode_t ip
, struct hammer_ioc_history
*hist
)
331 struct hammer_cursor cursor
;
332 hammer_btree_elm_t elm
;
336 * Validate the structure and initialize for return.
338 if (hist
->beg_tid
> hist
->end_tid
)
340 if (hist
->flags
& HAMMER_IOC_HISTORY_ATKEY
) {
341 if (hist
->key
> hist
->nxt_key
)
345 hist
->obj_id
= ip
->obj_id
;
347 hist
->nxt_tid
= hist
->end_tid
;
348 hist
->flags
&= ~HAMMER_IOC_HISTORY_NEXT_TID
;
349 hist
->flags
&= ~HAMMER_IOC_HISTORY_NEXT_KEY
;
350 hist
->flags
&= ~HAMMER_IOC_HISTORY_EOF
;
351 hist
->flags
&= ~HAMMER_IOC_HISTORY_UNSYNCED
;
352 if ((ip
->flags
& HAMMER_INODE_MODMASK
) & ~HAMMER_INODE_ITIMES
)
353 hist
->flags
|= HAMMER_IOC_HISTORY_UNSYNCED
;
356 * Setup the cursor. We can't handle undeletable records
357 * (create_tid of 0) at the moment. A create_tid of 0 has
358 * a special meaning and cannot be specified in the cursor.
360 error
= hammer_init_cursor_hmp(&cursor
, &ip
->cache
[0], ip
->hmp
);
362 hammer_done_cursor(&cursor
);
366 cursor
.key_beg
.obj_id
= hist
->obj_id
;
367 cursor
.key_beg
.create_tid
= hist
->beg_tid
;
368 cursor
.key_beg
.delete_tid
= 0;
369 cursor
.key_beg
.obj_type
= 0;
370 if (cursor
.key_beg
.create_tid
== HAMMER_MIN_TID
)
371 cursor
.key_beg
.create_tid
= 1;
373 cursor
.key_end
.obj_id
= hist
->obj_id
;
374 cursor
.key_end
.create_tid
= hist
->end_tid
;
375 cursor
.key_end
.delete_tid
= 0;
376 cursor
.key_end
.obj_type
= 0;
378 cursor
.flags
|= HAMMER_CURSOR_END_EXCLUSIVE
;
380 if (hist
->flags
& HAMMER_IOC_HISTORY_ATKEY
) {
382 * key-range within the file. For a regular file the
383 * on-disk key represents BASE+LEN, not BASE, so the
384 * first possible record containing the offset 'key'
385 * has an on-disk key of (key + 1).
387 cursor
.key_beg
.key
= hist
->key
;
388 cursor
.key_end
.key
= HAMMER_MAX_KEY
;
390 switch(ip
->ino_rec
.base
.base
.obj_type
) {
391 case HAMMER_OBJTYPE_REGFILE
:
392 ++cursor
.key_beg
.key
;
393 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DATA
;
395 case HAMMER_OBJTYPE_DIRECTORY
:
396 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
398 case HAMMER_OBJTYPE_DBFILE
:
399 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DB
;
405 cursor
.key_end
.rec_type
= cursor
.key_beg
.rec_type
;
410 cursor
.key_beg
.key
= 0;
411 cursor
.key_end
.key
= 0;
412 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_INODE
;
413 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_INODE
;
416 error
= hammer_btree_first(&cursor
);
418 elm
= &cursor
.node
->ondisk
->elms
[cursor
.index
];
420 add_history(ip
, hist
, elm
);
421 if (hist
->flags
& (HAMMER_IOC_HISTORY_NEXT_TID
|
422 HAMMER_IOC_HISTORY_NEXT_KEY
|
423 HAMMER_IOC_HISTORY_EOF
)) {
426 error
= hammer_btree_iterate(&cursor
);
428 if (error
== ENOENT
) {
429 hist
->flags
|= HAMMER_IOC_HISTORY_EOF
;
432 hammer_done_cursor(&cursor
);
437 * Add the scanned element to the ioctl return structure. Some special
438 * casing is required for regular files to accomodate how data ranges are
442 add_history(hammer_inode_t ip
, struct hammer_ioc_history
*hist
,
443 hammer_btree_elm_t elm
)
445 if (elm
->base
.btype
!= HAMMER_BTREE_TYPE_RECORD
)
447 if ((hist
->flags
& HAMMER_IOC_HISTORY_ATKEY
) &&
448 ip
->ino_rec
.base
.base
.obj_type
== HAMMER_OBJTYPE_REGFILE
) {
452 if (hist
->nxt_key
> elm
->leaf
.base
.key
- elm
->leaf
.data_len
&&
453 hist
->key
< elm
->leaf
.base
.key
- elm
->leaf
.data_len
) {
454 hist
->nxt_key
= elm
->leaf
.base
.key
- elm
->leaf
.data_len
;
456 if (hist
->nxt_key
> elm
->leaf
.base
.key
)
457 hist
->nxt_key
= elm
->leaf
.base
.key
;
460 * Record is beyond MAXPHYS, there won't be any more records
461 * in the iteration covering the requested offset (key).
463 if (elm
->leaf
.base
.key
>= MAXPHYS
&&
464 elm
->leaf
.base
.key
- MAXPHYS
> hist
->key
) {
465 hist
->flags
|= HAMMER_IOC_HISTORY_NEXT_KEY
;
469 * Data-range of record does not cover the key.
471 if (elm
->leaf
.base
.key
- elm
->leaf
.data_len
> hist
->key
)
474 } else if (hist
->flags
& HAMMER_IOC_HISTORY_ATKEY
) {
478 if (hist
->nxt_key
> elm
->leaf
.base
.key
&&
479 hist
->key
< elm
->leaf
.base
.key
) {
480 hist
->nxt_key
= elm
->leaf
.base
.key
;
484 * Record is beyond the requested key.
486 if (elm
->leaf
.base
.key
> hist
->key
)
487 hist
->flags
|= HAMMER_IOC_HISTORY_NEXT_KEY
;
491 * Add create_tid if it is in-bounds.
493 if ((hist
->count
== 0 ||
494 elm
->leaf
.base
.create_tid
!= hist
->tid_ary
[hist
->count
- 1]) &&
495 elm
->leaf
.base
.create_tid
>= hist
->beg_tid
&&
496 elm
->leaf
.base
.create_tid
< hist
->end_tid
) {
497 if (hist
->count
== HAMMER_MAX_HISTORY_ELMS
) {
498 hist
->nxt_tid
= elm
->leaf
.base
.create_tid
;
499 hist
->flags
|= HAMMER_IOC_HISTORY_NEXT_TID
;
502 hist
->tid_ary
[hist
->count
++] = elm
->leaf
.base
.create_tid
;
506 * Add delete_tid if it is in-bounds. Note that different portions
507 * of the history may have overlapping data ranges with different
508 * delete_tid's. If this case occurs the delete_tid may match the
509 * create_tid of a following record. XXX
514 if (elm
->leaf
.base
.delete_tid
&&
515 elm
->leaf
.base
.delete_tid
>= hist
->beg_tid
&&
516 elm
->leaf
.base
.delete_tid
< hist
->end_tid
) {
517 if (hist
->count
== HAMMER_MAX_HISTORY_ELMS
) {
518 hist
->nxt_tid
= elm
->leaf
.base
.delete_tid
;
519 hist
->flags
|= HAMMER_IOC_HISTORY_NEXT_TID
;
522 hist
->tid_ary
[hist
->count
++] = elm
->leaf
.base
.delete_tid
;