btrfs-progs: replace leafsize with nodesize
[btrfs-progs-unstable/devel.git] / cmds-fi-du.c
blob168fc722e3494eea4d084c122e681acf7d18537b
1 /*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public
4 * License v2 as published by the Free Software Foundation.
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 * General Public License for more details.
11 * You should have received a copy of the GNU General Public
12 * License along with this program; if not, write to the
13 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
14 * Boston, MA 021110-1307, USA.
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <unistd.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdarg.h>
24 #include <getopt.h>
25 #include <fcntl.h>
26 #include <dirent.h>
28 #include <sys/ioctl.h>
29 #include <linux/fs.h>
30 #include <linux/fiemap.h>
32 #include "utils.h"
33 #include "commands.h"
34 #include "kerncompat.h"
35 #include "rbtree.h"
37 #include "interval_tree_generic.h"
39 static int summarize = 0;
40 static unsigned unit_mode = UNITS_RAW;
41 static char path[PATH_MAX] = { 0, };
42 static char *pathp = path;
43 static char *path_max = &path[PATH_MAX - 1];
45 struct shared_extent {
46 struct rb_node rb;
47 u64 start; /* Start of interval */
48 u64 last; /* Last location _in_ interval */
49 u64 __subtree_last;
53 * extent_tree_* functions are defined in the massive interval tree
54 * macro below. This serves to illustrate the api in human-readable
55 * terms.
57 static void
58 extent_tree_insert(struct shared_extent *node, struct rb_root *root);
60 static void
61 extent_tree_remove(struct shared_extent *node, struct rb_root *root);
63 static struct shared_extent *
64 extent_tree_iter_first(struct rb_root *root,
65 u64 start, u64 last);
67 static struct shared_extent *
68 extent_tree_iter_next(struct shared_extent *node,
69 u64 start, u64 last);
71 #define START(node) ((node)->start)
72 #define LAST(node) ((node)->last)
74 INTERVAL_TREE_DEFINE(struct shared_extent, rb,
75 u64, __subtree_last,
76 START, LAST, static, extent_tree)
78 static int add_shared_extent(u64 start, u64 len, struct rb_root *root)
80 struct shared_extent *sh;
82 BUG_ON(len == 0);
84 sh = calloc(1, sizeof(*sh));
85 if (!sh)
86 return ENOMEM;
88 sh->start = start;
89 sh->last = (start + len - 1);
91 extent_tree_insert(sh, root);
93 return 0;
96 static void cleanup_shared_extents(struct rb_root *root)
98 struct shared_extent *s;
99 struct shared_extent *tmp;
101 if (!root)
102 return;
104 s = extent_tree_iter_first(root, 0, -1ULL);
105 while (s) {
106 tmp = extent_tree_iter_next(s, 0, -1ULL);
107 extent_tree_remove(s, root);
109 free(s);
110 s = tmp;
114 #define dprintf(...)
117 * Find all extents which overlap 'n', calculate the space
118 * covered by them and remove those nodes from the tree.
120 static u64 count_unique_bytes(struct rb_root *root, struct shared_extent *n)
122 struct shared_extent *tmp;
123 u64 wstart = n->start;
124 u64 wlast = n->last;
126 dprintf("Count overlaps:");
128 do {
130 * Expand our search window based on the lastest
131 * overlapping extent. Doing this will allow us to
132 * find all possible overlaps
134 if (wstart > n->start)
135 wstart = n->start;
136 if (wlast < n->last)
137 wlast = n->last;
139 dprintf(" (%llu, %llu)", n->start, n->last);
141 tmp = n;
142 n = extent_tree_iter_next(n, wstart, wlast);
144 extent_tree_remove(tmp, root);
145 free(tmp);
146 } while (n);
148 dprintf("; wstart: %llu wlast: %llu total: %llu\n", wstart,
149 wlast, wlast - wstart + 1);
151 return wlast - wstart + 1;
155 * What we want to do here is get a count of shared bytes within the
156 * set of extents we have collected. Specifcally, we don't want to
157 * count any byte more than once, so just adding them up doesn't
158 * work.
160 * For each set of overlapping extents we find the lowest start and
161 * highest end. From there we have the actual number of bytes which is
162 * shared across all of the extents in our set. A sum of each sets
163 * extent length is returned.
165 static void count_shared_bytes(struct rb_root *root, u64 *ret_cnt)
167 u64 count = 0;
168 struct shared_extent *s = extent_tree_iter_first(root,
169 0, -1ULL);
171 if (!s)
172 goto out;
174 while (s) {
176 * Find all extents which overlap 's', calculate the space
177 * covered by them and remove those nodes from the tree.
179 count += count_unique_bytes(root, s);
182 * Since count_unique_bytes will be emptying the tree,
183 * we can grab the first node here
185 s = extent_tree_iter_first(root, 0, -1ULL);
188 BUG_ON(!RB_EMPTY_ROOT(root));
189 out:
190 *ret_cnt = count;
193 /* Track which inodes we've seen for the purposes of hardlink detection. */
194 struct seen_inode {
195 struct rb_node i_node;
196 u64 i_ino;
197 u64 i_subvol;
199 static struct rb_root seen_inodes = RB_ROOT;
201 static int cmp_si(struct seen_inode *si, u64 ino, u64 subvol)
203 if (ino < si->i_ino)
204 return -1;
205 else if (ino > si->i_ino)
206 return 1;
207 if (subvol < si->i_subvol)
208 return -1;
209 else if (subvol > si->i_subvol)
210 return 1;
211 return 0;
214 static int mark_inode_seen(u64 ino, u64 subvol)
216 int cmp;
217 struct rb_node **p = &seen_inodes.rb_node;
218 struct rb_node *parent = NULL;
219 struct seen_inode *si;
221 while (*p) {
222 parent = *p;
224 si = rb_entry(parent, struct seen_inode, i_node);
225 cmp = cmp_si(si, ino, subvol);
226 if (cmp < 0)
227 p = &(*p)->rb_left;
228 else if (cmp > 0)
229 p = &(*p)->rb_right;
230 else
231 BUG();
234 si = calloc(1, sizeof(*si));
235 if (!si)
236 return ENOMEM;
238 si->i_ino = ino;
239 si->i_subvol = subvol;
241 rb_link_node(&si->i_node, parent, p);
242 rb_insert_color(&si->i_node, &seen_inodes);
244 return 0;
247 static int inode_seen(u64 ino, u64 subvol)
249 int cmp;
250 struct rb_node *n = seen_inodes.rb_node;
251 struct seen_inode *si;
253 while (n) {
254 si = rb_entry(n, struct seen_inode, i_node);
256 cmp = cmp_si(si, ino, subvol);
257 if (cmp < 0)
258 n = n->rb_left;
259 else if (cmp > 0)
260 n = n->rb_right;
261 else
262 return EEXIST;
264 return 0;
267 static void clear_seen_inodes(void)
269 struct rb_node *n = rb_first(&seen_inodes);
270 struct seen_inode *si;
272 while (n) {
273 si = rb_entry(n, struct seen_inode, i_node);
275 rb_erase(&si->i_node, &seen_inodes);
276 free(si);
278 n = rb_first(&seen_inodes);
283 * Inline extents are skipped because they do not take data space,
284 * delalloc and unknown are skipped because we do not know how much
285 * space they will use yet.
287 #define SKIP_FLAGS (FIEMAP_EXTENT_UNKNOWN|FIEMAP_EXTENT_DELALLOC|FIEMAP_EXTENT_DATA_INLINE)
288 static int du_calc_file_space(int fd, struct rb_root *shared_extents,
289 u64 *ret_total, u64 *ret_shared)
291 char buf[16384];
292 struct fiemap *fiemap = (struct fiemap *)buf;
293 struct fiemap_extent *fm_ext = &fiemap->fm_extents[0];
294 int count = (sizeof(buf) - sizeof(*fiemap)) /
295 sizeof(struct fiemap_extent);
296 unsigned int i, ret;
297 int last = 0;
298 int rc;
299 u64 ext_len;
300 u64 file_total = 0;
301 u64 file_shared = 0;
302 u32 flags;
304 memset(fiemap, 0, sizeof(struct fiemap));
306 do {
307 fiemap->fm_length = ~0ULL;
308 fiemap->fm_extent_count = count;
309 rc = ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap);
310 if (rc < 0) {
311 ret = errno;
312 goto out;
315 /* If 0 extents are returned, then more ioctls are not needed */
316 if (fiemap->fm_mapped_extents == 0)
317 break;
319 for (i = 0; i < fiemap->fm_mapped_extents; i++) {
320 ext_len = fm_ext[i].fe_length;
321 flags = fm_ext[i].fe_flags;
323 if (flags & FIEMAP_EXTENT_LAST)
324 last = 1;
326 if (flags & SKIP_FLAGS)
327 continue;
329 file_total += ext_len;
330 if (flags & FIEMAP_EXTENT_SHARED) {
331 file_shared += ext_len;
333 if (shared_extents) {
334 ret = add_shared_extent(fm_ext[i].fe_physical,
335 ext_len,
336 shared_extents);
337 if (ret)
338 goto out;
343 fiemap->fm_start = (fm_ext[i - 1].fe_logical +
344 fm_ext[i - 1].fe_length);
345 } while (last == 0);
347 *ret_total = file_total;
348 *ret_shared = file_shared;
350 ret = 0;
351 out:
352 return ret;
355 struct du_dir_ctxt {
356 u64 bytes_total;
357 u64 bytes_shared;
358 DIR *dirstream;
359 struct rb_root shared_extents;
361 #define INIT_DU_DIR_CTXT (struct du_dir_ctxt) { 0ULL, 0ULL, NULL, RB_ROOT }
363 static int du_add_file(const char *filename, int dirfd,
364 struct rb_root *shared_extents, u64 *ret_total,
365 u64 *ret_shared, int top_level);
367 static int du_walk_dir(struct du_dir_ctxt *ctxt, struct rb_root *shared_extents)
369 int ret, type;
370 struct dirent *entry;
371 DIR *dirstream = ctxt->dirstream;
373 ret = 0;
374 do {
375 u64 tot, shr;
377 errno = 0;
378 entry = readdir(dirstream);
379 if (entry) {
380 if (strcmp(entry->d_name, ".") == 0
381 || strcmp(entry->d_name, "..") == 0)
382 continue;
384 type = entry->d_type;
385 if (type == DT_REG || type == DT_DIR) {
386 tot = shr = 0;
388 ret = du_add_file(entry->d_name,
389 dirfd(dirstream),
390 shared_extents, &tot, &shr,
392 if (ret)
393 break;
395 ctxt->bytes_total += tot;
396 ctxt->bytes_shared += shr;
399 } while (entry != NULL);
401 return ret;
404 static int du_add_file(const char *filename, int dirfd,
405 struct rb_root *shared_extents, u64 *ret_total,
406 u64 *ret_shared, int top_level)
408 int ret, len = strlen(filename);
409 char *pathtmp;
410 struct stat st;
411 struct du_dir_ctxt dir = INIT_DU_DIR_CTXT;
412 int is_dir = 0;
413 u64 file_total = 0;
414 u64 file_shared = 0;
415 u64 dir_set_shared = 0;
416 u64 subvol;
417 int fd;
418 DIR *dirstream = NULL;
420 ret = fstatat(dirfd, filename, &st, 0);
421 if (ret) {
422 ret = errno;
423 return ret;
426 if (!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode))
427 return 0;
429 if (len > (path_max - pathp)) {
430 error("path too long: %s %s", path, filename);
431 return ENAMETOOLONG;
434 pathtmp = pathp;
435 if (pathp == path)
436 ret = sprintf(pathp, "%s", filename);
437 else
438 ret = sprintf(pathp, "/%s", filename);
439 pathp += ret;
441 fd = open_file_or_dir3(path, &dirstream, O_RDONLY);
442 if (fd < 0) {
443 ret = fd;
444 goto out;
447 ret = lookup_ino_rootid(fd, &subvol);
448 if (ret)
449 goto out_close;
451 if (inode_seen(st.st_ino, subvol))
452 goto out_close;
454 ret = mark_inode_seen(st.st_ino, subvol);
455 if (ret)
456 goto out_close;
458 if (S_ISREG(st.st_mode)) {
459 ret = du_calc_file_space(fd, shared_extents, &file_total,
460 &file_shared);
461 if (ret)
462 goto out_close;
463 } else if (S_ISDIR(st.st_mode)) {
464 struct rb_root *root = shared_extents;
467 * We collect shared extents in an rb_root, the top
468 * level caller will not pass a root down, so use the
469 * one on our dir context.
471 if (top_level)
472 root = &dir.shared_extents;
474 is_dir = 1;
476 dir.dirstream = dirstream;
477 ret = du_walk_dir(&dir, root);
478 *pathp = '\0';
479 if (ret) {
480 if (top_level)
481 cleanup_shared_extents(root);
482 goto out_close;
485 file_total = dir.bytes_total;
486 file_shared = dir.bytes_shared;
487 if (top_level)
488 count_shared_bytes(root, &dir_set_shared);
491 if (!summarize || top_level) {
492 u64 excl = file_total - file_shared;
494 if (top_level) {
495 u64 set_shared = file_shared;
497 if (is_dir)
498 set_shared = dir_set_shared;
500 printf("%10s %10s %10s %s\n",
501 pretty_size_mode(file_total, unit_mode),
502 pretty_size_mode(excl, unit_mode),
503 pretty_size_mode(set_shared, unit_mode),
504 path);
505 } else {
506 printf("%10s %10s %10s %s\n",
507 pretty_size_mode(file_total, unit_mode),
508 pretty_size_mode(excl, unit_mode),
509 "-", path);
513 if (ret_total)
514 *ret_total = file_total;
515 if (ret_shared)
516 *ret_shared = file_shared;
518 out_close:
519 close_file_or_dir(fd, dirstream);
520 out:
521 /* reset path to just before this element */
522 pathp = pathtmp;
524 return ret;
527 const char * const cmd_filesystem_du_usage[] = {
528 "btrfs filesystem du [options] <path> [<path>..]",
529 "Summarize disk usage of each file.",
530 "-s|--summarize display only a total for each argument",
531 HELPINFO_UNITS_LONG,
532 NULL
535 int cmd_filesystem_du(int argc, char **argv)
537 int ret = 0, err = 0;
538 int i;
540 unit_mode = get_unit_mode_from_arg(&argc, argv, 1);
542 optind = 1;
543 while (1) {
544 static const struct option long_options[] = {
545 { "summarize", no_argument, NULL, 's'},
546 { NULL, 0, NULL, 0 }
548 int c = getopt_long(argc, argv, "s", long_options, NULL);
550 if (c < 0)
551 break;
552 switch (c) {
553 case 's':
554 summarize = 1;
555 break;
556 default:
557 usage(cmd_filesystem_du_usage);
561 if (check_argc_min(argc - optind, 1))
562 usage(cmd_filesystem_du_usage);
564 printf("%10s %10s %10s %s\n", "Total", "Exclusive", "Set shared",
565 "Filename");
567 for (i = optind; i < argc; i++) {
568 ret = du_add_file(argv[i], AT_FDCWD, NULL, NULL, NULL, 1);
569 if (ret) {
570 error("cannot check space of '%s': %s", argv[i],
571 strerror(ret));
572 err = 1;
575 /* reset hard-link detection for each argument */
576 clear_seen_inodes();
579 return err;