[Ocfs2-devel] [PATCH 3/3] shared-du: using fiemap to figure up the shared extents per file and the footprint in all v4
Jie Liu
jeff.liu at oracle.com
Mon Sep 20 00:12:02 PDT 2010
If issue du(1) with either '--shared-size' or '-E' option, show the shared extents in parens per
file as well as the footprint in the end.
* src/du.c: Add this feature.
Signed-off-by: Jie Liu <jeff.liu at oracle.com>
---
coreutils-6.9/src/du.c | 479 +++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 473 insertions(+), 6 deletions(-)
diff --git a/coreutils-6.9/src/du.c b/coreutils-6.9/src/du.c
index 206d318..41fda57 100644
--- a/coreutils-6.9/src/du.c
+++ b/coreutils-6.9/src/du.c
@@ -45,6 +45,13 @@
#include "xfts.h"
#include "xstrtol.h"
+#include "fiemap.h"
+#include "rbtree.h"
+
+#if HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+#endif
+
extern bool fts_debug;
/* The official name of this program (e.g., no `g' prefix). */
@@ -64,6 +71,10 @@ extern bool fts_debug;
/* Initial size of the hash table. */
#define INITIAL_TABLE_SIZE 103
+#if defined(_IOWR) && !defined(FS_IOC_FIEMAP)
+# define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
+#endif
+
/* Hash structure for inode and device numbers. The separate entry
structure makes it easier to rehash "in place". */
@@ -171,6 +182,31 @@ static enum time_type time_type = time_mtime;
/* User specified date / time style */
static char const *time_style = NULL;
+/* If true, display the size of the shared extents per file and end
+ up with the overall footprint. */
+static bool print_shared_size = false;
+
+/* The size of the shared extents per file. */
+static uint64_t file_shared_extents = 0;
+
+/* The root of our rbtree for tracking extent_info as below. */
+static struct rb_root fe_root;
+
+/* A structure for the extents information. */
+struct extent_info {
+ /* rbtree node */
+ struct rb_node ei_node;
+
+ /* physical offset in bytes */
+ uint64_t ei_physical;
+
+ /* length in bytes for this extent */
+ uint64_t ei_length;
+
+ /* extent shared count */
+ size_t ei_shared_count;
+};
+
/* Format used to display date / time. Controlled by --time-style */
static char const *time_format = NULL;
@@ -215,6 +251,7 @@ static struct option const long_options[] =
{"block-size", required_argument, NULL, 'B'},
{"bytes", no_argument, NULL, 'b'},
{"count-links", no_argument, NULL, 'l'},
+ {"shared-size", no_argument, NULL, 'E'},
{"dereference", no_argument, NULL, 'L'},
{"dereference-args", no_argument, NULL, 'D'},
{"exclude", required_argument, NULL, EXCLUDE_OPTION},
@@ -299,6 +336,7 @@ Mandatory arguments to long options are mandatory for short options too.\n\
-b, --bytes equivalent to `--apparent-size --block-size=1'\n\
-c, --total produce a grand total\n\
-D, --dereference-args dereference FILEs that are symbolic links\n\
+ -E, --shared-size show the size of the shared extents per file\n\
"), stdout);
fputs (_("\
--files0-from=F summarize disk usage of the NUL-terminated file\n\
@@ -443,6 +481,22 @@ print_only_size (uintmax_t n_bytes)
1, output_block_size), stdout);
}
+/* Print footprint follow by STRING. */
+
+static void
+print_footprint (const struct duinfo *pdui, uintmax_t footprint, const char *string)
+{
+ print_only_size (footprint);
+ if (opt_time)
+ {
+ putchar ('\t');
+ show_date (time_format, pdui->tmax);
+ }
+
+ printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n');
+ fflush (stdout);
+}
+
/* Print size (and optionally time) indicated by *PDUI, followed by STRING. */
static void
@@ -454,10 +508,400 @@ print_size (const struct duinfo *pdui, const char *string)
putchar ('\t');
show_date (time_format, pdui->tmax);
}
+
+ /* FIXME: make better formatting output? */
+ if ((print_shared_size) && (file_shared_extents > 0))
+ {
+ putchar ('\t');
+ putchar ('(');
+
+ /* If display file size in bytes (i.e, output_block_size == 1), we
+ should honor pdui->size if the file shared extent size is larger
+ than it. */
+ if ((output_block_size == 1) && (file_shared_extents > pdui->size))
+ file_shared_extents = pdui->size;
+
+ print_only_size (file_shared_extents);
+ putchar (')');
+
+ file_shared_extents = 0;
+ }
printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n');
fflush (stdout);
}
+/* Free all allocated extent_info node from the rbtree. */
+
+static void
+free_extent_info (void)
+{
+ struct rb_node *node;
+ struct extent_info *ei;
+
+ while ((node = rb_first (&fe_root)))
+ {
+ ei = rb_entry (node, struct extent_info, ei_node);
+ rb_erase (&ei->ei_node, &fe_root);
+ free (ei);
+ }
+}
+
+/* Go through the entire tree to sum up the shared extents
+ for whose ei_shared_count > 0. */
+
+static uintmax_t
+figure_up_shared_extent (void)
+{
+ struct rb_node *node;
+ struct extent_info *ei;
+ static uintmax_t total_shared_extent = 0;
+
+ for (node = rb_first (&fe_root); node; node = rb_next (node))
+ {
+ ei = rb_entry (node, struct extent_info, ei_node);
+ if (ei->ei_shared_count > 0)
+ total_shared_extent += ei->ei_length * ei->ei_shared_count;
+ }
+
+ return total_shared_extent;
+}
+
+/* Check to see if two extent_info structs are adjacent and they have
+ same shared count which is greater than zero. */
+
+static bool
+mergable_extent_info (struct extent_info *prev_ei, struct extent_info *ei)
+{
+ return (prev_ei->ei_shared_count == ei->ei_shared_count
+ && prev_ei->ei_physical + prev_ei->ei_length == ei->ei_physical)
+ ? true : false;
+}
+
+/* Insert the new extent_info based on the parent.
+ We should make sure that all splitted extent_info tracks
+ on the rbtree does not has any overlap, so we should abort
+ du process if the extents physical offset is equivalent.
+ After inserting, try to merge to the left or right node. */
+
+static void
+insert_extent_info (struct rb_node *parent,
+ uint64_t fe_physical,
+ uint64_t fe_length,
+ size_t extent_shared_count)
+{
+ struct rb_node **p = parent ? &parent : &((&fe_root)->rb_node);
+ struct rb_node *pp = NULL;
+ struct rb_node *node;
+ struct rb_node *new_node;
+ struct extent_info *ei;
+ struct extent_info *new_ei;
+
+ while (*p)
+ {
+ pp = *p;
+ ei = rb_entry (*p, struct extent_info, ei_node);
+
+ if (fe_physical < ei->ei_physical)
+ p = &(*p)->rb_left;
+ else if (fe_physical > ei->ei_physical)
+ p = &(*p)->rb_right;
+ else
+ assert (0);
+ }
+
+ new_ei = xcalloc (1, sizeof *ei);
+ new_ei->ei_physical = fe_physical;
+ new_ei->ei_length = fe_length;
+ new_ei->ei_shared_count = extent_shared_count;
+ new_node = &new_ei->ei_node;
+
+ rb_link_node (new_node, pp, p);
+ rb_insert_color (new_node, &fe_root);
+
+ /* We only try to merge the extents info which shared count > 0. */
+ if (extent_shared_count > 0)
+ {
+ /* Try to merge to the left node. */
+ node = rb_prev (new_node);
+ if (node)
+ {
+ ei = rb_entry (node, struct extent_info, ei_node);
+ if (mergable_extent_info (ei, new_ei))
+ {
+ new_ei->ei_physical = ei->ei_physical;
+ new_ei->ei_length += ei->ei_length;
+ rb_erase (node, &fe_root);
+ free (ei);
+ }
+ }
+
+ /* Try to merge to the right node. */
+ node = rb_next (new_node);
+ if (node)
+ {
+ ei = rb_entry (node, struct extent_info, ei_node);
+ if (mergable_extent_info (new_ei, ei))
+ {
+ new_ei->ei_length += ei->ei_length;
+ rb_erase (node, &fe_root);
+ free (ei);
+ }
+ }
+ }
+}
+
+/* Search and return the leftmost node from our rbtree whose
+ physical offset is less than the referenced. If there is
+ no such record, return the left most one.
+ split_extent() uses its physical offset and extent length
+ as the comparative judgement to determine how to do extent
+ splitting. */
+
+static struct rb_node *
+lookup_leftmost_extent_info (uint64_t fe_physical)
+{
+ struct rb_node **p = &((&fe_root)->rb_node);
+ struct rb_node *parent = NULL;
+ struct rb_node *prev = NULL;
+ struct extent_info *ei;
+
+ while (*p)
+ {
+ parent = *p;
+ ei = rb_entry (*p, struct extent_info, ei_node);
+
+ if (fe_physical < ei->ei_physical)
+ p = &(*p)->rb_left;
+ else if (fe_physical < ei->ei_physical)
+ p = &(*p)->rb_right;
+ else
+ return parent;
+ }
+
+ /* Return the previous node of parent if the search returned is at
+ the left of parent. If there is no previous node of parent, return
+ parent. */
+ if (parent)
+ {
+ ei = rb_entry (parent, struct extent_info, ei_node);
+ if (fe_physical < ei->ei_physical)
+ prev = rb_prev (parent);
+ }
+
+ return prev ? prev : parent;
+}
+
+/* Split the new extent_info into mutiple items if there is overlap
+ with the search returned, insert each item or increase the
+ existed items shared count for the shared part. */
+
+static void
+split_extent (uint64_t fe_physical, uint64_t fe_length)
+{
+ struct extent_info *ei;
+ struct rb_node *node = NULL;
+ struct rb_node *prev_node = NULL;
+ uint64_t physical_start = fe_physical;
+ uint64_t extent_length = fe_length;
+ uint64_t new_physical_start;
+ uint64_t new_extent_length;
+ uint64_t old_extent_length;
+ size_t extent_shared_count = 0;
+ size_t old_extent_shared_count = 0;
+
+ node = lookup_leftmost_extent_info (physical_start);
+
+ /* We make use of the extent physical offset as the comparative value
+ by combine with the extent length to decide how to split it into
+ sub extents. For each new extent, in short, there are 3 scenarios
+ need to process against the search returned:
+ . physical offset smaller than the searched.
+ . physical offset equal to the searched.
+ . physical offset greater than the searched. */
+ while (extent_length > 0)
+ {
+ if (! node)
+ {
+ insert_extent_info (prev_node, physical_start, extent_length, extent_shared_count);
+ break;
+ }
+
+ ei = rb_entry (node, struct extent_info, ei_node);
+
+ /* The new extent physical offset is smaller than the search returned.
+ there are three scenarios need to check against the old one which shown
+ as the following sketch.
+ |---------| old
+ |------| new1
+ |---------------| new2
+ |------------------------| new3. */
+ if (physical_start < ei->ei_physical)
+ {
+ new_extent_length = MIN (ei->ei_physical - physical_start, extent_length);
+
+ extent_shared_count = 0;
+
+ insert_extent_info (node, physical_start, new_extent_length, extent_shared_count);
+
+ physical_start += new_extent_length;
+ extent_length -= new_extent_length;
+
+ /* Make use of the old parent for the next round checking. */
+ continue;
+ }
+
+ /* The new extent physical offset if equal to the search returned.
+ there are three scenarios need to check against the old one which shown
+ as the following sketch.
+ |----------| old
+ |----------------| new1
+ |----------| new2
+ |-------| new3. */
+ if (physical_start == ei->ei_physical)
+ {
+ old_extent_length = ei->ei_length;
+ old_extent_shared_count = ei->ei_shared_count;
+ new_extent_length = MIN (extent_length, ei->ei_length);
+
+ /* Decrease the existed extent size to the minor,
+ and increase the shared count due to the overlap
+ of them. */
+ ei->ei_length = new_extent_length;
+ ei->ei_shared_count++;
+
+ physical_start += new_extent_length;
+ extent_length -= new_extent_length;
+
+ /* The search returned extent range includes the new one,
+ figure out the not included extent and insert it to rbtree. */
+ if (old_extent_length > new_extent_length)
+ {
+ new_physical_start = ei->ei_physical + new_extent_length;
+ new_extent_length = old_extent_length - new_extent_length;
+ extent_shared_count = old_extent_shared_count;
+
+ insert_extent_info (node, new_physical_start, new_extent_length, extent_shared_count);
+ }
+
+ prev_node = node;
+ node = rb_next (node);
+ continue;
+ }
+
+ /* The new extent physical offset if greater than the search returned.
+ there are three scenarios need to check against the old one which shown
+ as the following sketch.
+ |-----------| old
+ |----| new1
+ |-----------| new2
+ |---------------| new3. */
+ if (physical_start < ei->ei_physical + ei->ei_length)
+ {
+ old_extent_length = ei->ei_physical + ei->ei_length - physical_start;
+ new_extent_length = MIN (extent_length, old_extent_length);
+
+ /* The new extent overlapped with the old one, as a result, we
+ need to increase its shared count before inserting. */
+ extent_shared_count = ei->ei_shared_count + 1;
+ insert_extent_info (node, physical_start, new_extent_length, extent_shared_count);
+
+ /* Decrease the search returned extent size and keep it on the tree. */
+ ei->ei_length = physical_start - ei->ei_physical;
+
+ physical_start += new_extent_length;
+ extent_length -= new_extent_length;
+ continue;
+ }
+
+ /* Now physical_start >= ei->ei_physical + ei->ei_length, so let
+ rb_next() handle it. */
+ prev_node = node;
+ node = rb_next (node);
+ }
+}
+
+/* This function is called once for every file system object that fts
+ encounters, get its extent mapping info for the proceeding extent
+ spliting operation. */
+
+static void
+process_extent (int cwd_fd, char const *file,
+ char const *file_full_name)
+{
+ union { struct fiemap f; char c[4096]; } fiemap_buf;
+ struct fiemap *fiemap = &fiemap_buf.f;
+ struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
+ enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_extents };
+ verify (count != 0);
+ bool last = false;
+ int i;
+
+ int fd = openat (cwd_fd, file, O_RDONLY);
+ if (fd < 0)
+ {
+ fd = open (file_full_name, O_RDONLY);
+ if (fd < 0)
+ {
+ error(0, errno, _("cannot open %s"), quote (file_full_name));
+ return;
+ }
+ }
+
+ memset (&fiemap_buf, 0, sizeof fiemap_buf);
+
+ do {
+ fiemap->fm_length = ~0ULL;
+ fiemap->fm_extent_count = count;
+
+ if (ioctl (fd, FS_IOC_FIEMAP, fiemap) < 0)
+ {
+ error (0, errno, _("%s FIEMAP ioctl failed"), quote (file_full_name));
+ goto close_file;
+ }
+
+ /* If 0 extents are returned, then more ioctls
+ are not needed. */
+ if (fiemap->fm_mapped_extents == 0)
+ goto close_file;
+
+ for (i = 0; i < fiemap->fm_mapped_extents; i++)
+ {
+ uint64_t ext_phy_offset = fm_extents[i].fe_physical;
+ uint64_t ext_len = fm_extents[i].fe_length;
+
+ /* Skip inline file which its data mixed with metadata. */
+ if (fm_extents[i].fe_flags & FIEMAP_EXTENT_DATA_INLINE)
+ {
+ if (fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST)
+ {
+ last = true;
+ break;
+ }
+ continue;
+ }
+
+ /* Increase the shared extents size per file. */
+ if (fm_extents[i].fe_flags & FIEMAP_EXTENT_SHARED)
+ file_shared_extents += fm_extents[i].fe_length;
+
+ split_extent (ext_phy_offset, ext_len);
+
+ if (fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST)
+ last = true;
+ }
+
+ /* Do not calculate the next fiemap start offset if only one
+ extent mapped and it is inline. */
+ if (0 < i)
+ fiemap->fm_start = (fm_extents[i-1].fe_logical + fm_extents[i-1].fe_length);
+
+ } while (! last);
+
+close_file:
+ if (close (fd) < 0)
+ error (0, errno, _("closing %s"), quote (file_full_name));
+}
+
/* This function is called once for every file system object that fts
encounters. fts does a depth-first traversal. This function knows
that and accumulates per-directory totals based on changes in
@@ -483,7 +927,8 @@ process_file (FTS *fts, FTSENT *ent)
static struct dulevel *dulvl;
bool print = true;
- const char *file = ent->fts_path;
+ const char *file_full_name = ent->fts_path;
+ const char *file = ent->fts_accpath;
const struct stat *sb = ent->fts_statp;
bool skip;
@@ -495,18 +940,18 @@ process_file (FTS *fts, FTSENT *ent)
switch (ent->fts_info)
{
case FTS_NS:
- error (0, ent->fts_errno, _("cannot access %s"), quote (file));
+ error (0, ent->fts_errno, _("cannot access %s"), quote (file_full_name));
return false;
case FTS_ERR:
/* if (S_ISDIR (ent->fts_statp->st_mode) && FIXME */
- error (0, ent->fts_errno, _("%s"), quote (file));
+ error (0, ent->fts_errno, _("%s"), quote (file_full_name));
return false;
case FTS_DNR:
/* Don't return just yet, since although the directory is not readable,
we were able to stat it, so we do have a size. */
- error (0, ent->fts_errno, _("cannot read directory %s"), quote (file));
+ error (0, ent->fts_errno, _("cannot read directory %s"), quote (file_full_name));
ok = false;
break;
@@ -619,9 +1064,12 @@ process_file (FTS *fts, FTSENT *ent)
if (!print)
return ok;
+ if (print_shared_size)
+ process_extent (fts->fts_cwd_fd, file, file_full_name);
+
if ((IS_DIR_TYPE (ent->fts_info) && level <= max_depth)
|| ((opt_all && level <= max_depth) || level == 0))
- print_size (&dui_to_print, file);
+ print_size (&dui_to_print, file_full_name);
return ok;
}
@@ -709,7 +1157,7 @@ main (int argc, char **argv)
human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false,
&output_block_size);
- while ((c = getopt_long (argc, argv, DEBUG_OPT "0abchHklmsxB:DLPSX:",
+ while ((c = getopt_long (argc, argv, DEBUG_OPT "0abchHklmsxB:DELPSX:",
long_options, NULL)) != -1)
{
switch (c)
@@ -834,6 +1282,13 @@ main (int argc, char **argv)
}
break;
+ case 'E':
+#ifndef FS_IOC_FIEMAP
+ error (EXIT_FAILURE, 0, _("Shared-du are not supported by this version"));
+#endif
+ print_shared_size = true;
+ break;
+
case FILES0_FROM_OPTION:
files_from = optarg;
break;
@@ -866,6 +1321,9 @@ main (int argc, char **argv)
if (!ok)
usage (EXIT_FAILURE);
+ if (print_shared_size)
+ fe_root = RB_ROOT;
+
if (opt_all & opt_summarize_only)
{
error (0, 0, _("cannot both summarize and show all entries"));
@@ -1015,6 +1473,15 @@ main (int argc, char **argv)
if (files_from)
readtokens0_free (&tok);
+ if (print_shared_size)
+ {
+ uintmax_t tot_shared_extent = figure_up_shared_extent ();
+ uintmax_t footprint = tot_dui.size - tot_shared_extent;
+
+ print_footprint(&tot_dui, footprint, _("footprint"));
+ free_extent_info ();
+ }
+
hash_free (htab);
exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
--
1.5.4.3
More information about the Ocfs2-devel
mailing list