[Ocfs2-tools-devel] [PATCH 3/5] O2info: Add new running codes for '--freefrag'

Tristan Ye tristan.ye at oracle.com
Sat Jan 29 22:31:26 PST 2011


This patch teaches o2info to dump the fs's fragmentation info, like the following:

--------------------------------------------------------
Blocksize: 4096 bytes
Clustersize: 4096 bytes
Total clusters: 1002046
Free clusters: 787419 (78.6%)

Min. free extent: 4 KB
Max. free extent: 124924 KB
Avg. free extent: 6644 KB

Chunksize: 4096 bytes (1 clusters)
Total chunks: 1002047
Free chunks: 787419 (78.6%)

HISTOGRAM OF FREE EXTENT SIZES:
Extent Size Range :  Free extents  Free Clusters  Percent
    4K...    8K-  :             9             9    0.00%
    8K...   16K-  :            12            31    0.00%
   16K...   32K-  :            45           249    0.03%
   32K...   64K-  :           376          4654    0.59%
    8M...   16M-  :             2          5180    0.66%
   16M...   32M-  :             5         27556    3.50%
   32M...   64M-  :             1         16087    2.04%
   64M...  128M-  :            24        733653   93.17%
--------------------------------------------------------

The '--freefrag' task also supports both approaches: the libocfs2 solution
and the ioctl solution.

Signed-off-by: Tristan Ye <tristan.ye at oracle.com>
---
 include/ocfs2-kernel/ocfs2_ioctl.h |   23 ++++
 o2info/libo2info.c                 |  203 ++++++++++++++++++++++++++++++++
 o2info/libo2info.h                 |   24 ++++
 o2info/o2info.1.in                 |   52 ++++++++-
 o2info/o2info.c                    |   15 +++
 o2info/operations.c                |  225 ++++++++++++++++++++++++++++++++++++
 6 files changed, 541 insertions(+), 1 deletions(-)

diff --git a/include/ocfs2-kernel/ocfs2_ioctl.h b/include/ocfs2-kernel/ocfs2_ioctl.h
index 57aa718..05ea113 100644
--- a/include/ocfs2-kernel/ocfs2_ioctl.h
+++ b/include/ocfs2-kernel/ocfs2_ioctl.h
@@ -152,6 +152,28 @@ struct ocfs2_info_freeinode {
 	__u32 ifi_pad;
 };
 
+#define OCFS2_INFO_MAX_HIST     (32)	/* number of histogram buckets */
+/* Request block for OCFS2_INFO_FREEFRAG: chunksize in, statistics out. */
+struct ocfs2_info_freefrag {
+	struct ocfs2_info_request iff_req;
+	struct ocfs2_info_freefrag_stats { /* (out) */
+		struct ocfs2_info_free_chunk_list {
+			__u32 fc_chunks[OCFS2_INFO_MAX_HIST];
+			__u32 fc_clusters[OCFS2_INFO_MAX_HIST];
+		} ffs_fc_hist; /* o2info copies this into its own histogram */
+		__u32 ffs_clusters; /* total clusters */
+		__u32 ffs_free_clusters;
+		__u32 ffs_free_chunks; /* free chunks of iff_chunksize */
+		__u32 ffs_free_chunks_real; /* free extents; 0: no min/max/avg */
+		__u32 ffs_min; /* Minimum free chunksize in clusters */
+		__u32 ffs_max; /* Maximum free chunksize in clusters */
+		__u32 ffs_avg; /* Average free chunksize in clusters */
+		__u32 ffs_pad;
+	} iff_ffs;
+	__u32 iff_chunksize; /* chunksize in clusters(in) */
+	__u32 iff_pad;
+};
+
 /* Codes for ocfs2_info_request */
 enum ocfs2_info_type {
 	OCFS2_INFO_CLUSTERSIZE = 1,
@@ -162,6 +184,7 @@ enum ocfs2_info_type {
 	OCFS2_INFO_FS_FEATURES,
 	OCFS2_INFO_JOURNAL_SIZE,
 	OCFS2_INFO_FREEINODE,
+	OCFS2_INFO_FREEFRAG,
 	OCFS2_INFO_NUM_TYPES
 };
 
diff --git a/o2info/libo2info.c b/o2info/libo2info.c
index 912c938..d716151 100644
--- a/o2info/libo2info.c
+++ b/o2info/libo2info.c
@@ -153,3 +153,206 @@ out:
 
 	return ret;
 }
+
+/* Floor of log2(arg); both 0 and 1 map to 0. */
+static int ul_log2(unsigned long arg)
+{
+	unsigned int bits = 0;
+
+	/* Every shift that still leaves arg non-zero accounts for one bit. */
+	for (arg >>= 1; arg != 0; arg >>= 1)
+		bits++;
+
+	/* bits is at most the width of unsigned long, so it fits an int. */
+	return bits;
+}
+
+/*
+ * Account one free extent of 'chunksize' clusters: bump its log2 histogram
+ * bucket, widen min/max, and grow the running sum (off->avg) and count.
+ */
+static void o2info_update_freefrag_stats(struct o2info_freefrag *off,
+					 unsigned int chunksize)
+{
+	int bucket = ul_log2(chunksize);
+
+	/* Oversized extents all land in the last bucket. */
+	if (bucket > OCFS2_INFO_MAX_HIST - 1)
+		bucket = OCFS2_INFO_MAX_HIST - 1;
+
+	off->histogram.fc_chunks[bucket] += 1;
+	off->histogram.fc_clusters[bucket] += chunksize;
+
+	off->max = (off->max < chunksize) ? chunksize : off->max;
+	off->min = (off->min > chunksize) ? chunksize : off->min;
+	off->avg += chunksize;
+	off->free_chunks_real += 1;
+}
+
+/*
+ * Walk every group descriptor of one global-bitmap chain, folding its free
+ * extents and fully free chunks into 'off'.  Returns 0 or the error hit.
+ */
+static int o2info_scan_global_bitmap_chain(ocfs2_filesys *fs,
+					   struct ocfs2_chain_rec *rec,
+					   struct o2info_freefrag *off)
+{
+	int ret = 0, used;
+	uint64_t blkno;
+	char *block = NULL;
+	struct ocfs2_group_desc *bg = NULL;
+	unsigned int max_bits, num_clusters;
+	unsigned int offset = 0, cluster, chunk;
+	unsigned int chunk_free, last_chunksize = 0;
+
+	if (!rec->c_free)
+		goto out;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &block);
+	if (ret) {
+		tcom_err(ret, "while allocating block buffer");
+		goto out;
+	}
+
+	do {
+		if (!bg)
+			blkno = rec->c_blkno;
+		else
+			blkno = bg->bg_next_group;
+
+		ret = ocfs2_read_blocks(fs, blkno, 1, block);
+		if (ret < 0) {
+			tcom_err(ret, "while reading group descriptor "
+				 "%"PRIu64" for stats", blkno);
+			goto out;
+		}
+
+		bg = (struct ocfs2_group_desc *)block;
+
+		if (!bg->bg_free_bits_count)
+			continue;
+
+		max_bits = bg->bg_bits;
+		offset = 0;
+
+		for (chunk = 0; chunk < off->chunks_in_group; chunk++) {
+			/*
+			 * last chunk may be not an entire one.
+			 */
+			if ((offset + off->clusters_in_chunk) > max_bits)
+				num_clusters = max_bits - offset;
+			else
+				num_clusters = off->clusters_in_chunk;
+			chunk_free = 0;
+
+			for (cluster = 0; cluster < num_clusters; cluster++) {
+				used = ocfs2_test_bit(offset,
+						(unsigned long *)bg->bg_bitmap);
+				if (!used) {
+					last_chunksize++;
+					chunk_free++;
+				}
+				/* A used bit closes the open free run. */
+				if (used && (last_chunksize)) {
+					o2info_update_freefrag_stats(off,
+								last_chunksize);
+					last_chunksize = 0;
+				}
+				offset++;
+			}
+
+			if (chunk_free == off->clusters_in_chunk)
+				off->free_chunks++;
+		}
+
+		/*
+		 * Close the run still open at the end of this group, then reset
+		 * it so it is not counted again inside the next group.
+		 */
+		if (last_chunksize)
+			o2info_update_freefrag_stats(off, last_chunksize);
+		last_chunksize = 0;
+	} while (bg->bg_next_group);
+
+out:
+	if (block)
+		ocfs2_free(&block);
+
+	return ret;
+}
+
+static int o2info_scan_global_bitmap(ocfs2_filesys *fs,
+				     struct ocfs2_chain_list *cl,
+				     struct o2info_freefrag *off)
+{
+	int ret = 0, i;
+	struct ocfs2_chain_rec *rec = NULL;
+
+	off->chunks_in_group = (cl->cl_cpg / off->clusters_in_chunk) + 1;
+
+	for (i = 0; i < cl->cl_next_free_rec; i++) {
+		rec = &(cl->cl_recs[i]);
+		ret = o2info_scan_global_bitmap_chain(fs, rec, off);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Fill 'off' with free-space fragmentation statistics read through libocfs2
+ * from the global bitmap.  'off' must be initialised beforehand; on success
+ * min/max/avg are converted from clusters to KB.  Returns 0 or an error code.
+ */
+int o2info_get_freefrag(ocfs2_filesys *fs, struct o2info_freefrag *off)
+{
+	int ret = 0;
+	char *block = NULL;
+	uint64_t gb_inode;
+	struct ocfs2_dinode *gb_di = NULL;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &block);
+	if (ret) {
+		tcom_err(ret, "while allocating block buffer");
+		goto out;
+	}
+	gb_di = (struct ocfs2_dinode *)block;
+
+	ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE,
+					0, &gb_inode);
+	if (ret) {
+		tcom_err(ret, "while looking up the global bitmap inode");
+		goto out;
+	}
+
+	ret = ocfs2_read_inode(fs, gb_inode, (char *)gb_di);
+	if (ret) {
+		tcom_err(ret, "reading global_bitmap inode "
+			 "%"PRIu64" for stats", gb_inode);
+		goto out;
+	}
+
+	off->clusters = gb_di->id1.bitmap1.i_total;
+	off->free_clusters = gb_di->id1.bitmap1.i_total -
+				gb_di->id1.bitmap1.i_used;
+	off->total_chunks = (off->clusters + off->clusters_in_chunk) >>
+				(off->chunkbits - off->clustersize_bits);
+
+	ret = o2info_scan_global_bitmap(fs, &(gb_di->id2.i_chain), off);
+	if (ret)
+		goto out;
+
+	if (off->free_chunks_real) {
+		off->min <<= (off->clustersize_bits - 10);
+		off->max <<= (off->clustersize_bits - 10);
+		off->avg /= off->free_chunks_real;
+		off->avg <<= (off->clustersize_bits - 10);
+	} else
+		off->min = 0;	/* no free extent: drop the ~0U start value */
+out:
+	if (block)
+		ocfs2_free(&block);
+
+	return ret;
+}
diff --git a/o2info/libo2info.h b/o2info/libo2info.h
index 08a0500..e8a8616 100644
--- a/o2info/libo2info.h
+++ b/o2info/libo2info.h
@@ -50,9 +50,33 @@ struct o2info_freeinode {
 	struct o2info_local_freeinode fi[OCFS2_MAX_SLOTS];
 };
 
+#define DEFAULT_CHUNKSIZE (1024*1024)	/* bytes; used when chunkbytes is 0 */
+/* Free-extent histogram; bucket index is log2(extent size in clusters). */
+struct free_chunk_histogram {
+	uint32_t fc_chunks[OCFS2_INFO_MAX_HIST];	/* extents per bucket */
+	uint32_t fc_clusters[OCFS2_INFO_MAX_HIST];	/* clusters per bucket */
+};
+/* Parameters and results of one free-space fragmentation scan. */
+struct o2info_freefrag {
+	unsigned long chunkbytes;	/* requested chunk size in bytes, 0 if unset */
+	uint32_t clusters;		/* total clusters */
+	uint32_t free_clusters;
+	uint32_t total_chunks;
+	uint32_t free_chunks;		/* chunks whose clusters are all free */
+	uint32_t free_chunks_real;	/* number of free extents found */
+	int clustersize_bits;		/* log2 of the cluster size in bytes */
+	int blksize_bits;		/* log2 of the block size in bytes */
+	int chunkbits;			/* log2 of the chunk size in bytes */
+	uint32_t clusters_in_chunk;
+	uint32_t chunks_in_group;	/* chunk slots scanned per group */
+	uint32_t min, max, avg; /* extent size: clusters during scan, KB after */
+	struct free_chunk_histogram histogram;
+};
+
 int o2info_get_fs_features(ocfs2_filesys *fs, struct o2info_fs_features *ofs);
 int o2info_get_volinfo(ocfs2_filesys *fs, struct o2info_volinfo *vf);
 int o2info_get_mkfs(ocfs2_filesys *fs, struct o2info_mkfs *oms);
 int o2info_get_freeinode(ocfs2_filesys *fs, struct o2info_freeinode *ofi);
+int o2info_get_freefrag(ocfs2_filesys *fs, struct o2info_freefrag *off);
 
 #endif
diff --git a/o2info/o2info.1.in b/o2info/o2info.1.in
index 2c77457..66e2483 100644
--- a/o2info/o2info.1.in
+++ b/o2info/o2info.1.in
@@ -2,7 +2,7 @@
 .SH "NAME"
 o2info \- Dump \fIOCFS2\fR file system information on disk.
 .SH "SYNOPSIS"
-\fBo2info\fR [\fB\-C|\-\-cluster\-coherent\fR] [\fB\-\-fs\-features\fR] [\fB\-\-volinfo\fR] [\fB\-\-mkfs\fR] [\fB\-\-freeinode\fR] <\fBdevice or file\fR>
+\fBo2info\fR [\fB\-C|\-\-cluster\-coherent\fR] [\fB\-\-fs\-features\fR] [\fB\-\-volinfo\fR] [\fB\-\-mkfs\fR] [\fB\-\-freeinode\fR] [\fB\-\-freefrag\fR \fIchunksize\fR] <\fBdevice or file\fR>
 
 .SH "DESCRIPTION"
 .PP
@@ -31,6 +31,10 @@ Dump an original format of mkfs.ocfs's arguments, importantly, which can be used
 Display free inode space for all nodes in \fIOCFS2\fR cluster.
 
 .TP
+\fB\-\-freefrag\fR \fIchunksize\fR
+Report free space fragmentation on an \fIOCFS2\fR filesystem. By scanning the global allocation bitmap, it prints how many chunks of \fIchunksize\fR are available as contiguous and aligned free space, together with the percentage of such free chunks. It also displays the minimum/maximum/average free extent size in the filesystem, along with a histogram of all free extents. \fBNOTE\fR: \fIchunksize\fR is given in KB; it must be a power of two and be equal to or larger than the filesystem's clustersize.
+
+.TP
 \fB\-V, \-\-version\fR
 Show version and exit.
 
@@ -57,6 +61,52 @@ Cluster Size: 4096
 .br
     Features: unwritten
 
+[oracle@node1 ~]# o2info --freefrag 128 /storage/testfile      # For non-privileged users.
+.br
+[root@node1 ~]# o2info --freefrag 128 /dev/sda1
+.br
+Blocksize: 4096 bytes
+.br
+Clustersize: 32768 bytes
+.br
+Total clusters: 915704
+.br
+Free clusters: 891687 (97.4%)
+.br
+
+Min. free extent: 32 KB
+.br
+Max. free extent: 1032160 KB
+.br
+Avg. free extent: 663552 KB
+.br
+
+Chunksize: 131072 bytes (4 clusters)
+.br
+Total chunks: 228927
+.br
+Free chunks: 222888 (97.4%)
+.br
+
+HISTOGRAM OF FREE EXTENT SIZES:
+.br
+Extent Size Range :  Free extents  Free Clusters  Percent
+.br
+   32K...   64K-  :             1             1    0.00%
+.br
+    1M...    2M-  :             9           288    0.03%
+.br
+    8M...   16M-  :             2           831    0.09%
+.br
+   32M...   64M-  :             1          2047    0.23%
+.br
+  128M...  256M-  :             1          8191    0.92%
+.br
+  256M...  512M-  :             2         21706    2.43%
+.br
+  512M... 1024M-  :            27        858623   96.29%
+.br
+
 
 .SH "SEE ALSO"
 .BR debugfs.ocfs2(8)
diff --git a/o2info/o2info.c b/o2info/o2info.c
index 62a92f1..be9bc9b 100644
--- a/o2info/o2info.c
+++ b/o2info/o2info.c
@@ -38,6 +38,7 @@ extern struct o2info_operation fs_features_op;
 extern struct o2info_operation volinfo_op;
 extern struct o2info_operation mkfs_op;
 extern struct o2info_operation freeinode_op;
+extern struct o2info_operation freefrag_op;
 
 static LIST_HEAD(o2info_op_task_list);
 static int o2info_op_task_count;
@@ -155,6 +156,19 @@ static struct o2info_option freeinode_option = {
 	.opt_private	= NULL,
 };
 
+static struct o2info_option freefrag_option = {	/* --freefrag <chunksize> */
+	.opt_option	= {
+		.name		= "freefrag",
+		.val		= CHAR_MAX,	/* NOTE(review): no short letter? confirm */
+		.has_arg	= 1,	/* presumably getopt's required_argument */
+		.flag		= NULL,
+	},
+	.opt_help	= "   --freefrag <chunksize in KB>",
+	.opt_handler	= NULL,
+	.opt_op		= &freefrag_op,	/* operation tied to this option */
+	.opt_private	= NULL,
+};
+
 static struct o2info_option *options[] = {
 	&help_option,
 	&version_option,
@@ -163,6 +177,7 @@ static struct o2info_option *options[] = {
 	&volinfo_option,
 	&mkfs_option,
 	&freeinode_option,
+	&freefrag_option,
 	NULL,
 };
 
diff --git a/o2info/operations.c b/o2info/operations.c
index a0e85d6..a5bd692 100644
--- a/o2info/operations.c
+++ b/o2info/operations.c
@@ -612,3 +612,228 @@ static int freeinode_run(struct o2info_operation *op,
 DEFINE_O2INFO_OP(freeinode,
 		 freeinode_run,
 		 NULL);
+
+/* Floor of log2(arg); both 0 and 1 map to 0. */
+static int ul_log2(unsigned long arg)
+{
+	unsigned int bits = 0;
+
+	/* Every shift that still leaves arg non-zero accounts for one bit. */
+	for (arg >>= 1; arg != 0; arg >>= 1)
+		bits++;
+
+	/* bits is at most the width of unsigned long, so it fits an int. */
+	return bits;
+}
+
+/*
+ * Reset 'off' for a new run: derive the log2 sizes from 'ovf', size the chunk
+ * from off->chunkbytes (DEFAULT_CHUNKSIZE when 0), clear all counters.
+ */
+static int o2info_init_freefrag(struct o2info_freefrag *off,
+				struct o2info_volinfo *ovf)
+{
+	unsigned long bytes;
+	int slot = OCFS2_INFO_MAX_HIST;
+
+	bytes = off->chunkbytes ? off->chunkbytes : DEFAULT_CHUNKSIZE;
+	off->blksize_bits = ul_log2((unsigned long)ovf->blocksize);
+	off->clustersize_bits = ul_log2((unsigned long)ovf->clustersize);
+	off->chunkbits = ul_log2(bytes);
+	off->clusters_in_chunk = bytes >> off->clustersize_bits;
+
+	/* min starts at the largest value so the first extent replaces it. */
+	off->min = ~0U;
+	off->max = 0;
+	off->avg = 0;
+	off->free_chunks = 0;
+	off->free_chunks_real = 0;
+
+	while (slot-- > 0) {
+		off->histogram.fc_chunks[slot] = 0;
+		off->histogram.fc_clusters[slot] = 0;
+	}
+
+	return 0;	/* nothing here can fail */
+}
+
+/* Fill 'off' from the OCFS2_INFO_FREEFRAG ioctl reply (min/max/avg in KB). */
+static int get_freefrag_ioctl(struct o2info_operation *op, int fd,
+			      struct o2info_freefrag *off)
+{
+	uint64_t reqs[1];
+	int ret = 0, flags = 0;
+	struct ocfs2_info info;
+	struct ocfs2_info_freefrag oiff;
+	uint32_t unknowns = 0, errors = 0, fills = 0;
+
+	if (!cluster_coherent)
+		flags |= OCFS2_INFO_FL_NON_COHERENT;
+
+	o2info_fill_request((struct ocfs2_info_request *)&oiff, sizeof(oiff),
+			    OCFS2_INFO_FREEFRAG, flags);
+	oiff.iff_chunksize = off->clusters_in_chunk;
+	/* Cast via unsigned long: a direct 32-bit pointer cast may sign-extend. */
+	reqs[0] = (unsigned long)&oiff;
+	info.oi_requests = (uint64_t)(unsigned long)reqs;
+	info.oi_count = 1;
+
+	ret = ioctl(fd, OCFS2_IOC_INFO, &info);
+	if (ret) {
+		ret = errno;	/* the caller gets the positive errno */
+		o2i_error(op, "ioctl failed: %s\n", strerror(ret));
+		o2i_scan_requests(op, info, &unknowns, &errors, &fills);
+		goto out;
+	}
+
+	if (oiff.iff_req.ir_flags & OCFS2_INFO_FL_FILLED) {
+		off->clusters = oiff.iff_ffs.ffs_clusters;
+		off->free_clusters = oiff.iff_ffs.ffs_free_clusters;
+		off->free_chunks = oiff.iff_ffs.ffs_free_chunks;
+		off->free_chunks_real = oiff.iff_ffs.ffs_free_chunks_real;
+		if (off->free_chunks_real) {
+			off->min = oiff.iff_ffs.ffs_min <<
+					(off->clustersize_bits - 10);
+			off->max = oiff.iff_ffs.ffs_max <<
+					(off->clustersize_bits - 10);
+			off->avg = oiff.iff_ffs.ffs_avg <<
+					(off->clustersize_bits - 10);
+		} else
+			off->min = 0;
+
+		memcpy(&(off->histogram), &(oiff.iff_ffs.ffs_fc_hist),
+		       sizeof(struct free_chunk_histogram));
+	}	/* NOTE(review): an unfilled reply leaves the stats untouched */
+
+	off->total_chunks = (off->clusters + off->clusters_in_chunk) >>
+				(off->chunkbits - off->clustersize_bits);
+out:
+	return ret;
+}
+
+/* Print the statistics in 'off' and its free-extent histogram to stdout. */
+static void o2info_report_freefrag(struct o2info_freefrag *off)
+{
+	const char *unitp = "KMGTPEZY";
+	char end_str[32];
+	int i, unit = 10; /* Begin from KB in terms of 10 bits */
+	unsigned int start = 0, end;
+
+	fprintf(stdout, "Blocksize: %u bytes\n", 1U << off->blksize_bits);
+	fprintf(stdout, "Clustersize: %u bytes\n", 1U << off->clustersize_bits);
+	/* off->clusters is 32 bits wide, so it takes %u and not %llu. */
+	fprintf(stdout, "Total clusters: %u\nFree clusters: %u (%0.1f%%)\n",
+		off->clusters, off->free_clusters,
+		(double)off->free_clusters * 100 / off->clusters);
+
+	fprintf(stdout, "\nMin. free extent: %u KB \nMax. free extent: %u KB\n"
+		"Avg. free extent: %u KB\n", off->min, off->max, off->avg);
+
+	if (off->chunkbytes) {
+		fprintf(stdout, "\nChunksize: %lu bytes (%u clusters)\n",
+			off->chunkbytes, off->clusters_in_chunk);
+		fprintf(stdout, "Total chunks: %u\nFree chunks: %u (%0.1f%%)\n",
+			off->total_chunks, off->free_chunks,
+			(double)off->free_chunks * 100 / off->total_chunks);
+	}
+
+	fprintf(stdout, "\nHISTOGRAM OF FREE EXTENT SIZES:\n");
+	fprintf(stdout, "%s :  %12s  %12s  %7s\n", "Extent Size Range",
+		"Free extents", "Free Clusters", "Percent");
+
+	/*
+	 * We probably need to start with 'M' when clustersize = 1M.
+	 */
+	start = 1 << (off->clustersize_bits - unit);
+	if (start == (1 << 10)) {
+		unit += 10;
+		unitp++;
+	}
+
+	for (i = 0; i < OCFS2_INFO_MAX_HIST; i++) {
+		/* Bucket i spans [2^i, 2^(i+1)) clusters, shown in 'unit'. */
+		start = 1 << (i + off->clustersize_bits - unit);
+		end = start << 1;
+
+		if (off->histogram.fc_chunks[i] != 0) {
+			snprintf(end_str, 32,  "%5u%c-", end, *unitp);
+			if (i == (OCFS2_INFO_MAX_HIST - 1))
+				strcpy(end_str, "max ");
+			fprintf(stdout, "%5u%c...%7s  :  "
+				"%12u  %12u  %6.2f%%\n",
+				start, *unitp, end_str,
+				off->histogram.fc_chunks[i],
+				off->histogram.fc_clusters[i],
+				(double)off->histogram.fc_clusters[i] * 100 /
+				off->free_clusters);
+		}
+		/* Move to the next unit letter once a bucket ends at 1024. */
+		start = end;
+		if (start == (1 << 10)) {
+			unit += 10;
+			unitp++;
+			if (!(*unitp))
+				break;
+		}
+	}
+}
+/* Entry point of --freefrag: parse the chunksize, gather and print stats. */
+static int freefrag_run(struct o2info_operation *op,
+			struct o2info_method *om,
+			void *arg)
+{
+	int ret = 0;
+	static struct o2info_freefrag off;
+	static struct o2info_volinfo ovf;
+	char *end;
+	/* base 0 lets strtoull take decimal, 0x-hex or 0-octal input */
+	off.chunkbytes = strtoull((char *)arg, &end, 0);
+	if (*end != '\0') {
+		o2i_error(op, "bad chunk size '%s'\n", (char *)arg);
+		ret = -1;
+		print_usage(ret);	/* NOTE(review): presumably exits -- confirm */
+	}
+	/* x & (x - 1) is zero only for 0 and for powers of two */
+	if (off.chunkbytes & (off.chunkbytes - 1)) {
+		o2i_error(op, "chunksize needs to be power of 2\n");
+		ret = -1;
+		print_usage(ret);
+	}
+	/* the option value is in KB; chunkbytes holds bytes from here on */
+	off.chunkbytes *= 1024;
+
+	if (om->om_method == O2INFO_USE_IOCTL)
+		ret = get_volinfo_ioctl(op, om->om_fd, &ovf);
+	else
+		ret = o2info_get_volinfo(om->om_fs, &ovf);
+	if (ret)
+		return -1;
+	/* chunkbytes == 0 skips this check; init then picks the default size */
+	if (off.chunkbytes &&
+	    (off.chunkbytes < ovf.clustersize)) {
+		o2i_error(op, "chunksize should be greater than or equal to "
+			  "filesystem cluster size\n");
+		ret = -1;
+		print_usage(ret);
+	}
+
+	ret = o2info_init_freefrag(&off, &ovf);
+	if (ret)
+		return -1;
+
+	if (om->om_method == O2INFO_USE_IOCTL)
+		ret = get_freefrag_ioctl(op, om->om_fd, &off);
+	else
+		ret = o2info_get_freefrag(om->om_fs, &off);
+
+	if (ret)
+		return ret;
+
+	o2info_report_freefrag(&off);
+
+	return ret;
+}
+
+DEFINE_O2INFO_OP(freefrag,	/* presumably defines freefrag_op -- confirm */
+		 freefrag_run,
+		 NULL);
-- 
1.5.5




More information about the Ocfs2-tools-devel mailing list