[Ocfs2-tools-devel] [PATCH 13/22] tunefs rework: Add op_resize_volume.c

Mark Fasheh mfasheh at suse.com
Wed Aug 6 18:45:04 PDT 2008


On Fri, Jul 25, 2008 at 07:07:51PM -0700, Joel Becker wrote:
> The resize_volume operation grows an ocfs2 filesystem.  If no size is
> specified, it will grow the filesystem to the limits of the LUN.  This
> operation can be performed online.
> 
> This is the tunefs option '-S'.
> 
> Signed-off-by: Joel Becker <joel.becker at oracle.com>
> ---
>  tunefs.ocfs2/op_resize_volume.c |  803 +++++++++++++++++++++++++++++++++++++++
>  1 files changed, 803 insertions(+), 0 deletions(-)
>  create mode 100644 tunefs.ocfs2/op_resize_volume.c
> 
> diff --git a/tunefs.ocfs2/op_resize_volume.c b/tunefs.ocfs2/op_resize_volume.c
> new file mode 100644
> index 0000000..e8cbae1
> --- /dev/null
> +++ b/tunefs.ocfs2/op_resize_volume.c
> @@ -0,0 +1,803 @@
> +/* -*- mode: c; c-basic-offset: 8; -*-
> + * vim: noexpandtab sw=8 ts=8 sts=0:
> + *
> + * op_resize_volume.c
> + *
> + * ocfs2 tune utility to resize the volume.
> + *
> + * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +
> +#define _LARGEFILE64_SOURCE
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <inttypes.h>
> +#include <sys/ioctl.h>
> +
> +#include "o2dlm/o2dlm.h"
> +#include "ocfs2/ocfs2.h"
> +#include "ocfs2/bitops.h"
> +
> +#include "libocfs2ne.h"
> +
> +
> +/*
> + * This lock name is specific to, and only used in, online resize.
> + */
> +static char lock_name[OCFS2_LOCK_ID_MAX_LEN] = "tunefs-online-resize-lock";
> +
> +
> +/*
> + * We can handle a new size specified in bytes, blocks, or clusters.
> + * However, we don't have an open filesystem at the time we parse the
> + * new size.  Thus, we store off the choice in struct resize_specs until
> + * we're ready to go.
> + */
> +enum resize_units {
> +	RESIZE_BYTES	= 0,
> +	RESIZE_BLOCKS	= 1,
> +	RESIZE_CLUSTERS	= 2,
> +};
> +static char *resize_unit_strings[] = {
> +	[RESIZE_BYTES]		= "bytes:",
> +	[RESIZE_BLOCKS]		= "blocks:",
> +	[RESIZE_CLUSTERS]	= "clusters:",
> +};
> +struct resize_specs {
> +	enum resize_units	rs_unit;
> +	uint64_t		rs_size;
> +};
> +
> +static errcode_t online_resize_lock(ocfs2_filesys *fs)
> +{
> +	return tunefs_dlm_lock(fs, lock_name, O2DLM_LEVEL_EXMODE,
> +			       O2DLM_TRYLOCK);
> +}
> +
> +static errcode_t online_resize_unlock(ocfs2_filesys *fs)
> +{
> +	return tunefs_dlm_unlock(fs, lock_name);
> +}
> +
> +static errcode_t reserve_cluster(ocfs2_filesys *fs,
> +				 uint16_t cl_cpg,
> +				 uint32_t cluster,
> +				 struct ocfs2_group_desc *gd)
> +{
> +	errcode_t ret = 0;
> +	unsigned char *bitmap = gd->bg_bitmap;
> +
> +	ret = ocfs2_set_bit(cluster % cl_cpg, bitmap);
> +	if (ret != 0) {
> +		errorf("Unable to allocate the backup superblock"
> +			"in cluster %u\n",
> +			cluster);
> +		goto out;
> +	}
> +
> +	gd->bg_free_bits_count--;
> +out:
> +	return ret;
> +}
> +
> +/* Reserve the backup superblocks located in the newly added groups. */
> +static errcode_t reserve_backup_in_group(ocfs2_filesys *fs,
> +					 struct ocfs2_dinode *di,
> +					 struct ocfs2_group_desc *gd,
> +					 uint16_t *backups)
> +{
> +	errcode_t ret = 0;
> +	int numsb, i;
> +	uint64_t blkno, gd_blkno = gd->bg_blkno;
> +	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
> +	uint16_t cl_cpg = di->id2.i_chain.cl_cpg;
> +	uint32_t cluster;
> +
> +	*backups = 0;
> +
> +	if (!OCFS2_HAS_COMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super),
> +				      OCFS2_FEATURE_COMPAT_BACKUP_SB))
> +		goto out;
> +
> +	numsb = ocfs2_get_backup_super_offsets(fs, blocks, ARRAY_SIZE(blocks));
> +	if (numsb <= 0)
> +		goto out;
> +
> +	for (i = 0; i < numsb; i++) {
> +		cluster = ocfs2_blocks_to_clusters(fs, blocks[i]);
> +		blkno = ocfs2_which_cluster_group(fs, cl_cpg, cluster);
> +		if (blkno < gd_blkno)
> +			continue;
> +		else if (blkno > gd_blkno)
> +			break;
> +
> +		ret = reserve_cluster(fs, cl_cpg, cluster, gd);
> +		if (ret)
> +			goto out;
> +		(*backups)++;
> +	}
> +
> +out:
> +	return ret;
> +}
> +
> +static errcode_t online_resize_group_add(ocfs2_filesys *fs,
> +					 struct ocfs2_dinode *di,
> +					 uint64_t gd_blkno,
> +					 char *gd_buf,
> +					 uint16_t chain,
> +					 uint32_t new_clusters)
> +{
> +	errcode_t ret;
> +	uint16_t backups = 0, cl_bpc = di->id2.i_chain.cl_bpc;
> +	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)gd_buf;
> +	struct ocfs2_new_group_input input;
> +
> +	ret = reserve_backup_in_group(fs, di, gd, &backups);
> +	if (ret)
> +		goto out;
> +
> +	ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf);
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * Initialize the input data and call online resize procedure.
> +	 * free clusters is calculated accordingly and checked in the kernel.
> +	 */
> +	memset(&input, 0, sizeof(input));
> +
> +	input.group = gd_blkno;
> +	input.clusters = new_clusters;
> +	input.chain = chain;
> +	input.frees = gd->bg_bits/cl_bpc - 1 - backups;
> +
> +	ret = tunefs_online_ioctl(fs, OCFS2_IOC_GROUP_ADD, &input);
> +	if (ret)
> +		tcom_err(ret,
> +			 "while asking the kernel to link the group at "
> +			 "block %"PRIu64" to chain %u",
> +			 gd_blkno, chain);
> +out:
> +	return ret;
> +}
> +
> +/*
> + * Initialize the group descriptors in the newly added cluster range.
> + *
> + * di: global_bitmap's inode info.
> + * first_new_cluster: the start cluster offset.
> + * num_new_clusters: cluster range length.
> + * chain: the chain position of the last group descriptor. The new
> + *        group will be added to the chain after this one.
> + * total_bits and used_bits are incremented by the bits of each new group.
> + */
> +static errcode_t init_new_gd(ocfs2_filesys *fs,
> +			     struct ocfs2_dinode *di,
> +			     uint32_t first_new_cluster,
> +			     uint32_t num_new_clusters,
> +			     uint16_t chain,
> +			     uint32_t *total_bits,
> +			     uint32_t *used_bits,
> +			     int online)
> +{
> +	errcode_t ret = 0;
> +	uint32_t cluster_chunk;
> +	uint64_t gd_blkno = 0;
> +	struct ocfs2_chain_list *cl = &di->id2.i_chain;
> +	struct ocfs2_chain_rec *cr = NULL;
> +	struct ocfs2_group_desc *gd = NULL;
> +	char *zero_buf = NULL;
> +	char *gd_buf = NULL;
> +
> +	ret = ocfs2_malloc_block(fs->fs_io, &gd_buf);
> +	if (ret) {
> +		tcom_err(ret, "while allocating a group descriptor buffer");
> +		goto bail;
> +	}
> +
> +	ret = ocfs2_malloc_blocks(fs->fs_io, ocfs2_clusters_to_blocks(fs, 1),
> +				  &zero_buf);
> +	if (ret) {
> +		tcom_err(ret, "while allocating a zeroing buffer");
> +		goto bail;
> +	}
> +
> +	memset(zero_buf, 0, fs->fs_clustersize);
> +	gd = (struct ocfs2_group_desc *)gd_buf;
> +
> +	while(num_new_clusters) {
> +		gd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg,
> +						     first_new_cluster);
> +		cluster_chunk = ocfs2_min(num_new_clusters,
> +					  (uint32_t)cl->cl_cpg);
> +		num_new_clusters -= cluster_chunk;
> +		first_new_cluster += cluster_chunk;
> +
> +		if (++chain >= cl->cl_count)
> +			chain = 0;
> +
> +		ocfs2_init_group_desc(fs, gd, gd_blkno,
> +				      fs->fs_super->i_fs_generation, di->i_blkno,
> +				      (cluster_chunk *cl->cl_bpc), chain);
> +
> +		/* Add group to chain */
> +		cr = &(cl->cl_recs[chain]);
> +		if (chain >= cl->cl_next_free_rec) {
> +			cl->cl_next_free_rec++;
> +			cr->c_free = 0;
> +			cr->c_total = 0;
> +			cr->c_blkno = 0;
> +		}
> +
> +		gd->bg_next_group = cr->c_blkno;
> +		cr->c_blkno = gd_blkno;
> +		cr->c_free += gd->bg_free_bits_count;
> +		cr->c_total += gd->bg_bits;
> +
> +		*used_bits += (gd->bg_bits - gd->bg_free_bits_count);
> +		*total_bits += gd->bg_bits;
> +
> +		fs->fs_clusters += cluster_chunk;
> +		fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk);
> +
> +		/* Initialize the first cluster in the group */
> +		ret = io_write_block(fs->fs_io, gd_blkno,
> +				     ocfs2_clusters_to_blocks(fs, 1), zero_buf);
> +		if (ret) {
> +			tcom_err(ret,
> +				 "while initializing the cluster group "
> +				 "starting at block %"PRIu64,
> +				 gd_blkno);
> +			goto bail;
> +		}
> +
> +		if (online) {
> +			ret = online_resize_group_add(fs, di, gd_blkno, gd_buf,
> +						      chain, cluster_chunk);
> +			if (ret) {
> +				tcom_err(ret,
> +					 "while trying to add the cluster "
> +					 "group at block %"PRIu64,
> +					 gd_blkno);
> +				goto bail;
> +			}
> +		} else {
> +			/* write a new group descriptor */
> +			ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf);
> +			if (ret) {
> +				tcom_err(ret,
> +					 "while writing the new group "
> +					 "descriptor at block %"PRIu64,
> +					 gd_blkno);
> +				goto bail;
> +			}
> +		}
> +	}
> +
> +bail:
> +	if (zero_buf)
> +		ocfs2_free(&zero_buf);
> +	if (gd_buf)
> +		ocfs2_free(&gd_buf);
> +	return ret;
> +}
> +
> +static errcode_t update_global_bitmap(ocfs2_filesys *fs,
> +				      struct ocfs2_dinode *di,
> +				      struct ocfs2_group_desc *lgd,
> +				      int flush_lgd)
> +{
> +	errcode_t ret = 0;
> +
> +	tunefs_block_signals();
> +	/* Flush that last group descriptor we updated before the new ones */
> +	if (flush_lgd) {
> +		ret = ocfs2_write_group_desc(fs, lgd->bg_blkno, (char *)lgd);
> +		if (ret) {
> +			tcom_err(ret,
> +				 "while flushing the former tail group "
> +				 "descriptor to block %"PRIu64,
> +				 (uint64_t)lgd->bg_blkno);
> +			goto bail;
> +		}
> +	}
> +
> +	/* write the global bitmap inode */
> +	ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di);
> +	if (ret)
> +		tcom_err(ret,
> +			 "while writing the global bitmap inode to block "
> +			 "%"PRIu64,
> +			 (uint64_t)di->i_blkno);
> +
> +bail:
> +	tunefs_unblock_signals();
> +	return ret;
> +}
> +
> +static errcode_t run_resize(ocfs2_filesys *fs, uint32_t total_clusters,
> +			    int online)
> +{
> +	errcode_t ret = 0;
> +	struct ocfs2_dinode *di;
> +	uint64_t bm_blkno = 0;
> +	uint64_t lgd_blkno = 0;
> +	char *in_buf = NULL;
> +	char *lgd_buf = NULL;
> +	struct ocfs2_chain_list *cl;
> +	struct ocfs2_chain_rec *cr;
> +	struct ocfs2_group_desc *gd;
> +	uint32_t cluster_chunk;
> +	uint32_t num_new_clusters, save_new_clusters;
> +	uint32_t first_new_cluster;
> +	uint16_t chain;
> +	uint32_t used_bits = 0;
> +	uint32_t total_bits = 0;
> +	uint32_t num_bits;
> +	int flush_lgd = 0, new_clusters;
> +
> +	ret = ocfs2_malloc_block(fs->fs_io, &in_buf);
> +	if (ret) {
> +		tcom_err(ret, "while allocating an inode buffer during");
> +		goto bail;
> +	}
> +
> +	ret = ocfs2_malloc_block(fs->fs_io, &lgd_buf);
> +	if (ret) {
> +		tcom_err(ret, "while allocating a group descriptor buffer");
> +		goto bail;
> +	}
> +
> +	/* read global bitmap */
> +	ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE, 0,
> +					&bm_blkno);
> +	if (ret) {
> +		tcom_err(ret, "while looking up the global bitmap inode");
> +		goto bail;
> +	}
> +
> +	ret = ocfs2_read_inode(fs, bm_blkno, in_buf);
> +	if (ret) {
> +		tcom_err(ret,
> +			 "while reading the global bitmap inode from block "
> +			 "%"PRIu64,
> +			 bm_blkno);
> +		goto bail;
> +	}
> +
> +	di = (struct ocfs2_dinode *)in_buf;
> +	cl = &(di->id2.i_chain);
> +
> +	first_new_cluster = di->i_clusters;
> +	save_new_clusters = num_new_clusters =
> +		total_clusters - di->i_clusters;
> +
> +	/* Find the blknum of the last cluster group */
> +	lgd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, first_new_cluster - 1);
> +
> +	ret = ocfs2_read_group_desc(fs, lgd_blkno, lgd_buf);
> +	if (ret) {
> +		tcom_err(ret,
> +			 "while reading the tail group descriptor from "
> +			 "block %"PRIu64,
> +			 lgd_blkno);
> +		goto bail;
> +	}
> +
> +	gd = (struct ocfs2_group_desc *)lgd_buf;
> +
> +	/* If only one cluster group then see if we need to adjust up cl_cpg */
> +	if (cl->cl_next_free_rec == 1) {
> +		if (cl->cl_cpg < (8 * gd->bg_size))
> +			cl->cl_cpg = 8 * gd->bg_size;
> +	}
> +
> +	chain = gd->bg_chain;
> +
> +	/*
> +	 * If possible round off the last group to cpg.
> +	 *
> +	 * For online resize, we set it up like offline resize,
> +	 * but have the kernel do the update.
> +	 */
> +	cluster_chunk = ocfs2_min(num_new_clusters,
> +				  (uint32_t)(cl->cl_cpg -
> +					     (gd->bg_bits/cl->cl_bpc)));
> +	if (cluster_chunk) {
> +		num_new_clusters -= cluster_chunk;
> +		first_new_cluster += cluster_chunk;
> +
> +		num_bits = cluster_chunk * cl->cl_bpc;
> +
> +		gd->bg_bits += num_bits;
> +		gd->bg_free_bits_count += num_bits;
> +
> +		cr = &(cl->cl_recs[chain]);
> +		cr->c_total += num_bits;
> +		cr->c_free += num_bits;
> +
> +		total_bits += num_bits;
> +
> +		fs->fs_clusters += cluster_chunk;
> +		fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk);
> +
> +		/* This cluster group block is written after the new */
> +		/* cluster groups are written to disk if we're doing
> +		 * an offline resize */
> +		flush_lgd = 1;
> +
> +		if (online) {
> +			new_clusters = cluster_chunk;
> +			ret = tunefs_online_ioctl(fs,
> +						  OCFS2_IOC_GROUP_EXTEND,
> +						  &new_clusters);
> +			if (ret < 0) {
> +				tcom_err(ret,
> +					 "while asking the kernel to "
> +					 "extend the tail group descriptor "
> +					 "by %"PRIu32" clusters",
> +					 cluster_chunk);
> +				goto bail;
> +			}
> +		}
> +	}
> +
> +	/*
> +	 * Init the new groups and write to disk
> +	 * Add these groups one by one starting from the first chain after
> +	 * the one containing the last group.
> +	 */
> +	if (num_new_clusters) {
> +		ret = init_new_gd(fs, di, first_new_cluster,
> +				  num_new_clusters, chain,
> +				  &total_bits, &used_bits, online);
> +		if (ret)
> +			goto bail;
> +	}
> +
> +	if (!online) {
> +		/* Finish up the former tail group descriptor if we're
> +		 * doing an offline resize */
> +		di->id1.bitmap1.i_total += total_bits;
> +		di->id1.bitmap1.i_used += used_bits;
> +
> +		di->i_clusters += save_new_clusters;
> +		di->i_size = (uint64_t) di->i_clusters * fs->fs_clustersize;
> +
> +		fs->fs_super->i_clusters = di->i_clusters;
> +
> +		ret = update_global_bitmap(fs, di, gd, flush_lgd);
> +	}
> +
> +bail:
> +	if (in_buf)
> +		ocfs2_free(&in_buf);
> +	if (lgd_buf)
> +		ocfs2_free(&lgd_buf);
> +
> +	return ret;
> +}
> +
> +static uint64_t clusters_in_bytes(ocfs2_filesys *fs, uint64_t bytes)
> +{
> +	uint64_t ret = bytes + fs->fs_clustersize - 1;
> +
> +	if (ret < bytes) /* deal with wrapping */
> +		return UINT64_MAX;
> +
> +	return ret >> OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits;
> +}
> +
> +static uint64_t clusters_to_bytes(ocfs2_filesys *fs, uint64_t clusters)
> +{
> +	uint64_t ret =
> +		clusters << OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits;
> +
> +	if (ret < clusters)
> +		ret = UINT64_MAX;
> +
> +	return ret;
> +}

Btw, these probably should go somewhere in libocfs2, right? I'm actually
really surprised we don't have them there already.

Anyway, I'm guessing you probably got this from the old tunefs code, so I
don't see this as something that should stop this patch from being committed
as-is, but maybe a follow-up could move them?
	--Mark

--
Mark Fasheh



More information about the Ocfs2-tools-devel mailing list