[Ocfs2-devel] [PATCH 6/6] Add online resize in tunefs.ocfs2,take 1
Tao Ma
tao.ma at oracle.com
Fri Nov 16 00:48:04 PST 2007
During online resize, we prepare all the new group descriptors in
user space. The updates of the global bitmap, the super block and all
the backups are handled in the kernel.
Signed-off-by: Tao Ma <tao.ma at oracle.com>
---
libocfs2/include/ocfs2_fs.h | 4 +
tunefs.ocfs2/resize.c | 255 ++++++++++++++++++++++++++++++++++++++++---
tunefs.ocfs2/tunefs.c | 53 ++++++++-
tunefs.ocfs2/tunefs.h | 6 +
4 files changed, 295 insertions(+), 23 deletions(-)
60516ce8535f9cd6c9f36ace7056e5a24fab5e1b
diff --git a/libocfs2/include/ocfs2_fs.h b/libocfs2/include/ocfs2_fs.h
index cfaf28f..5694114 100644
--- a/libocfs2/include/ocfs2_fs.h
+++ b/libocfs2/include/ocfs2_fs.h
@@ -27,7 +27,7 @@ #define _OCFS2_FS_H
/* Version */
#define OCFS2_MAJOR_REV_LEVEL 0
-#define OCFS2_MINOR_REV_LEVEL 90
+#define OCFS2_MINOR_REV_LEVEL 91
/*
* An OCFS2 volume starts this way:
@@ -230,6 +230,8 @@ #define OCFS2_IOC_FREESP64 _IOW ('X', 37
#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv)
#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv)
+#define OCFS2_IOC_FSGROWFSDATA _IOW ('X', 110, struct ocfs2_dinode)
+
/*
* Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
*/
diff --git a/tunefs.ocfs2/resize.c b/tunefs.ocfs2/resize.c
index 0466d60..dfee58f 100644
--- a/tunefs.ocfs2/resize.c
+++ b/tunefs.ocfs2/resize.c
@@ -22,10 +22,74 @@
*
*/
+#include <limits.h> /* for PATH_MAX */
+#ifndef PATH_MAX
+#define PATH_MAX 8192
+#endif
+
+#include <sys/ioctl.h>
+#include <errno.h>
#include <tunefs.h>
extern ocfs2_tune_opts opts;
+/*
+ * This lock name is specific and only used in online resize.
+ * online_resize_lock() and online_resize_unlock() pass it to
+ * o2dlm_lock() and o2dlm_unlock() respectively.
+ */
+static char lock_name[OCFS2_LOCK_ID_MAX_LEN] = "tunefs-online-resize-lock";
+static char mnt_dir[PATH_MAX];	/* filled in by find_mount_point() */
+
+/*
+ * Request the online-resize DLM lock at O2DLM_LEVEL_EXMODE with the
+ * O2DLM_TRYLOCK flag.  The result of o2dlm_lock() is returned as is.
+ */
+errcode_t online_resize_lock(ocfs2_filesys *fs)
+{
+	errcode_t err;
+
+	err = o2dlm_lock(fs->fs_dlm_ctxt, lock_name,
+			 O2DLM_LEVEL_EXMODE, O2DLM_TRYLOCK);
+	return err;
+}
+
+/*
+ * Drop the online-resize DLM lock.  The result of o2dlm_unlock() is
+ * returned as is.
+ */
+errcode_t online_resize_unlock(ocfs2_filesys *fs)
+{
+	errcode_t err;
+
+	err = o2dlm_unlock(fs->fs_dlm_ctxt, lock_name);
+	return err;
+}
+
+/*
+ * Query the mount state of 'device' and let ocfs2_check_mount_point()
+ * store the mount directory in mnt_dir.
+ *
+ * Returns 0 when the OCFS2_MF_MOUNTED flag is set and neither
+ * OCFS2_MF_READONLY nor OCFS2_MF_SWAP is, the error reported by
+ * ocfs2_check_mount_point() if the query itself fails, and
+ * OCFS2_ET_BAD_DEVICE_NAME for any other flag combination.
+ */
+static errcode_t find_mount_point(char *device)
+{
+	int mount_flags = 0;
+	errcode_t ret;
+
+	memset(mnt_dir, 0, sizeof(mnt_dir));
+
+	ret = ocfs2_check_mount_point(device, &mount_flags,
+				      mnt_dir, sizeof(mnt_dir));
+	if (ret)
+		goto out;
+
+	/*
+	 * The negation has to cover the masked value: "!mount_flags & X"
+	 * parses as "(!mount_flags) & X", which does not test bit X.
+	 */
+	if (!(mount_flags & OCFS2_MF_MOUNTED) ||
+	    (mount_flags & OCFS2_MF_READONLY) ||
+	    (mount_flags & OCFS2_MF_SWAP)) {
+		ret = OCFS2_ET_BAD_DEVICE_NAME;
+		goto out;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Check whether an online resize may go ahead.
+ *
+ * To keep things simple, online resize is not allowed to coexist with
+ * any other tunefs option.  If one of them was requested, an error is
+ * printed and the program exits with status 1.  Otherwise the result
+ * of find_mount_point() for opts.device is returned.
+ */
+errcode_t online_resize_check(ocfs2_filesys *fs)
+{
+	int other_tasks;
+
+	other_tasks = opts.backup_super || opts.vol_label ||
+		      opts.num_slots || opts.mount || opts.jrnl_size;
+
+	if (other_tasks) {
+		com_err(opts.progname, 0, "Cannot do online-resize"
+			" along with other tasks");
+		exit(1);
+	}
+
+	return find_mount_point(opts.device);
+}
+
void get_vol_size(ocfs2_filesys *fs)
{
errcode_t ret = 0;
@@ -101,7 +165,8 @@ static errcode_t init_new_gd(ocfs2_files
uint32_t num_new_clusters,
uint16_t chain,
uint32_t *total_bits,
- uint32_t *used_bits)
+ uint32_t *used_bits,
+ int online)
{
errcode_t ret = 0;
uint32_t cluster_chunk;
@@ -224,7 +289,131 @@ bail:
return ret;
}
-errcode_t update_volume_size(ocfs2_filesys *fs, int *changed)
+/*
+ * Reserve one cluster in the specified group and modify the group and
+ * chain information accordingly.
+ *
+ * The group descriptor at gd_blkno is read, the bit for 'cluster'
+ * (cluster % cl_cpg) is set in its bitmap, its free-bit count is
+ * decremented and the descriptor is written back.  The in-memory
+ * bitmap inode 'di' is then adjusted: c_free of the group's chain
+ * record is decremented and bitmap1.i_used is incremented.  'di' is
+ * not written to disk here.
+ *
+ * 'progname' is not used; messages are reported with opts.progname.
+ *
+ * Returns 0 on success, or a non-zero error code when allocating,
+ * reading or writing the descriptor fails or when the bit was already
+ * set.
+ */
+static errcode_t reserve_cluster(ocfs2_filesys *fs,
+				 char *progname,
+				 struct ocfs2_dinode *di,
+				 uint64_t gd_blkno,
+				 uint32_t cluster)
+{
+	errcode_t ret;
+	uint16_t chain, cl_cpg = di->id2.i_chain.cl_cpg;
+	char *gd_buf = NULL;
+	struct ocfs2_group_desc *gd = NULL;
+	struct ocfs2_chain_rec *cr = NULL;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &gd_buf);
+	if (ret) {
+		com_err(opts.progname, ret, "while allocating a block during "
+			"reserve backup super blocks");
+		goto out;
+	}
+
+	ret = ocfs2_read_group_desc(fs, gd_blkno, gd_buf);
+	if (ret) {
+		com_err(opts.progname, ret, "while reading group descriptor "
+			"at block %"PRIu64" during volume resize", gd_blkno);
+		goto out;
+	}
+
+	gd = (struct ocfs2_group_desc *)gd_buf;
+	if (ocfs2_set_bit(cluster % cl_cpg, gd->bg_bitmap) != 0) {
+		/*
+		 * The bit was already set.  ret is still 0 from the read
+		 * above, so an error code must be set explicitly or the
+		 * caller would see this failure as success.
+		 */
+		ret = OCFS2_ET_INTERNAL_FAILURE;
+		com_err(opts.progname, 0, "while allocating backup superblock "
+			"in cluster %u during volume resize", cluster);
+		goto out;
+	}
+
+	gd->bg_free_bits_count--;
+
+	ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf);
+	if (ret) {
+		com_err(opts.progname, ret, "while writing group descriptor "
+			"at block %"PRIu64" during volume resize", gd_blkno);
+		goto out;
+	}
+
+	/* update the dinode accordingly. */
+	chain = gd->bg_chain;
+	cr = &(di->id2.i_chain.cl_recs[chain]);
+	cr->c_free--;
+
+	di->id1.bitmap1.i_used++;
+out:
+	if (gd_buf)
+		ocfs2_free(&gd_buf);
+	return ret;
+}
+
+/*
+ * Reserve the backup superblocks that live in the newly added groups.
+ *
+ * Backup superblocks whose group descriptor block is not beyond
+ * lgd_blkno (the "old last" group) are skipped here; the kernel will
+ * reserve those that lie in that group but outside the "old" volume
+ * size.
+ *
+ * Returns 0 when the backup-superblock feature is off, when no backup
+ * offsets are reported, or when every reservation succeeds; otherwise
+ * the first error returned by reserve_cluster().
+ */
+static errcode_t reserve_backup_supers(ocfs2_filesys *fs,
+				       char *progname,
+				       struct ocfs2_dinode *di,
+				       uint64_t lgd_blkno)
+{
+	uint64_t backup_blknos[OCFS2_MAX_BACKUP_SUPERBLOCKS];
+	uint16_t cpg = di->id2.i_chain.cl_cpg;
+	int count, idx;
+
+	if (!OCFS2_HAS_COMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super),
+				      OCFS2_FEATURE_COMPAT_BACKUP_SB))
+		return 0;
+
+	count = ocfs2_get_backup_super_offset(fs, backup_blknos,
+					      ARRAY_SIZE(backup_blknos));
+	if (count <= 0)
+		return 0;
+
+	for (idx = 0; idx < count; idx++) {
+		uint32_t cluster;
+		uint64_t group_blkno;
+		errcode_t err;
+
+		cluster = ocfs2_blocks_to_clusters(fs, backup_blknos[idx]);
+		group_blkno = ocfs2_which_cluster_group(fs, cpg, cluster);
+
+		/* Only groups past the old last group are handled here. */
+		if (group_blkno <= lgd_blkno)
+			continue;
+
+		err = reserve_cluster(fs, progname, di, group_blkno, cluster);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Hand the prepared bitmap inode 'di' to the kernel by issuing
+ * OCFS2_IOC_FSGROWFSDATA on the directory named by mnt_dir.
+ *
+ * Returns 0 on success.  If open() or ioctl() fails, the errno value
+ * of the failing call is reported through com_err() and returned.
+ */
+static errcode_t update_global_bitmap_online(char *progname,
+					     struct ocfs2_dinode *di)
+{
+	int fd;
+	errcode_t ret = 0;
+
+	fd = open(mnt_dir, O_RDONLY);
+	if (fd < 0) {
+		/* Save errno first: com_err() may overwrite it. */
+		ret = errno;
+		com_err(progname, ret,
+			"while opening mounted dir %s.\n", mnt_dir);
+		return ret;
+	}
+
+	if (ioctl(fd, OCFS2_IOC_FSGROWFSDATA, di) < 0) {
+		/*
+		 * Return the errno value, not ioctl()'s raw -1, and
+		 * capture it before com_err() and close() can change it.
+		 */
+		ret = errno;
+		com_err(progname, ret, "while ioctl on dir %s.\n", mnt_dir);
+	}
+
+	close(fd);
+	return ret;
+}
+
+errcode_t update_volume_size(ocfs2_filesys *fs, int *changed, int online)
{
errcode_t ret = 0;
struct ocfs2_dinode *di;
@@ -242,7 +431,7 @@ errcode_t update_volume_size(ocfs2_files
uint32_t used_bits;
uint32_t total_bits;
uint32_t num_bits;
- int flush_lgd = 0;
+ int flush_lgd = 0, i = 0;
ret = ocfs2_malloc_block(fs->fs_io, &in_buf);
if (ret) {
@@ -277,13 +466,31 @@ errcode_t update_volume_size(ocfs2_files
di = (struct ocfs2_dinode *)in_buf;
cl = &(di->id2.i_chain);
- total_bits = di->id1.bitmap1.i_total;
- used_bits = di->id1.bitmap1.i_used;
-
first_new_cluster = di->i_clusters;
save_new_clusters = num_new_clusters =
ocfs2_blocks_to_clusters(fs, opts.num_blocks) - di->i_clusters;
+ /*
+ * For online resize, empty total_bits, used_bits, di->i_clusters
+ * and all the information within the chain except c_blkno.
+ * So after init_new_gd, used_bits, total_bits, and chain_rec will
+ * record the real information of the new added group descriptors.
+ *
+ * We don't empty c_blkno here so that the original group header
+ * can be linked to the tail of the new added group list.
+ */
+ if (online) {
+ total_bits = 0;
+ used_bits = 0;
+ di->i_clusters = 0;
+ for (i = 0; i < cl->cl_count; i++) {
+ cl->cl_recs[i].c_free = 0;
+ cl->cl_recs[i].c_total = 0;
+ }
+ } else {
+ total_bits = di->id1.bitmap1.i_total;
+ used_bits = di->id1.bitmap1.i_used;
+ }
/* Find the blknum of the last cluster group */
lgd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, first_new_cluster - 1);
@@ -304,7 +511,12 @@ errcode_t update_volume_size(ocfs2_files
chain = gd->bg_chain;
- /* If possible round off the last group to cpg */
+ /*
+ * If possible round off the last group to cpg.
+ *
+ * For online resize, this proceeds as in offline resize,
+ * but the update of the group will be done by the kernel.
+ */
cluster_chunk = MIN(num_new_clusters,
(cl->cl_cpg - (gd->bg_bits/cl->cl_bpc)));
if (cluster_chunk) {
@@ -330,11 +542,14 @@ errcode_t update_volume_size(ocfs2_files
flush_lgd = 1;
}
- /* Init the new groups and write to disk */
- /* Add these groups one by one starting from the first chain after */
- /* the one containing the last group */
+ /*
+ * Init the new groups and write to disk
+ * Add these groups one by one starting from the first chain after
+ * the one containing the last group.
+ */
ret = init_new_gd(fs, di, first_new_cluster,
- num_new_clusters, chain, &total_bits, &used_bits);
+ num_new_clusters, chain, &total_bits, &used_bits,
+ online);
if (ret)
goto bail;
@@ -344,11 +559,21 @@ errcode_t update_volume_size(ocfs2_files
di->i_clusters += save_new_clusters;
di->i_size = (uint64_t) di->i_clusters * fs->fs_clustersize;
- fs->fs_super->i_clusters = di->i_clusters;
+ fs->fs_super->i_clusters = fs->fs_clusters;
- ret = update_global_bitmap(fs, di, gd, flush_lgd);
- if (ret)
- goto bail;
+ if (online) {
+ ret = reserve_backup_supers(fs, opts.progname, di, lgd_blkno);
+ if (ret)
+ goto bail;
+
+ ret = update_global_bitmap_online(opts.progname, di);
+ if (ret)
+ goto bail;
+ } else {
+ ret = update_global_bitmap(fs, di, gd, flush_lgd);
+ if (ret)
+ goto bail;
+ }
*changed = 1;
diff --git a/tunefs.ocfs2/tunefs.c b/tunefs.ocfs2/tunefs.c
index cea9eb1..59aa56d 100644
--- a/tunefs.ocfs2/tunefs.c
+++ b/tunefs.ocfs2/tunefs.c
@@ -30,6 +30,7 @@ ocfs2_tune_opts opts;
ocfs2_filesys *fs_gbl = NULL;
static int cluster_locked = 0;
static int resize = 0;
+static int online_resize = 0;
static uint64_t def_jrnl_size = 0;
static char old_uuid[OCFS2_VOL_UUID_LEN * 2 + 1];
static char new_uuid[OCFS2_VOL_UUID_LEN * 2 + 1];
@@ -1006,6 +1007,17 @@ static errcode_t volume_check(ocfs2_file
int dirty = 0;
uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
+ /*
+ * online_resize can't coexist with other tasks, and it doesn't
+ * need other checks, so we just do the check and return.
+ */
+ if (online_resize) {
+ ret = online_resize_check(fs);
+ if (ret)
+ com_err(opts.progname, 0, "online resize check failed.");
+ goto bail;
+ }
+
ret = journal_check(fs, &dirty, &def_jrnl_size);
if (ret || dirty)
goto bail;
@@ -1235,13 +1247,19 @@ int main(int argc, char **argv)
block_signals(SIG_BLOCK);
ret = ocfs2_lock_down_cluster(fs);
- if (ret) {
- block_signals(SIG_UNBLOCK);
+ block_signals(SIG_UNBLOCK);
+ if (!ret)
+ cluster_locked = 1;
+ else if (ret == O2DLM_ET_TRYLOCK_FAILED && resize) {
+ /*
+ * We just set the flag here and more check and
+ * lock will be done later.
+ */
+ online_resize = 1;
+ } else {
com_err(opts.progname, ret, "while locking down the cluster");
goto close;
}
- cluster_locked = 1;
- block_signals(SIG_UNBLOCK);
}
/*
@@ -1274,6 +1292,27 @@ int main(int argc, char **argv)
}
}
+ /*
+ * We handle online resize separately here, since it is
+ * not like tunefs operations.
+ */
+ if (online_resize) {
+ ret = online_resize_lock(fs);
+ if (ret)
+ goto close;
+
+ ret = update_volume_size(fs, &upd_blocks, online_resize);
+ if (ret) {
+ com_err(opts.progname, ret,
+ "while updating volume size");
+ goto online_resize_unlock;
+ }
+ if (upd_blocks)
+ printf("Resized volume\n");
+
+ goto online_resize_unlock;
+ }
+
/* Set resize incompat flag on superblock */
max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
if (opts.num_blocks ||
@@ -1352,7 +1391,7 @@ int main(int argc, char **argv)
/* update volume size */
if (opts.num_blocks) {
old_blocks = fs->fs_blocks;
- ret = update_volume_size(fs, &upd_blocks);
+ ret = update_volume_size(fs, &upd_blocks, 0);
if (ret) {
com_err(opts.progname, ret,
"while updating volume size");
@@ -1450,7 +1489,9 @@ int main(int argc, char **argv)
}
}
}
-
+online_resize_unlock:
+ if (online_resize)
+ online_resize_unlock(fs);
unlock:
block_signals(SIG_BLOCK);
if (cluster_locked && fs->fs_dlm_ctxt)
diff --git a/tunefs.ocfs2/tunefs.h b/tunefs.ocfs2/tunefs.h
index 3863331..b1ad470 100644
--- a/tunefs.ocfs2/tunefs.h
+++ b/tunefs.ocfs2/tunefs.h
@@ -117,6 +117,10 @@ errcode_t feature_check(ocfs2_filesys *f
errcode_t update_feature(ocfs2_filesys *fs);
void get_vol_size(ocfs2_filesys *fs);
-errcode_t update_volume_size(ocfs2_filesys *fs, int *changed);
+errcode_t update_volume_size(ocfs2_filesys *fs, int *changed, int online);
int validate_vol_size(ocfs2_filesys *fs);
+
+errcode_t online_resize_check(ocfs2_filesys *fs);
+errcode_t online_resize_lock(ocfs2_filesys *fs);
+errcode_t online_resize_unlock(ocfs2_filesys *fs);
#endif /* _TUNEFS_H */
--
1.3.3
More information about the Ocfs2-devel
mailing list