[Ocfs2-tools-devel] New patch for backup superblock against r1273 of ocfs2-tools

tao.ma tao.ma at oracle.com
Thu Dec 7 00:24:15 PST 2006


Thanks, Sunil, for your advice. I have modified the libocfs2 and mkfs modules.
Sunil Mushran wrote:
> backup_libocfs2.patch
>
> 1. In ocfs2_set_backup_sb(), the writes need to be ordered.
> ==> First initialize all the cluster(s) with zeroes.
> ==> Then set/write the bits in the cluster bitmap.
> ==> Then write the backup sbs.
> ==> Do not update the main superblock. That should be handled in 
> mkfs/tunefs.
I have moved the update of the main superblock into mkfs/tunefs.
But I disagree with your order for writing the backup sbs: with that order
we may need to reset the bits in the cluster bitmap if writing the backup
sbs fails.
So I think the order should be:
==> First initialize all the cluster(s) with zeroes.
==> Then write the backup sbs.
==> Then set/write the bits in the cluster bitmap.
  If we hit an error while writing the backup sbs, we don't need to
reset any bits, and it is harmless to have written some data to
unallocated clusters.

>
>
> Also, we have tried to limit the use of endian helper macros
> to functions that read/write the disk. For sb, it would be
> ocfs2_read_super() and ocfs2_write_super(). It will be better
> if we add a ocfs2_write_backup_super() that memcpys fs->fs_super,
> modifies the blocknum, ors the compat flag and writes to disk. That
> function should be in openfs.c.
I have created the function ocfs2_write_backup_super() in openfs.c and now
use it, together with ocfs2_write_super(), instead of direct I/O operations.
>
> BTW, fs->fs_super is the superblock. You don't have to re-read it.
>
> 2. In verify_block_allocation():
> +       *len = i;
> ???
>
> 3. Shouldn't ocfs2_get_backup_sb_offset() return i?
> As in, number of possible offsets for volume size. Currently you are
> relying on ocfs2_set_backup_sb() to ensure the blkno is > 0.
> Well, at least in mkfs you are not checking for that. It may be better
> if you return the number of offsets making the code flow like:
>
> +       numblks = ocfs2_get_backup_sb_offset(fs, blocks, 
> ARRAY_SIZE(blocks));
> +
> +       ret = ocfs2_set_backup_sb(fs, blocks, numblks);
> +       if (ret) {
> +               com_err(s->progname, ret, "while backuping superblock.");
> +               goto error;
> +       }
You are right. This method is better. I have modified it as you described.
Thanks.
>
> backup_mkfs.patch
> See above

-------------- next part --------------
Index: ocfs2-tools/debugfs.ocfs2/commands.c
===================================================================
--- ocfs2-tools.orig/debugfs.ocfs2/commands.c	2006-12-06 15:29:19.000000000 -0500
+++ ocfs2-tools/debugfs.ocfs2/commands.c	2006-12-06 16:17:00.000000000 -0500
@@ -295,6 +295,67 @@
 }
 
 /*
+ * process_open_args
+ *
+ */
+static int process_open_args(char **args,
+			     uint64_t *superblock, uint64_t *blocksize)
+{
+	errcode_t ret = 0;
+	long s;
+	char *ptr;
+	ocfs2_filesys *fs = NULL;
+	uint64_t byte_off[OCFS2_MAX_BACKUP_SUPERBLOCKS], blksize, sb = 0;
+	int ind = 2;
+
+	*superblock = 0;
+	*blocksize = 0;
+
+	if (!args[ind])
+		return 0;
+
+	if (args[ind] && !strcmp(args[ind], "-s"))
+		ind++;
+	else
+		return -1;
+
+	if(!args[ind])
+		return -1;
+
+	s = strtol(args[ind], &ptr, 0);
+
+	if (s < 1 || s > OCFS2_MAX_BACKUP_SUPERBLOCKS) {
+		fprintf (stderr, "Backup super block is outside of valid range"
+			 "(between 1 and %d)\n", OCFS2_MAX_BACKUP_SUPERBLOCKS);
+		return -1;
+	}
+
+	ocfs2_get_backup_sb_offset(NULL, byte_off, ARRAY_SIZE(byte_off));
+
+	/* iterate all the blocksize and get the right one. */
+	for (blksize = OCFS2_MIN_BLOCKSIZE;
+		blksize <= OCFS2_MAX_BLOCKSIZE;	blksize <<= 1) {
+		sb = byte_off[s-1] / blksize;
+		ret = ocfs2_open(args[1], OCFS2_FLAG_RO, sb, blksize, &fs);
+		if (!ret)
+			break;
+	}
+
+	if (ret) {
+		com_err(args[0],ret, "Can't open device by the num\n");
+		goto bail;
+	}
+
+	*superblock = sb;
+	*blocksize = blksize;
+
+	if (fs)
+		ocfs2_close(fs);
+bail:
+	return ret;
+}
+
+/*
  * get_slotnum()
  *
  */
@@ -475,18 +536,19 @@
 	char sysfile[SYSTEM_FILE_NAME_MAX];
 	int i;
 	struct ocfs2_super_block *sb;
+	uint64_t superblock, block_size;
 
 	if (gbls.device)
 		do_close (NULL);
 
-	if (dev == NULL) {
-		fprintf (stderr, "usage: %s <device>\n", args[0]);
+	if (dev == NULL || process_open_args(args, &superblock, &block_size)) {
+		fprintf (stderr, "usage: %s <device> [-s num]\n", args[0]);
 		return ;
 	}
 
 	flags = gbls.allow_write ? OCFS2_FLAG_RW : OCFS2_FLAG_RO;
         flags |= OCFS2_FLAG_HEARTBEAT_DEV_OK;
-	ret = ocfs2_open(dev, flags, 0, 0, &gbls.fs);
+	ret = ocfs2_open(dev, flags, superblock, block_size, &gbls.fs);
 	if (ret) {
 		gbls.fs = NULL;
 		com_err(args[0], ret, "while opening context for device %s",
@@ -687,7 +749,7 @@
 	printf ("logdump <slot#>\t\t\t\tPrints journal file for the node slot\n");
 	printf ("ls [-l] <filespec>\t\t\tList directory\n");
 	printf ("ncheck <block#> ...\t\t\tList all pathnames of the inode(s)/lockname(s)\n");
-	printf ("open <device>\t\t\t\tOpen a device\n");
+	printf ("open <device> [-s num]\t\t\t\tOpen a device\n");
 	printf ("quit, q\t\t\t\t\tExit the program\n");
 	printf ("rdump [-v] <filespec> <outdir>\t\tRecursively dumps from src to a dir on a mounted filesystem\n");
 	printf ("slotmap\t\t\t\t\tShow slot map\n");
Index: ocfs2-tools/debugfs.ocfs2/include/main.h
===================================================================
--- ocfs2-tools.orig/debugfs.ocfs2/include/main.h	2006-12-06 15:29:19.000000000 -0500
+++ ocfs2-tools/debugfs.ocfs2/include/main.h	2006-12-06 16:17:00.000000000 -0500
@@ -88,6 +88,7 @@
 typedef struct _dbgfs_opts {
 	int allow_write;
 	int no_prompt;
+	int sb_num;
 	char *cmd_file;
 	char *one_cmd;
 	char *device;
Index: ocfs2-tools/debugfs.ocfs2/main.c
===================================================================
--- ocfs2-tools.orig/debugfs.ocfs2/main.c	2006-12-06 15:29:19.000000000 -0500
+++ ocfs2-tools/debugfs.ocfs2/main.c	2006-12-06 16:17:00.000000000 -0500
@@ -51,9 +51,10 @@
 	g_print ("usage: %s -l [<logentry> ... [allow|off|deny]] ...\n", progname);
 	g_print ("usage: %s -d, --decode <lockres>\n", progname);
 	g_print ("usage: %s -e, --encode <lock type> <block num> <generation>\n", progname);
-	g_print ("usage: %s [-f cmdfile] [-R request] [-V] [-w] [-n] [-?] [device]\n", progname);
+	g_print ("usage: %s [-f cmdfile] [-R request] [-s block] [-V] [-w] [-n] [-?] [device]\n", progname);
 	g_print ("\t-f, --file <cmdfile>\tExecute commands in cmdfile\n");
 	g_print ("\t-R, --request <command>\tExecute a single command\n");
+	g_print ("\t-s, --superblock <num>\tOpen the device using another superblock\n");
 	g_print ("\t-w, --write\t\tOpen in read-write mode instead of the default of read-only\n");
 	g_print ("\t-V, --version\t\tShow version\n");
 	g_print ("\t-n, --noprompt\t\tHide prompt\n");
@@ -188,6 +189,7 @@
 static void get_options(int argc, char **argv, dbgfs_opts *opts)
 {
 	int c;
+	char *ptr = NULL;
 	static struct option long_options[] = {
 		{ "file", 1, 0, 'f' },
 		{ "request", 1, 0, 'R' },
@@ -198,6 +200,7 @@
 		{ "noprompt", 0, 0, 'n' },
 		{ "decode", 0, 0, 'd' },
 		{ "encode", 0, 0, 'e' },
+		{ "superblock", 0, 0, 's' },
 		{ 0, 0, 0, 0}
 	};
 
@@ -205,7 +208,7 @@
 		if (decodemode || encodemode || logmode)
 			break;
 
-		c = getopt_long(argc, argv, "lf:R:deV?wn", long_options, NULL);
+		c = getopt_long(argc, argv, "lf:R:deV?wns:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -257,6 +260,10 @@
 			exit(0);
 			break;
 
+		case 's':
+			opts->sb_num = strtol(optarg, &ptr, 0);
+			break;
+
 		default:
 			usage(gbls.progname);
 			break;
@@ -476,7 +483,10 @@
 		gbls.interactive++;
 
 	if (opts.device) {
-		line = g_strdup_printf ("open %s", opts.device);
+		if (opts.sb_num)
+			line = g_strdup_printf ("open %s -s %d", opts.device, opts.sb_num);
+		else
+			line = g_strdup_printf ("open %s", opts.device);
 		do_command (line);
 		g_free (line);
 	}
-------------- next part --------------
Index: ocfs2-tools/fsck.ocfs2/pass1.c
===================================================================
--- ocfs2-tools.orig/fsck.ocfs2/pass1.c	2006-12-06 15:29:17.000000000 -0500
+++ ocfs2-tools/fsck.ocfs2/pass1.c	2006-12-06 16:05:45.000000000 -0500
@@ -1023,6 +1023,21 @@
 	return ret;
 }
 
+static inline int bit_in_backup_sb(uint64_t bit, uint64_t *blocks, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (!blocks[i])
+			break;
+
+		if (blocks[i] == bit)
+			return 1;
+	}
+
+	return 0;
+}
+
 /* once we've iterated all the inodes we should have the current working
  * set of which blocks we think are in use.  we use this to derive the set
  * of clusters that should be allocated in the cluster chain allocators.  we
@@ -1033,6 +1048,9 @@
 	errcode_t ret;
 	uint64_t blkno, last_cbit, cbit, cbit_found;
 	struct ocfs2_cluster_group_sizes cgs;
+	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS], bpc;
+	int backup_sb = 0;
+	struct ocfs2_super_block *super = OCFS2_RAW_SB(ost->ost_fs->fs_super);
 
 	ocfs2_calc_cluster_groups(ost->ost_fs->fs_clusters,
 				  ost->ost_fs->fs_blocksize, &cgs);
@@ -1061,6 +1079,15 @@
 		goto out;
 	}
 
+	/* handle the condition of backup superblock. */
+	memset(&blocks, 0, sizeof(blocks));
+	bpc = ost->ost_fs->fs_clustersize / ost->ost_fs->fs_blocksize;
+	if (super->s_feature_compat & OCFS2_FEATURE_COMPAT_BACKUP_SB) {
+		ocfs2_get_backup_sb_offset(ost->ost_fs, blocks,
+					   ARRAY_SIZE(blocks));
+		backup_sb = 1;
+	}
+
 	/* we walk our found blocks bitmap to find clusters that we think
 	 * are in use.  each time we find a block in a cluster we skip ahead
 	 * to the first block of the next cluster when looking for the next.
@@ -1072,6 +1099,10 @@
 	 * we special case the number of clusters as the cluster offset which
 	 * indicates that the rest of the bits to the end of the bitmap
 	 * should be clear.
+	 *
+	 * we should treat the backup superblocks as a special case since they
+	 * don't belong to any inode. So they shouldn't exist in
+	 * ost->ost_allocated_clusters.
 	 */
 	for (last_cbit = 0, cbit = 0;
 	     cbit < ost->ost_fs->fs_clusters; 
@@ -1097,7 +1128,11 @@
 
 		/* clear set bits that should have been clear up to cbit */
 		while (cbit_found < cbit) {
-			force_cluster_bit(ost, ci, cbit_found, 0);
+			/* check the backup superblock */
+			if (!backup_sb ||
+				!bit_in_backup_sb(cbit_found * bpc,
+					 blocks, ARRAY_SIZE(blocks)))
+				force_cluster_bit(ost, ci, cbit_found, 0);
 			cbit_found++;
 			ret = ocfs2_bitmap_find_next_set(ci->ci_chains, cbit_found, 
 							 &cbit_found);
Index: ocfs2-tools/fsck.ocfs2/fsck.c
===================================================================
--- ocfs2-tools.orig/fsck.ocfs2/fsck.c	2006-12-06 15:29:17.000000000 -0500
+++ ocfs2-tools/fsck.ocfs2/fsck.c	2006-12-06 16:36:34.000000000 -0500
@@ -74,13 +74,14 @@
 {
 	fprintf(stderr,
 		"Usage: fsck.ocfs2 [ -fGnuvVy ] [ -b superblock block ]\n"
-		"		    [ -B block size ] device\n"
+		"		    [ -B block size ] [-r num] device\n"
 		"\n"
 		"Critical flags for emergency repair:\n" 
 		" -n		Check but don't change the file system\n"
 		" -y		Answer 'yes' to all repair questions\n"
 		" -f		Force checking even if file system is clean\n"
 		" -F		Ignore cluster locking (dangerous!)\n"
+		" -r		restore backup superblock(very dangerous!)\n"
 		"\n"
 		"Less critical flags:\n"
 		" -b superblock	Treat given block as the super block\n"
@@ -191,6 +192,30 @@
 	return ocfs2_write_super(ost->ost_fs);
 }
 
+static errcode_t update_backup_sb(o2fsck_state *ost)
+{
+	errcode_t ret;
+	int len;
+	struct ocfs2_dinode *di = ost->ost_fs->fs_super;
+	struct ocfs2_super_block *sb = OCFS2_RAW_SB(di);
+	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
+
+	if (!(sb->s_feature_compat & OCFS2_FEATURE_COMPAT_BACKUP_SB))
+		return 0;
+
+	len = ocfs2_get_backup_sb_offset(ost->ost_fs,
+					 blocks, ARRAY_SIZE(blocks));
+
+	ret = ocfs2_set_backup_sb(ost->ost_fs, blocks, len);
+	if (ret) {
+		com_err(whoami, ret, "while backuping superblock.");
+		goto bail;
+	}
+
+bail:
+	return ret;
+}
+
 static void scale_time(time_t secs, unsigned *scaled, char **units)
 {
 	if (secs < 60) {
@@ -410,12 +435,62 @@
 	return ret;
 }
 
+static errcode_t recover_backup_sb(o2fsck_state *ost, char* device, int sb_num)
+{
+	errcode_t ret;
+	uint64_t offsets[OCFS2_MAX_BACKUP_SUPERBLOCKS], blksize, sb;
+	ocfs2_filesys *fs = NULL;
+
+	if (sb_num < 1 || sb_num > OCFS2_MAX_BACKUP_SUPERBLOCKS)
+		return -1;
+
+	ocfs2_get_backup_sb_offset(NULL, offsets, ARRAY_SIZE(offsets));
+
+	/* iterate all the blocksize to get the right one. */
+	for (blksize = OCFS2_MIN_BLOCKSIZE;
+		blksize <= OCFS2_MAX_BLOCKSIZE;	blksize <<= 1) {
+		sb = offsets[sb_num - 1] / blksize;
+		/* Here we just give the possible value of block num and
+		 * block size to ocfs2_open and this function will check
+		 * them and return '0' if they meet the right one.
+		 */
+		ret = ocfs2_open(device, OCFS2_FLAG_RW, sb, blksize, &fs);
+		if (!ret)
+			break;
+	}
+
+	if (ret)
+		goto bail;
+
+	/* recover the backup information to superblock. */
+	if (prompt(ost, PN, PR_RECOVER_BACKUP_SUPERBLOCK,
+	    	   "Recover superblock information from backup block"
+		   "#%"PRIu64"?", sb)) {
+		fs->fs_super->i_blkno = OCFS2_SUPER_BLOCK_BLKNO;
+		ret = ocfs2_write_super(fs);
+		if (ret)
+			goto bail;
+	}
+
+	/* no matter whether the user recover the superblock or not here,
+	 * we should return 0 in case the superblock can be opened
+	 * without the recovery.
+	 */
+	ret = 0;
+
+bail:
+	if (fs)
+		ocfs2_close(fs);
+	return ret;
+}
+
 int main(int argc, char **argv)
 {
 	char *filename;
 	int64_t blkno, blksize;
 	o2fsck_state _ost, *ost = &_ost;
 	int c, open_flags = OCFS2_FLAG_RW | OCFS2_FLAG_STRICT_COMPAT_CHECK;
+	int sb_num = 0;
 	int fsck_mask = FSCK_OK;
 	errcode_t ret;
 
@@ -434,7 +509,7 @@
 	setlinebuf(stderr);
 	setlinebuf(stdout);
 
-	while((c = getopt(argc, argv, "b:B:fFGnuvVy")) != EOF) {
+	while((c = getopt(argc, argv, "b:B:fFGnuvVyr:")) != EOF) {
 		switch (c) {
 			case 'b':
 				blkno = read_number(optarg);
@@ -496,6 +571,10 @@
 				version();
 				break;
 
+			case 'r':
+				sb_num = read_number(optarg);
+				break;
+
 			default:
 				fsck_mask |= FSCK_USAGE;
 				print_usage();
@@ -523,6 +602,17 @@
 
 	filename = argv[optind];
 
+	/* recover superblock should be called at first. */
+	if (sb_num) {
+		ret = recover_backup_sb(ost, filename, sb_num);
+		if (ret) {
+			com_err(whoami, ret, "recover superblock failed.\n");
+			fsck_mask |= FSCK_ERROR;
+			goto out;
+		}
+
+	}
+
 	ret = open_and_check(ost, filename, open_flags, blkno, blksize);
 	if (ret) {
 		fsck_mask |= FSCK_ERROR;
@@ -638,6 +728,12 @@
 		if (ret)
 			com_err(whoami, ret, "while writing back the "
 				"superblock");
+		else {
+			ret = update_backup_sb(ost);
+			if (ret)
+				com_err(whoami, ret,
+					"while backuping superblock.");
+		}
 	}
 
 unlock:
Index: ocfs2-tools/fsck.ocfs2/fsck.ocfs2.checks.8.in
===================================================================
--- ocfs2-tools.orig/fsck.ocfs2/fsck.ocfs2.checks.8.in	2006-12-06 15:29:17.000000000 -0500
+++ ocfs2-tools/fsck.ocfs2/fsck.ocfs2.checks.8.in	2006-12-06 16:05:45.000000000 -0500
@@ -648,6 +648,12 @@
 Answering yes removes the file data associated with the inode and frees
 the inode.
 
+.SS "RECOVER_BACKUP_SUPERBLOCK"
+An ocfs2 volume has many backup superblocks. The user can recover the
+superblock from one of them when it is corrupted.
+
+Answering yes will copy the backup block to the superblock location.
+
 .SH "SEE ALSO"
 .BR fsck.ocfs2(8)
 
-------------- next part --------------
Index: ocfs2-tools/libocfs2/Makefile
===================================================================
--- ocfs2-tools.orig/libocfs2/Makefile	2006-12-06 15:29:18.000000000 -0500
+++ ocfs2-tools/libocfs2/Makefile	2006-12-06 15:30:19.000000000 -0500
@@ -80,7 +80,8 @@
 	truncate.c	\
 	unix_io.c	\
 	unlink.c	\
-	lockid.c
+	lockid.c	\
+	backup_sb.c
 
 HFILES =				\
 	include/bitmap.h		\
Index: ocfs2-tools/libocfs2/backup_sb.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ ocfs2-tools/libocfs2/backup_sb.c	2006-12-06 16:36:14.000000000 -0500
@@ -0,0 +1,158 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * backup_sb.c
+ *
+ * Backup superblocks for an OCFS2 volume.
+ *
+ * Copyright (C) 2006 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2,  as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ */
+
+#include <errno.h>
+#include "ocfs2.h"
+
+/* In case we don't have fs_blocksize, we will return
+ * byte offsets and let the caller calculate them by itself.
+ */
+int ocfs2_get_backup_sb_offset(ocfs2_filesys *fs,
+			       uint64_t *offsets, size_t len)
+{
+	size_t i;
+	uint64_t blkno;
+	uint32_t blocksize;
+
+	memset(offsets, 0, sizeof(uint64_t) * len);
+	len = ocfs2_min(len, OCFS2_MAX_BACKUP_SUPERBLOCKS);
+
+	if (fs)
+		blocksize = fs->fs_blocksize;
+	else
+		blocksize = 1;
+
+	for (i = 0; i < len; i++) {
+		blkno = ocfs2_backup_sb_blkno(blocksize, i);
+		if (fs && fs->fs_blocks <= blkno)
+			break;
+
+		offsets[i] = blkno;
+	}
+	return i;
+}
+
+static errcode_t verify_block_allocation(ocfs2_bitmap *bitmap, uint32_t bpc,
+					 uint64_t *blocks, size_t len)
+{
+	size_t i;
+	errcode_t ret;
+	int val;
+
+	for (i = 0; i < len; i++, blocks++) {
+		if (!*blocks)
+			break;
+
+		ret = ocfs2_bitmap_test(bitmap, *blocks / bpc, &val);
+		if (ret)
+			goto bail;
+
+		if (val) {
+			ret = ENOSPC;
+			goto bail;
+		}
+	}
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+errcode_t ocfs2_set_backup_sb(ocfs2_filesys *fs, uint64_t *blocks, size_t len)
+{
+	size_t i, j;
+	errcode_t ret = 0;
+	char *buf = NULL;
+	uint64_t bm_blk, *blkno = blocks;
+	int val;
+	uint32_t bpc = fs->fs_clustersize / fs->fs_blocksize;
+
+	if (!len || !blocks || !*blocks)
+		goto bail;
+	len = ocfs2_min(len, OCFS2_MAX_BACKUP_SUPERBLOCKS);
+
+	if (!fs->fs_cluster_alloc) {
+		ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE,
+						0, &bm_blk);
+		if (ret)
+			goto bail;
+
+		ret = ocfs2_read_cached_inode(fs, bm_blk, &fs->fs_cluster_alloc);
+		if (ret)
+			goto bail;
+
+		ret = ocfs2_load_chain_allocator(fs, fs->fs_cluster_alloc);
+		if (ret)
+			goto bail;
+	}
+
+	if (!(OCFS2_RAW_SB(fs->fs_super)->s_feature_compat &
+				OCFS2_FEATURE_COMPAT_BACKUP_SB)) {
+		ret = verify_block_allocation(fs->fs_cluster_alloc->ci_chains,
+						    bpc, blocks, len);
+		if (ret)
+			goto bail;
+	}
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto bail;
+
+	memset(buf, 0, fs->fs_blocksize);
+
+	for (i = 0; i < len; i++, blkno++) {
+		if (!*blkno)
+			break;
+
+		/* zero the whole cluster first */
+		for (j = 0; j < bpc; j++) {
+			ret = io_write_block(fs->fs_io, *blkno + j, 1, buf);
+			if (ret)
+				goto bail;
+		}
+
+		ret = ocfs2_write_backup_super(fs, *blkno);
+		if (ret)
+			goto bail;
+	}
+
+	blkno = blocks;
+	for (i = 0; i < len; i++, blkno++)
+		ocfs2_bitmap_set(fs->fs_cluster_alloc->ci_chains,
+				 *blkno / bpc, &val);
+
+	ret = ocfs2_write_chain_allocator(fs, fs->fs_cluster_alloc);
+	if (ret)
+		goto bail;
+
+bail:
+	if (buf)
+		ocfs2_free(&buf);
+	if (fs->fs_cluster_alloc) {
+		ocfs2_free_cached_inode(fs, fs->fs_cluster_alloc);
+		fs->fs_cluster_alloc = NULL;
+	}
+	return ret;
+}
Index: ocfs2-tools/libocfs2/include/ocfs2.h
===================================================================
--- ocfs2-tools.orig/libocfs2/include/ocfs2.h	2006-12-06 15:29:18.000000000 -0500
+++ ocfs2-tools/libocfs2/include/ocfs2.h	2006-12-06 17:04:27.000000000 -0500
@@ -613,6 +613,24 @@
 errcode_t ocfs2_decode_lockres(char *lockres, int len, enum ocfs2_lock_type *type,
 			       uint64_t *blkno, uint32_t *generation);
 
+/* write the superblock at the specific blk. */
+errcode_t ocfs2_write_backup_super(ocfs2_filesys *fs, uint64_t blkno);
+
+/* Get the blkno according to the file system info.
+ * The unused ones, depending on the volume size, are zeroed.
+ * Return the length of the block array.
+ */
+int ocfs2_get_backup_sb_offset(ocfs2_filesys *fs,
+			       uint64_t *blocks, size_t len);
+
+/* This function will get the superblock pointed to by fs and copy it to
+ * the blocks. But first it will ensure all the appropriate clusters are free.
+ * If not, it will error out with ENOSPC. If free, it will zero the clusters,
+ * write the backup sbs and then set the bits for all the clusters.
+ * In case of updating, it will overwrite the backup blocks with the newest
+ * superblock information.
+ */
+errcode_t ocfs2_set_backup_sb(ocfs2_filesys *fs, uint64_t *blocks, size_t len);
 
 /* 
  * ${foo}_to_${bar} is a floor function.  blocks_to_clusters will
@@ -728,4 +746,6 @@
 	(void) (&_x == &_y);            \
 	_x > _y ? _x : _y; })
 
+/* lifted from the kernel. include/linux/kernel.h */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif  /* _FILESYS_H */
Index: ocfs2-tools/libocfs2/include/ocfs2_fs.h
===================================================================
--- ocfs2-tools.orig/libocfs2/include/ocfs2_fs.h	2006-12-06 15:29:18.000000000 -0500
+++ ocfs2-tools/libocfs2/include/ocfs2_fs.h	2006-12-06 15:30:19.000000000 -0500
@@ -85,7 +85,7 @@
 #define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask)			\
 	OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
 
-#define OCFS2_FEATURE_COMPAT_SUPP	0
+#define OCFS2_FEATURE_COMPAT_SUPP	OCFS2_FEATURE_COMPAT_BACKUP_SB
 #define OCFS2_FEATURE_INCOMPAT_SUPP	0
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	0
 
@@ -107,6 +107,20 @@
 #define OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT	0x0008
 
 /*
+ * backup superblock flag is used to indicate that this volume
+ * has backup superblocks.
+ */
+#define OCFS2_FEATURE_COMPAT_BACKUP_SB		0x0001
+
+/* The byte offset of the first backup block will be 1G.
+ * The following will be 4G, 16G, 64G, 256G and 1T.
+ */
+#define OCFS2_BACKUP_SB_START			1 << 30
+
+/* the max backup superblock nums */
+#define OCFS2_MAX_BACKUP_SUPERBLOCKS	6
+
+/*
  * Flags on ocfs2_dinode.i_flags
  */
 #define OCFS2_VALID_FL		(0x00000001)	/* Inode is valid */
@@ -628,6 +642,19 @@
 
 	return size / sizeof(struct ocfs2_truncate_rec);
 }
+
+static inline uint64_t ocfs2_backup_sb_blkno(int blocksize, int index)
+{
+	uint64_t offset = OCFS2_BACKUP_SB_START;
+
+	if (index >= 0 && index < OCFS2_MAX_BACKUP_SUPERBLOCKS) {
+		offset <<= (2 * index);
+		offset /= blocksize;
+		return offset;
+	}
+
+	return 0;
+}
 #endif  /* __KERNEL__ */
 
 
Index: ocfs2-tools/libocfs2/openfs.c
===================================================================
--- ocfs2-tools.orig/libocfs2/openfs.c	2006-12-06 15:29:18.000000000 -0500
+++ ocfs2-tools/libocfs2/openfs.c	2006-12-06 15:46:36.000000000 -0500
@@ -129,6 +129,42 @@
 	return ret;
 }
 
+errcode_t ocfs2_write_backup_super(ocfs2_filesys *fs, uint64_t blkno)
+{
+	errcode_t ret;
+	char *buf = NULL;
+	struct ocfs2_dinode *di;
+
+	if (!(fs->fs_flags & OCFS2_FLAG_RW))
+		return OCFS2_ET_RO_FILESYS;
+
+	ret = ocfs2_malloc_block(fs->fs_io, &buf);
+	if (ret)
+		goto out_blk;
+
+	memcpy(buf, (char *)fs->fs_super, fs->fs_blocksize);
+	di = (struct ocfs2_dinode *)buf;
+
+	ret = OCFS2_ET_BAD_MAGIC;
+	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
+		   strlen(OCFS2_SUPER_BLOCK_SIGNATURE)))
+		goto out_blk;
+
+	di->i_blkno = blkno;
+	OCFS2_RAW_SB(di)->s_feature_compat |=
+					OCFS2_FEATURE_COMPAT_BACKUP_SB;
+	ret = ocfs2_write_inode(fs, blkno, buf);
+	if (ret)
+		goto out_blk;
+
+	ret = 0;
+
+out_blk:
+	if (buf)
+		ocfs2_free(&buf);
+	return ret;
+}
+
 int ocfs2_mount_local(ocfs2_filesys *fs)
 {
 	return OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &
-------------- next part --------------
Index: ocfs2-tools/mkfs.ocfs2/mkfs.c
===================================================================
--- ocfs2-tools.orig/mkfs.ocfs2/mkfs.c	2006-12-06 15:29:18.000000000 -0500
+++ ocfs2-tools/mkfs.ocfs2/mkfs.c	2006-12-06 17:16:31.000000000 -0500
@@ -84,6 +84,7 @@
 					   uint16_t bpc);
 static void create_lost_found_dir(State *s);
 static void format_journals(State *s);
+static void format_backup_sb(State *s);
 
 extern char *optarg;
 extern int optind, opterr, optopt;
@@ -438,6 +439,14 @@
 	if (!s->quiet)
 		printf("done\n");
 
+	if (!s->quiet)
+		printf("Writing backup superblock: ");
+
+	format_backup_sb(s);
+
+	if (!s->quiet)
+		printf("done\n");
+
 	if (!s->hb_dev) {
 		/* These routines use libocfs2 to do their work. We
 		 * don't share an ocfs2_filesys context between the
@@ -2245,3 +2254,45 @@
 	clear_both_ends(s);
 	exit(1);
 }
+
+static void format_backup_sb(State *s)
+{
+	errcode_t ret;
+	ocfs2_filesys *fs = NULL;
+	size_t i, len;
+	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
+
+	ret = ocfs2_open(s->device_name, OCFS2_FLAG_RW, 0, 0, &fs);
+	if (ret) {
+		com_err(s->progname, ret,
+			"while opening file system for backup superblock.");
+		goto error;
+	}
+
+	len = ocfs2_get_backup_sb_offset(fs, blocks, ARRAY_SIZE(blocks));
+
+	ret = ocfs2_set_backup_sb(fs, blocks, len);
+	if (ret) {
+		com_err(s->progname, ret, "while backuping superblock.");
+		goto error;
+	}
+
+	OCFS2_RAW_SB(fs->fs_super)->s_feature_compat |=
+					OCFS2_FEATURE_COMPAT_BACKUP_SB;
+
+	ret = ocfs2_write_super(fs);
+	if (ret) {
+		com_err(s->progname, ret, "while updating superblock.");
+		goto error;
+	}
+
+	for (i = 0; i < len && blocks[i]; i++)
+		printf(" %"PRIu64" ", blocks[i] * fs->fs_blocksize);
+
+	ocfs2_close(fs);
+	return;
+
+error:
+	clear_both_ends(s);
+	exit(1);
+}
-------------- next part --------------
Index: ocfs2-tools/tunefs.ocfs2/tunefs.c
===================================================================
--- ocfs2-tools.orig/tunefs.ocfs2/tunefs.c	2006-12-06 15:29:19.000000000 -0500
+++ ocfs2-tools/tunefs.ocfs2/tunefs.c	2006-12-06 17:17:44.000000000 -0500
@@ -81,6 +81,7 @@
 	int verbose;
 	int quiet;
 	int prompt;
+	int backup_sb;
 	time_t tune_time;
 	int fd;
 } ocfs2_tune_opts;
@@ -94,7 +95,7 @@
 {
 	fprintf(stderr, "usage: %s [-J journal-options] [-L volume-label]\n"
 			"\t\t[-M mount-type] [-N number-of-node-slots]\n"
-			"\t\t[-qSUvV] device [blocks-count]\n",
+			"\t\t[-qSUvV] [-b] device [blocks-count]\n",
 			progname);
 	exit(0);
 }
@@ -253,6 +254,7 @@
 		{ "volume-size", 0, 0, 'S'},
 		{ "uuid-reset", 0, 0, 'U'},
 		{ "mount", 1, 0, 'M' },
+		{ "backup-sb", 0, 0, 'b'},
 		{ 0, 0, 0, 0}
 	};
 
@@ -264,7 +266,7 @@
 	opts.prompt = 1;
 
 	while (1) {
-		c = getopt_long(argc, argv, "L:N:J:M:SUvqVx", long_options,
+		c = getopt_long(argc, argv, "L:N:J:M:SUvqVxb", long_options,
 				NULL);
 
 		if (c == -1)
@@ -344,6 +346,10 @@
 			opts.prompt = 0;
 			break;
 
+		case 'b':
+			opts.backup_sb =  1;
+			break;
+
 		default:
 			usage(opts.progname);
 			break;
@@ -784,6 +790,83 @@
 	return ret;
 }
 
+static inline errcode_t load_chain_allocator(ocfs2_filesys *fs,
+					     ocfs2_cached_inode** inode)
+{
+	errcode_t ret;
+	uint64_t blkno;
+
+	ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE,
+					0, &blkno);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_read_cached_inode(fs, blkno, inode);
+	if (ret)
+		goto bail;
+
+	ret = ocfs2_load_chain_allocator(fs, *inode);
+
+bail:
+	return ret;
+}
+
+static errcode_t backup_sb_check(ocfs2_filesys *fs)
+{
+	errcode_t ret;
+	int i, len, val, failed = 0;
+	ocfs2_cached_inode *chain_alloc = NULL;
+	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
+	uint32_t bpc = fs->fs_clustersize / fs->fs_blocksize;
+	struct ocfs2_super_block *super = OCFS2_RAW_SB(fs->fs_super);
+
+	/* if the compat flag is set, just return. */
+	if (super->s_feature_compat & OCFS2_FEATURE_COMPAT_BACKUP_SB) {
+		com_err(opts.progname, 0,
+			"This volume already has backup superblocks");
+		return -1;
+	}
+
+	len = ocfs2_get_backup_sb_offset(fs, blocks, ARRAY_SIZE(blocks));
+
+	ret = load_chain_allocator(fs, &chain_alloc);
+	if (ret)
+		goto bail;
+
+	for (i = 0; i < len; i++) {
+		if (!blocks[i])
+			break;
+
+		ret = ocfs2_bitmap_test(chain_alloc->ci_chains,
+					blocks[i] / bpc, &val);
+		if (ret)
+			goto bail;
+
+		if (val) {
+			com_err(opts.progname, 0, "block %"PRIu64
+				" is already allocated.", blocks[i]);
+			/* in order to verify all the block in the 'blocks',
+			 * we don't stop the loop here.
+			 */
+			failed = 1;
+		}
+	}
+
+	if (failed) {
+		ret = ENOSPC;
+		com_err(opts.progname, 0, "backup blocks check failed.");
+		com_err(opts.progname, 0, "Run debugfs.ocfs2 and use command"
+			" 'icheck' to detect the block's owner.");
+		com_err(opts.progname,0, "Please delete (after backing up them)"
+			" the files and try again.");
+	}
+
+	if (chain_alloc)
+		ocfs2_free_cached_inode(fs, chain_alloc);
+bail:
+	return ret;
+}
+
 static void update_volume_label(ocfs2_filesys *fs, int *changed)
 {
   	memset (OCFS2_RAW_SB(fs->fs_super)->s_label, 0,
@@ -1141,6 +1224,24 @@
 	return ret;
 }
 
+static errcode_t update_backup_sb(ocfs2_filesys *fs)
+{
+	errcode_t ret;
+	int len;
+	uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS];
+
+	len = ocfs2_get_backup_sb_offset(fs, blocks, ARRAY_SIZE(blocks));
+
+	ret = ocfs2_set_backup_sb(fs, blocks, len);
+	if (ret) {
+		com_err(opts.progname, ret, "while backuping superblock.");
+		goto bail;
+	}
+
+bail:
+	return ret;
+}
+
 int main(int argc, char **argv)
 {
 	errcode_t ret = 0;
@@ -1238,6 +1339,14 @@
 		}
 	}
 
+	/* check whether the block for backup superblock are used. */
+	if (opts.backup_sb) {
+		if (backup_sb_check(fs))
+			goto unlock;
+		else
+			printf("Adding backup superblock for the volume\n");
+	}
+
 	/* validate volume label */
 	if (opts.vol_label) {
 		printf("Changing volume label from %s to %s\n",
@@ -1312,7 +1421,8 @@
 	}
 
 	if (!opts.vol_label && !opts.vol_uuid && !opts.num_slots &&
-	    !opts.jrnl_size && !opts.num_blocks && !opts.mount) {
+	    !opts.jrnl_size && !opts.num_blocks && !opts.mount &&
+	    !opts.backup_sb) {
 		com_err(opts.progname, 0, "Nothing to do. Exiting.");
 		goto unlock;
 	}
@@ -1410,6 +1520,45 @@
 		}
 		block_signals(SIG_UNBLOCK);
 		printf("Wrote Superblock\n");
+
+		/* superblock's information has changed.
+		 * we need to synchronize the backup blocks if needed.
+		 */
+		if (OCFS2_RAW_SB(fs->fs_super)->s_feature_compat &
+					OCFS2_FEATURE_COMPAT_BACKUP_SB) {
+			block_signals(SIG_BLOCK);
+			ret = update_backup_sb(fs);
+			block_signals(SIG_UNBLOCK);
+			if (ret) {
+				printf("warning, superblock changed and "
+					"failed to synchronsize the backup "
+					"blocks.");
+			}
+			else
+				printf("Updated backup superblock.\n");
+		}
+	}
+
+	if (opts.backup_sb) {
+		block_signals(SIG_BLOCK);
+		ret = update_backup_sb(fs);
+		block_signals(SIG_UNBLOCK);
+		if (ret) {
+			com_err(opts.progname, ret,
+				"while backuping superblock");
+			goto unlock;
+		}
+		OCFS2_RAW_SB(fs->fs_super)->s_feature_compat |=
+					OCFS2_FEATURE_COMPAT_BACKUP_SB;
+		block_signals(SIG_BLOCK);
+		ret = ocfs2_write_super(fs);
+		block_signals(SIG_UNBLOCK);
+		if (ret) {
+			com_err(opts.progname, ret, "while writing superblock");
+			goto unlock;
+		}
+
+		printf("Backuped Superblock.\n");
 	}
 
 unlock:
-------------- next part --------------
backup_libocfs2_1.patch 
backup_mkfs_1.patch 
backup_fsck_1.patch 
backup_tunefs_1.patch 
backup_debugfs_1.patch 


More information about the Ocfs2-tools-devel mailing list