[Ocfs2-tools-commits] taoma commits r1338 - branches/sparse-files/libocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Mon Apr 9 02:26:42 PDT 2007


Author: taoma
Date: 2007-04-09 02:26:40 -0700 (Mon, 09 Apr 2007)
New Revision: 1338

Modified:
   branches/sparse-files/libocfs2/extend_file.c
Log:
Add a crash-safe, error-recovery mechanism for ocfs2_insert_extent.

In order to order the sequence of block writes and avoid corrupting the inode,
we duplicate the extent blocks and do the insertion in the duplicated ones. When
the insertion succeeds, the new blocks are put into use; if an error occurs during
the insertion, the original inode is left untouched.

This patch is totally independent of the insertion code, so when merging it into
the main trunk we can decide whether it is efficient enough for insertion in
ocfs2-tools.

Modified: branches/sparse-files/libocfs2/extend_file.c
===================================================================
--- branches/sparse-files/libocfs2/extend_file.c	2007-04-07 00:28:55 UTC (rev 1337)
+++ branches/sparse-files/libocfs2/extend_file.c	2007-04-09 09:26:40 UTC (rev 1338)
@@ -216,8 +216,8 @@
 /* some extent blocks is modified and we need to synchronize them to the disk
  * accordingly.
  *
- * We will update the inode only if the subtree_index indicates the inode is
- * the sub_root for the change.
+ * We will not update the inode if subtree_index is "0" since it should be
+ * updated by the caller.
  */
 static errcode_t ocfs2_sync_path_to_disk(ocfs2_filesys *fs,
 					 struct ocfs2_path *left_path,
@@ -245,13 +245,6 @@
 		ret = ocfs2_write_extent_block(fs, blkno, sub_root);
 		if (ret)
 			goto bail;
-	} else {
-		/* here the subtree_index indicates the inode, so have to
-		 * update the extent list in the ocfs2_dionde accordingly.
-		 */
-		ret = ocfs2_write_inode(fs, blkno, sub_root);
-		if (ret)
-			goto bail;
 	}
 bail:
 	return ret;
@@ -496,10 +489,9 @@
 			goto bail;
 	}
 
-	ret = ocfs2_write_inode(fs, fe->i_blkno, (char *)fe);
-	if (ret)
-		goto bail;
-
+	/* The inode information isn't updated since we use duplicated extent
+	 * block in the insertion and it may fail in other steps.
+	 */
 	ret = 0;
 bail:
 	if (new_eb_bufs) {
@@ -1885,6 +1877,215 @@
 	return ret;
 }
 
+struct duplicate_ctxt {
+	struct ocfs2_dinode *di;	/* new dinode; its i_last_eb_blk is remapped */
+	uint64_t next_leaf_blk;		/* last duplicated leaf block, 0 if none yet */
+};
+
+/* Copy every extent block below old_el into newly allocated blocks and
+ * fill new_el with records pointing at the copies.  ctxt->di gets its
+ * i_last_eb_blk remapped and ctxt->next_leaf_blk chains the new leaves.
+ * On error, the new blocks recorded in new_el at this level are deleted.
+ */
+static errcode_t duplicate_extent_block(ocfs2_filesys *fs,
+					struct ocfs2_extent_list *old_el,
+					struct ocfs2_extent_list *new_el,
+					struct duplicate_ctxt *ctxt)
+{
+	int i;
+	errcode_t ret;
+	uint64_t blkno, new_blkno;
+	struct ocfs2_extent_rec *rec = NULL;
+	char *eb_buf = NULL, *new_eb_buf = NULL;
+	struct ocfs2_extent_block *eb = NULL;
+	struct ocfs2_extent_list *child_old_el = NULL, *child_new_el = NULL;
+
+	assert (old_el->l_tree_depth > 0);
+
+	/* empty the whole extent list at first. */
+	*new_el = *old_el;
+	new_el->l_next_free_rec = 0;
+	memset(new_el->l_recs, 0,
+	       sizeof(struct ocfs2_extent_rec) * new_el->l_count);
+
+	ret = ocfs2_malloc_block(fs->fs_io, &eb_buf);
+	if (ret)
+		goto bail;
+	ret = ocfs2_malloc_block(fs->fs_io, &new_eb_buf);
+	if (ret)
+		goto bail;
+
+	if (old_el->l_next_free_rec == 0) {
+		/* XXX: depth > 0 but no extent record: corrupted block? */
+		ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
+		goto bail;
+	}
+
+	/* iterate from the last record so that each new leaf can store the
+	 * new leaf that follows it in h_next_leaf_blk.
+	 */
+	for (i = old_el->l_next_free_rec - 1; i >= 0; i--) {
+		rec = &old_el->l_recs[i];
+
+		if (!ocfs2_rec_clusters(old_el->l_tree_depth, rec))
+			continue;
+
+		blkno = rec->e_blkno;
+		ret = ocfs2_read_extent_block(fs, blkno, eb_buf);
+		if (ret)
+			goto bail;
+
+		/* First make the new_buf the same as the old buf. */
+		memcpy(new_eb_buf, eb_buf, fs->fs_blocksize);
+
+		eb = (struct ocfs2_extent_block *)eb_buf;
+		child_old_el = &eb->h_list;
+		eb = (struct ocfs2_extent_block *)new_eb_buf;
+		child_new_el = &eb->h_list;
+
+		if (child_old_el->l_tree_depth > 0) {
+			/* the child has extent blocks below it: recurse. */
+			ret = duplicate_extent_block(fs,
+						     child_old_el,
+						     child_new_el,
+						     ctxt);
+			if (ret)
+				goto bail;
+		}
+
+		/* now we allocate a new extent block and save it. */
+		ret = ocfs2_new_extent_block(fs, &new_blkno);
+		if (ret)
+			goto bail;
+
+		/* eb still refers to new_eb_buf here. */
+		eb->h_blkno = new_blkno;
+		if (child_old_el->l_tree_depth == 0) {
+			/* a leaf: chain it to the leaf duplicated just
+			 * before it and remember it for the next one.
+			 */
+			eb->h_next_leaf_blk = ctxt->next_leaf_blk;
+			ctxt->next_leaf_blk = new_blkno;
+		}
+
+		ret = ocfs2_write_extent_block(fs, new_blkno, new_eb_buf);
+		if (ret) {
+			/* not recorded in new_el yet, so release it here. */
+			ocfs2_delete_extent_block(fs, new_blkno);
+			goto bail;
+		}
+
+		memcpy(&new_el->l_recs[i], rec, sizeof(struct ocfs2_extent_rec));
+		new_el->l_recs[i].e_blkno = new_blkno;
+
+		/* set the new i_last_eb_blk in the new dinode. */
+		if (ctxt->di->i_last_eb_blk == blkno)
+			ctxt->di->i_last_eb_blk = new_blkno;
+	}
+
+	new_el->l_next_free_rec = old_el->l_next_free_rec;
+	ret = 0;
+
+bail:
+	if (eb_buf)
+		ocfs2_free(&eb_buf);
+	if (new_eb_buf)
+		ocfs2_free(&new_eb_buf);
+	/* Free the new blocks recorded at this level (not their subtrees). */
+	if (ret) {
+		for (i = 0; i < old_el->l_next_free_rec; i++) {
+			rec = &new_el->l_recs[i];
+			if (rec->e_blkno)
+				ocfs2_delete_extent_block(fs, rec->e_blkno);
+		}
+	}
+
+	return ret;
+}
+
+/* Duplicate all the extent blocks of the inode in old_buf.  new_buf gets
+ * an extent list whose records point at the newly allocated copies.
+ */
+static errcode_t duplicate_extent_block_dinode(ocfs2_filesys *fs,
+					       char *old_buf, char *new_buf)
+{
+	struct ocfs2_dinode *src_di = (struct ocfs2_dinode *)old_buf;
+	struct ocfs2_dinode *dst_di = (struct ocfs2_dinode *)new_buf;
+	struct ocfs2_extent_list *src_el = &src_di->id2.i_list;
+	struct ocfs2_extent_list *dst_el = &dst_di->id2.i_list;
+	struct duplicate_ctxt dup;
+
+	assert(src_el->l_tree_depth > 0);
+
+	/* start from a copy of the list header that holds no records. */
+	*dst_el = *src_el;
+	dst_el->l_next_free_rec = 0;
+	memset(dst_el->l_recs, 0,
+	       dst_el->l_count * sizeof(struct ocfs2_extent_rec));
+
+	/* dup.di is the inode whose i_last_eb_blk gets remapped; no leaf
+	 * has been duplicated yet, so next_leaf_blk starts at 0.
+	 */
+	memset(&dup, 0, sizeof(dup));
+	dup.di = dst_di;
+
+	return duplicate_extent_block(fs, src_el, dst_el, &dup);
+}
+
+/* Delete every extent block reachable from el, children before parents.
+ * Best effort: a block that cannot be read is skipped and no error is
+ * reported to the caller.
+ */
+static void free_duplicated_extent_block(ocfs2_filesys *fs,
+					struct ocfs2_extent_list *el)
+{
+	int idx;
+	char *blk_buf = NULL;
+	struct ocfs2_extent_rec *cur;
+	struct ocfs2_extent_block *child_eb;
+
+	assert(el->l_tree_depth > 0);
+
+	if (ocfs2_malloc_block(fs->fs_io, &blk_buf))
+		return;
+
+	for (idx = 0; idx < el->l_next_free_rec; idx++) {
+		cur = &el->l_recs[idx];
+
+		/* only records that cover clusters and whose block can be
+		 * read are processed.
+		 */
+		if (ocfs2_rec_clusters(el->l_tree_depth, cur) &&
+		    !ocfs2_read_extent_block(fs, cur->e_blkno, blk_buf)) {
+			child_eb = (struct ocfs2_extent_block *)blk_buf;
+			if (child_eb->h_list.l_tree_depth > 0)
+				free_duplicated_extent_block(fs,
+							     &child_eb->h_list);
+
+			ocfs2_delete_extent_block(fs, cur->e_blkno);
+		}
+	}
+
+	if (blk_buf)
+		ocfs2_free(&blk_buf);
+}
+
+/* Free every extent block referenced by the inode in di_buf.
+ * free_duplicated_extent_block() already walks all the records of the
+ * list it is given, so it must be called exactly once; calling it once
+ * per record would delete the same blocks repeatedly.
+ */
+static void free_duplicated_extent_block_dinode(ocfs2_filesys *fs,
+						char *di_buf)
+{
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_buf;
+	struct ocfs2_extent_list *el = &di->id2.i_list;
+
+	assert(el->l_tree_depth > 0);
+
+	free_duplicated_extent_block(fs, el);
+}
+
 /*
  * Insert an extent into an inode btree.
  */
@@ -1896,6 +2097,7 @@
 	struct insert_ctxt ctxt;
 	struct ocfs2_insert_type insert = {0, };
 	char *di_buf = NULL, *last_eb = NULL, *eb_buf = NULL;
+	char *backup_buf = NULL;
 
 	ret = ocfs2_malloc_block(fs->fs_io, &di_buf);
 	if (ret)
@@ -1908,6 +2110,33 @@
 	if (ret)
 		goto bail;
 
+	/* In order to orderize the written block sequence and avoid
+	 * the corruption for the inode, we duplicate the extent block
+	 * here and do the insertion in the duplicated ones.
+	 *
+	 * Note: we only do this in case the file has extent blocks.
+	 * And if the duplicate process fails, we should go on the normal
+	 * insert process.
+	 */
+	if (ctxt.di->id2.i_list.l_tree_depth) {
+		ret = ocfs2_malloc_block(fs->fs_io, &backup_buf);
+		if (ret)
+			goto bail;
+
+		memcpy(backup_buf, di_buf, fs->fs_blocksize);
+
+		/* duplicate the extent block. If it succeeds, di_buf
+		 * will point to the new allocated extent blocks, and
+		 * the following insertion will happens to the new ones.
+		 */
+		ret = duplicate_extent_block_dinode(fs, backup_buf, di_buf);
+		if (ret) {
+			memcpy(di_buf, backup_buf,fs->fs_blocksize);
+			ocfs2_free(&backup_buf);
+			backup_buf = NULL;
+		}
+	}
+
 	memset(&ctxt.rec, 0, sizeof(struct ocfs2_extent_rec));
 	ctxt.rec.e_cpos = cpos;
 	ctxt.rec.e_blkno = c_blkno;
@@ -1963,6 +2192,18 @@
 	ret = ocfs2_write_inode(fs, ino, di_buf);
 
 bail:
+	if (backup_buf) {
+		/* we have duplicated the extent block during the insertion.
+		 * so if it succeeds, we should free the old ones, and if fails,
+		 * the duplicate ones should be freed.
+		 */
+		if (ret)
+			free_duplicated_extent_block_dinode(fs, di_buf);
+		else
+			free_duplicated_extent_block_dinode(fs, backup_buf);
+		ocfs2_free(&backup_buf);
+	}
+
 	if (eb_buf)
 		ocfs2_free(&eb_buf);
 	if (last_eb)




More information about the Ocfs2-tools-commits mailing list