[Ocfs2-tools-commits] zab commits r903 - in trunk: fsck.ocfs2 fsck.ocfs2/include libocfs2

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Thu May 19 18:50:21 CDT 2005


Author: zab
Signed-off-by: mfasheh
Date: 2005-05-19 18:50:19 -0500 (Thu, 19 May 2005)
New Revision: 903

Modified:
   trunk/fsck.ocfs2/dirparents.c
   trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
   trunk/fsck.ocfs2/include/dirparents.h
   trunk/fsck.ocfs2/pass1.c
   trunk/fsck.ocfs2/pass3.c
   trunk/fsck.ocfs2/pass4.c
   trunk/libocfs2/link.c
Log:
o verify and account for truncate logs
o don't try and fix '..' in dirs on the orphan list
o fix a mem leak in mark_local_allocs()
o account for the ref that '..' in a subdir drops when removed from orphan dir
o turbo what, now?

Signed-off-by: mfasheh


Modified: trunk/fsck.ocfs2/dirparents.c
===================================================================
--- trunk/fsck.ocfs2/dirparents.c	2005-05-19 23:49:06 UTC (rev 902)
+++ trunk/fsck.ocfs2/dirparents.c	2005-05-19 23:50:19 UTC (rev 903)
@@ -41,7 +41,8 @@
 errcode_t o2fsck_add_dir_parent(struct rb_root *root,
 				uint64_t ino,
 				uint64_t dot_dot,
-				uint64_t dirent)
+				uint64_t dirent,
+				unsigned in_orphan_dir)
 {
 	struct rb_node ** p = &root->rb_node;
 	struct rb_node * parent = NULL;
@@ -59,6 +60,7 @@
 	dp->dp_dirent = dirent;
 	dp->dp_connected = 0;
 	dp->dp_loop_no = 0;
+	dp->dp_in_orphan_dir = in_orphan_dir ? 1 : 0;
 
 	while (*p)
 	{

Modified: trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
===================================================================
--- trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2005-05-19 23:49:06 UTC (rev 902)
+++ trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2005-05-19 23:50:19 UTC (rev 903)
@@ -296,6 +296,44 @@
 Answering yes clears the local allocator.  No information is lost but the global
 bitmap allocator may need to be updated to reflect clusters that were reserved for the local allocator but were free.
 
+.SS "DEALLOC_COUNT"
+The given truncate log inode contains a count that is greater than the 
+value that is possible given the size of the inode.
+
+Answering yes resets the count value to the possible maximum.
+
+.SS "DEALLOC_USED"
+The given truncate log inode claims to have more records in use than
+it is possible to store in the inode.
+
+Answering yes resets the record of the number used to the maximum 
+value possible.
+
+.SS "TRUNCATE_REC_START_RANGE"
+A truncate record was found which claims to start at a cluster that is beyond
+the number of clusters in the volume.
+
+Answering yes will clear the truncate record.  This may result in previously
+freed space being marked as allocated.  This will be fixed up later as the
+allocator is updated to match what is used by the file system.
+
+.SS "TRUNCATE_REC_WRAP"
+Clusters are recorded as 32-bit values.  A truncate record was found which claims
+to have enough clusters to cause this value to wrap.  This could never be the case and
+is a sure sign of corruption.
+
+Answering yes will clear the truncate record.  This may result in previously
+freed space being marked as allocated.  This will be fixed up later as the
+allocator is updated to match what is used by the file system.
+
+.SS "TRUNCATE_REC_RANGE"
+A truncate record was found which claims to reference a region of clusters which
+partially extends beyond the number of clusters in the volume.
+
+Answering yes will clear the truncate record.  This may result in previously
+freed space being marked as allocated.  This will be fixed up later as the
+allocator is updated to match what is used by the file system.
+
 .SS "INODE_GEN"
 Inodes are created with a generation number to match the generation
 number of the volume at the time of creation.  An Inode was found which

Modified: trunk/fsck.ocfs2/include/dirparents.h
===================================================================
--- trunk/fsck.ocfs2/include/dirparents.h	2005-05-19 23:49:06 UTC (rev 902)
+++ trunk/fsck.ocfs2/include/dirparents.h	2005-05-19 23:50:19 UTC (rev 903)
@@ -37,13 +37,15 @@
 	/* used by pass3 to walk the dir_parent structs and ensure 
 	 * connectivity */
 	uint64_t	dp_loop_no;
-	int		dp_connected;
+	unsigned	dp_connected:1,
+			dp_in_orphan_dir:1;
 } o2fsck_dir_parent;
 
 errcode_t o2fsck_add_dir_parent(struct rb_root *root,
 				uint64_t ino,
 				uint64_t dot_dot,
-				uint64_t dirent);
+				uint64_t dirent,
+				unsigned in_orphan_dir);
 
 o2fsck_dir_parent *o2fsck_dir_parent_lookup(struct rb_root *root, 
 						uint64_t ino);

Modified: trunk/fsck.ocfs2/pass1.c
===================================================================
--- trunk/fsck.ocfs2/pass1.c	2005-05-19 23:49:06 UTC (rev 902)
+++ trunk/fsck.ocfs2/pass1.c	2005-05-19 23:50:19 UTC (rev 903)
@@ -311,6 +311,104 @@
 	return ret;
 }
 
+/* Sanity-check a truncate log inode's tl_count, tl_used and records, prompting
+ * before each repair.  It doesn't do anything with the log's contents yet */
+static errcode_t verify_truncate_log(o2fsck_state *ost, ocfs2_dinode *di)
+{
+	ocfs2_truncate_log *tl = &di->id2.i_dealloc;
+	uint16_t max, i;
+	int changed = 0;	/* set once di has been modified in memory */
+	errcode_t ret = 0;
+
+	verbosef("tl_count %u tl_used %u (tl_reserved1 %u)\n", tl->tl_count,
+		 tl->tl_used, tl->tl_reserved1);
+
+	max = ocfs2_truncate_recs_per_inode(ost->ost_fs->fs_blocksize);
+
+	if (tl->tl_count > max &&
+	    prompt(ost, PY, PR_DEALLOC_COUNT,
+		   "Truncate log inode %"PRIu64" claims space for %u records but only %u "
+		   "records are possible.  Set the inode's count to the maximum?",
+		   di->i_blkno, tl->tl_count, max)) {
+
+		tl->tl_count = max;
+		changed = 1;
+	}
+
+	if (tl->tl_used > tl->tl_count &&
+	    prompt(ost, PY, PR_DEALLOC_USED,
+		   "Truncate log inode %"PRIu64" claims to be using %u records but the "
+		   "inode can only hold %u records.  Change the number used to reflect "
+		   "the maximum possible in the inode?", di->i_blkno, tl->tl_used,
+		   tl->tl_count)) {
+
+		tl->tl_used = tl->tl_count;
+		changed = 1;
+	}
+
+	for (i = 0; i < ocfs2_min(max, tl->tl_used); i++) { /* never past max */
+		ocfs2_truncate_rec *tr = &tl->tl_recs[i];
+		int zero = 0;
+
+		verbosef("t_start %u t_clusters %u\n", tr->t_start,
+		         tr->t_clusters);
+
+		if (tr->t_start == 0)
+			continue;	/* t_start of 0 is how a cleared record looks */
+
+		if (tr->t_start >= ost->ost_fs->fs_clusters &&
+	            prompt(ost, PY, PR_TRUNCATE_REC_START_RANGE,
+			   "Truncate record at offset %u in truncate log "
+			   "inode %"PRIu64" starts at cluster %u but there "
+			   "are %u clusters in the volume. Remove this record "
+			   "from the log?", i, di->i_blkno, tr->t_start,
+			   ost->ost_fs->fs_clusters)) {
+				zero = 1;
+		}
+
+		if (tr->t_start + tr->t_clusters < tr->t_start &&
+	            prompt(ost, PY, PR_TRUNCATE_REC_WRAP,
+			   "Truncate record at offset %u in truncate log "
+			   "inode %"PRIu64" starts at cluster %u and contains "
+			   "%u clusters.  It can't have this many clusters "
+			   "as that overflows the number of possible clusters "
+			   "in a volume.  Remove this record from the log?",
+			   i, di->i_blkno, tr->t_start, tr->t_clusters)) {
+				zero = 1;
+		}
+
+		if (tr->t_start + tr->t_clusters > ost->ost_fs->fs_clusters &&
+	            prompt(ost, PY, PR_TRUNCATE_REC_RANGE,
+			   "Truncate record at offset %u in truncate log "
+			   "inode %"PRIu64" starts at cluster %u and contains "
+			   "%u clusters.  It can't have this many clusters "
+			   "as this volume only has %u clusters. Remove this "
+			   "record from the log?",
+			   i, di->i_blkno, tr->t_start, tr->t_clusters,
+			   ost->ost_fs->fs_clusters)) {
+				zero = 1;
+		}
+
+		if (zero) {	/* at least one prompt above was answered yes */
+			tr->t_start = 0;
+			tr->t_clusters = 0;
+			changed = 1;
+		}
+	}
+
+	if (changed) {
+		ret = ocfs2_write_inode(ost->ost_fs, di->i_blkno, (char *)di);
+		if (ret) {
+			com_err(whoami, ret, "while writing truncate log inode "
+				    "%"PRIu64, di->i_blkno);
+			ost->ost_write_error = 1;
+			ret = 0;	/* failure is noted in ost_write_error instead */
+		}
+	}
+
+	return ret;
+}
+
 /* Check the basics of the ocfs2_dinode itself.  If we find problems
  * we clear the VALID flag and the caller will see that and update
  * inode allocations and write the inode to disk. 
@@ -388,7 +486,8 @@
 
 	if (S_ISDIR(di->i_mode)) {
 		ocfs2_bitmap_set(ost->ost_dir_inodes, blkno, NULL);
-		o2fsck_add_dir_parent(&ost->ost_dir_parents, blkno, 0, 0);
+		o2fsck_add_dir_parent(&ost->ost_dir_parents, blkno, 0, 0,
+				      di->i_flags & OCFS2_ORPHANED_FL);
 	} else if (S_ISREG(di->i_mode)) {
 		ocfs2_bitmap_set(ost->ost_reg_inodes, blkno, NULL);
 	} else if (S_ISLNK(di->i_mode)) {
@@ -419,6 +518,8 @@
 
 	if (di->i_flags & OCFS2_LOCAL_ALLOC_FL)
 		verify_local_alloc(ost, di);
+	else if (di->i_flags & OCFS2_DEALLOC_FL)
+		verify_truncate_log(ost, di);
 
 out:
 	/* XXX when we clear we need to also free whatever blocks may have
@@ -628,7 +729,8 @@
 	 * We might be able to be very clever about discovering the 
 	 * difference between i_symlink and i_list, but we don't try yet.
 	 */
-	if (di->i_flags & OCFS2_LOCAL_ALLOC_FL)
+	if (di->i_flags & OCFS2_LOCAL_ALLOC_FL ||
+	    di->i_flags & OCFS2_DEALLOC_FL)
 		ret = 0;
 	else if (di->i_flags & OCFS2_CHAIN_FL)
 		ret = ocfs2_chain_iterate(fs, blkno, check_gd_block, &vb);
@@ -792,9 +894,79 @@
 		}
 	}
 out:
+	if (buf)
+		ocfs2_free(&buf);
 	return;
 }
 
+/*
+ * For each node slot, read its truncate log inode and mark the clusters in
+ * its used records as allocated.  Clusters in the truncate logs should still
+ * be allocated; we let the kernel replay the logs and free them.  This will
+ * change someday when fsck learns to fully practice recovery instead of just
+ * making sure that the system is in a coherent recoverable state.
+ */
+static void mark_truncate_logs(o2fsck_state *ost)
+{
+	uint16_t node, max_nodes, i, max;
+	ocfs2_truncate_log *tl;
+	uint64_t blkno;
+	ocfs2_dinode *di;
+	char *buf = NULL;
+	errcode_t ret;
+
+	max_nodes = OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_max_nodes;
+	max = ocfs2_truncate_recs_per_inode(ost->ost_fs->fs_blocksize);
+
+	ret = ocfs2_malloc_block(ost->ost_fs->fs_io, &buf);
+	if (ret) {
+		com_err(whoami, ret, "while allocating an inode buffer to "
+			"use accounting for records in truncate logs");
+		goto out;
+	}
+
+	di = (ocfs2_dinode *)buf;
+
+	for (node = 0; node < max_nodes; node++) {
+		ret = ocfs2_lookup_system_inode(ost->ost_fs,
+						TRUNCATE_LOG_SYSTEM_INODE,
+						node, &blkno);
+		if (ret) {
+			com_err(whoami, ret, "while looking up truncate log "
+				"inode for node %u to account for its records",
+				node);	/* blkno is not valid if the lookup failed */
+			goto out;
+		}
+
+		ret = ocfs2_read_inode(ost->ost_fs, blkno, buf);
+		if (ret) {
+			com_err(whoami, ret, "while reading truncate log "
+				"inode %"PRIu64" to account for its records",
+				blkno);
+			goto out;
+		}
+
+		tl = &di->id2.i_dealloc;
+
+		for (i = 0; i < ocfs2_min(tl->tl_used, max); i++) {
+			ocfs2_truncate_rec *tr = &tl->tl_recs[i];
+
+			if (tr->t_start == 0)
+				continue;	/* skip records with no start cluster */
+
+			verbosef("rec [%u, %u] at off %u\n", tr->t_start, tr->t_clusters,
+				 i);
+
+			o2fsck_mark_clusters_allocated(ost, tr->t_start,
+						       tr->t_clusters);
+		}
+	}
+out:
+	if (buf)
+		ocfs2_free(&buf);
+	return;
+}
+
 /* XXX we really need to get the latch stuff straight */
 static errcode_t force_cluster_bit(o2fsck_state *ost, 
 				   ocfs2_cached_inode *ci,
@@ -1017,6 +1189,7 @@
 	}
 
 	mark_local_allocs(ost);
+	mark_truncate_logs(ost);
 	write_cluster_alloc(ost);
 	write_inode_alloc(ost);
 

Modified: trunk/fsck.ocfs2/pass3.c
===================================================================
--- trunk/fsck.ocfs2/pass3.c	2005-05-19 23:49:06 UTC (rev 902)
+++ trunk/fsck.ocfs2/pass3.c	2005-05-19 23:50:19 UTC (rev 903)
@@ -85,7 +85,7 @@
 	o2fsck_icount_set(ost->ost_icount_refs, blkno, 1);
 	ret = o2fsck_add_dir_parent(&ost->ost_dir_parents, blkno, 
 				    ost->ost_fs->fs_root_blkno,
-				    ost->ost_fs->fs_root_blkno);
+				    ost->ost_fs->fs_root_blkno, 0);
 	if (ret) {
 		com_err(whoami, ret, "while recording a new root directory");
 		goto out;
@@ -165,7 +165,7 @@
 	o2fsck_icount_set(ost->ost_icount_refs, blkno, 2);
 	ret = o2fsck_add_dir_parent(&ost->ost_dir_parents, blkno, 
 				    ost->ost_fs->fs_root_blkno,
-				    ost->ost_fs->fs_root_blkno);
+				    ost->ost_fs->fs_root_blkno, 0);
 	if (ret) {
 		com_err(whoami, ret, "while recording a new /lost+found "
 			"directory");
@@ -373,6 +373,22 @@
 		break;
 	}
 
+	/* 
+	 * orphan dirs are a magically awesome special case.  they have
+	 * their i_link_count increased when subdirs are added but
+	 * the subdir's '..' entry isn't updated to point to the orphan
+	 * dir.  we alter our book-keeping to make it look like the '..'
+	 * was reasonable on disk.
+	 */
+	if (dir->dp_in_orphan_dir) {
+		/* previous '..' entry is garbage */
+		if (dir->dp_dot_dot)
+			o2fsck_icount_delta(ost->ost_icount_refs,
+					    dir->dp_dot_dot, -1);
+		/* pretend '..' pointed to the orphan dir */
+		dir->dp_dot_dot = dir->dp_dirent;
+		o2fsck_icount_delta(ost->ost_icount_refs, dir->dp_dot_dot, 1);
+	}
 	if (dir->dp_dirent != dir->dp_dot_dot) {
 		fix = prompt(ost, PY, PR_DIR_DOTDOT,
 			     "Directory inode %"PRIu64" is "

Modified: trunk/fsck.ocfs2/pass4.c
===================================================================
--- trunk/fsck.ocfs2/pass4.c	2005-05-19 23:49:06 UTC (rev 902)
+++ trunk/fsck.ocfs2/pass4.c	2005-05-19 23:50:19 UTC (rev 903)
@@ -131,8 +131,17 @@
 		goto out;
 	}
 
+	/* this matches a special case in o2fsck_verify_inode_fields() where
+	 * orphan dir members are recorded as having 1 link count, even
+	 * though they have 0 on disk */
 	o2fsck_icount_delta(ost->ost_icount_in_inodes, dirent->inode, -1);
-	o2fsck_icount_delta(ost->ost_icount_refs, dirent->inode, -1);
+
+	/* dirs have this dirent ref and their '.' dirent */
+	if (dirent->file_type == OCFS2_FT_DIR)
+		o2fsck_icount_delta(ost->ost_icount_refs, dirent->inode, -2);
+	else
+		o2fsck_icount_delta(ost->ost_icount_refs, dirent->inode, -1);
+
 	dirent->inode = 0;
 	ret_flags |= OCFS2_DIRENT_CHANGED;
 
@@ -213,7 +222,7 @@
 			printf("fsck's internal inode link count tracking "
 			       "isn't consistent. (ref_ret = %d ref_blkno = "
 			       "%"PRIu64" inode_ret = %d inode_blkno = "
-			       "%"PRIu64"\n", (int)ref_ret, ref_blkno,
+			       "%"PRIu64")\n", (int)ref_ret, ref_blkno,
 			       (int)inode_ret, inode_blkno);
 			ret = OCFS2_ET_INTERNAL_FAILURE;
 			break;

Modified: trunk/libocfs2/link.c
===================================================================
--- trunk/libocfs2/link.c	2005-05-19 23:49:06 UTC (rev 902)
+++ trunk/libocfs2/link.c	2005-05-19 23:50:19 UTC (rev 903)
@@ -105,9 +105,6 @@
  * Note: the low 3 bits of the flags field are used as the directory
  * entry filetype.
  */
-#ifdef __TURBOC__
- #pragma argsused
-#endif
 errcode_t ocfs2_link(ocfs2_filesys *fs, uint64_t dir, const char *name, 
 		     uint64_t ino, int flags)
 {



More information about the Ocfs2-tools-commits mailing list