[Ocfs2-tools-commits] zab commits r521 - in trunk/fsck.ocfs2: . include

svn-commits at oss.oracle.com svn-commits at oss.oracle.com
Wed Dec 29 18:17:58 CST 2004


Author: zab
Date: 2004-12-29 18:17:56 -0600 (Wed, 29 Dec 2004)
New Revision: 521

Modified:
   trunk/fsck.ocfs2/fsck.c
   trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
   trunk/fsck.ocfs2/include/fsck.h
   trunk/fsck.ocfs2/include/strings.h
   trunk/fsck.ocfs2/pass2.c
   trunk/fsck.ocfs2/strings.c
Log:
- detect duplicate entries if they occur in the same 4MB region
- rename in place rather than reading and sorting the whole dir
- augment the string tracking a little to allow this


Modified: trunk/fsck.ocfs2/fsck.c
===================================================================
--- trunk/fsck.ocfs2/fsck.c	2004-12-29 19:22:32 UTC (rev 520)
+++ trunk/fsck.ocfs2/fsck.c	2004-12-30 00:17:56 UTC (rev 521)
@@ -148,13 +148,6 @@
 		return ret;
 	}
 
-	ret = ocfs2_block_bitmap_new(fs, "directory inodes to rebuild",
-				     &ost->ost_rebuild_dirs);
-	if (ret) {
-		com_err(whoami, ret, "while allocating rebuild dirs bitmap");
-		return ret;
-	}
-
 	return 0;
 }
 

Modified: trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in
===================================================================
--- trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2004-12-29 19:22:32 UTC (rev 520)
+++ trunk/fsck.ocfs2/fsck.ocfs2.checks.8.in	2004-12-30 00:17:56 UTC (rev 521)
@@ -459,6 +459,14 @@
 
 Answering yes clears this entry which was the second to refer to a given directory.  This reflects the policy that hard links to directories are not allowed.
 
+.SS "DIRENT_DUPLICATE"
+File names within a directory must be unique.  A file name occurred in more
+than one directory entry in a given directory.
+
+Answering yes renames the duplicate entry to a name that doesn't collide
+with recent entries and is unlikely to collide with future entries in
+the directory.
+
 .SS "DIRENT_LENGTH"
 There are very few directory entry lengths that are valid.  The lengths must be
 greater than the minimum required to record a single character directory, be

Modified: trunk/fsck.ocfs2/include/fsck.h
===================================================================
--- trunk/fsck.ocfs2/include/fsck.h	2004-12-29 19:22:32 UTC (rev 520)
+++ trunk/fsck.ocfs2/include/fsck.h	2004-12-30 00:17:56 UTC (rev 521)
@@ -39,8 +39,6 @@
 
 	ocfs2_bitmap	*ost_allocated_clusters;
 
-	ocfs2_bitmap	*ost_rebuild_dirs;
-
 	/* This is no more than a cache of what we know the i_link_count
 	 * in each inode to currently be.  If an inode is marked in used_inodes
 	 * this had better be up to date. */

Modified: trunk/fsck.ocfs2/include/strings.h
===================================================================
--- trunk/fsck.ocfs2/include/strings.h	2004-12-29 19:22:32 UTC (rev 520)
+++ trunk/fsck.ocfs2/include/strings.h	2004-12-30 00:17:56 UTC (rev 521)
@@ -28,12 +28,16 @@
 
 typedef struct _o2fsck_strings {
 	struct rb_root	s_root;
+	size_t		s_allocated;
 } o2fsck_strings;
 
+int o2fsck_strings_exists(o2fsck_strings *strings, char *string,
+			  size_t strlen);
 errcode_t o2fsck_strings_insert(o2fsck_strings *strings, char *string,
 				size_t strlen, int *is_dup);
 void o2fsck_strings_init(o2fsck_strings *strings);
 void o2fsck_strings_free(o2fsck_strings *strings);
+size_t o2fsck_strings_bytes_allocated(o2fsck_strings *strings);
 
 #endif /* __O2FSCK_STRINGS_H__ */
 

Modified: trunk/fsck.ocfs2/pass2.c
===================================================================
--- trunk/fsck.ocfs2/pass2.c	2004-12-29 19:22:32 UTC (rev 520)
+++ trunk/fsck.ocfs2/pass2.c	2004-12-30 00:17:56 UTC (rev 521)
@@ -29,10 +29,6 @@
  *
  * Pass 2 builds up the parent dir linkage as it scans the directory entries
  * so that pass 3 can walk the directory trees to find disconnected inodes.
- *
- * XXX
- * 	do something about duplicate entries?
- *
  */
 #include <string.h>
 #include <inttypes.h>
@@ -66,10 +62,12 @@
 }
 
 struct dirblock_data {
-	o2fsck_state *ost;
-	ocfs2_filesys *fs;
-	char *buf;
+	o2fsck_state 	*ost;
+	ocfs2_filesys 	*fs;
+	char 		*buf;
 	errcode_t	ret;
+	o2fsck_strings	strings;
+	uint64_t	last_ino;
 };
 
 static int dirent_has_dots(struct ocfs2_dir_entry *dirent, int num_dots)
@@ -422,18 +420,34 @@
 	return ret;
 }
 
+/* detecting dups is irritating because of the storage requirements of
+ * detecting duplicates.  e2fsck avoids the storage burden for a regular fsck
+ * pass by only detecting duplicate entries that occur in the same directory
+ * block.  its repair pass then suffers under enormous directories because it
+ * reads the whole thing into memory to detect duplicates.
+ *
+ * we'll take a compromise which expands the reach of a regular fsck pass by
+ * using a slightly larger block size but which repairs in place rather than
+ * reading the dir into memory.
+ *
+ * if we ever truly care to invest in duplicate detection and repair we could
+ * either explicitly use some external sort and merge algo or perhaps just
+ * combine mmap and some internal sort that has strong enough locality of
+ * reference to work well with the vm.
+ */
 static errcode_t fix_dirent_dups(o2fsck_state *ost,
 				 o2fsck_dirblock_entry *dbe,
 				 struct ocfs2_dir_entry *dirent,
 				 o2fsck_strings *strings,
-				 int *dups_in_block,
 				 int *flags)
 {
 	errcode_t ret = 0;
-	int was_set;
+	char *new_name = NULL;
+	int was_set, i;
 
-	if (*dups_in_block)
-		goto out;
+	/* start over every N bytes of dirent */
+	if (o2fsck_strings_bytes_allocated(strings) > (4 * 1024 * 1024))
+		o2fsck_strings_free(strings);
 
 	ret = o2fsck_strings_insert(strings, dirent->name, dirent->name_len, 
 				    &was_set);
@@ -446,18 +460,67 @@
 	if (!was_set)
 		goto out;
 
-	printf("Duplicate directory entry '%.*s' found.\n",
-	       dirent->name_len, dirent->name);
-	printf("Marking its parent %"PRIu64" for rebuilding.\n", dbe->e_ino);
-
-	ret = ocfs2_bitmap_set(ost->ost_rebuild_dirs, dbe->e_ino, &was_set);
-	if (ret)
-		com_err(whoami, ret, "while recording that inode %"PRIu64" "
-			"needs to have duplicate entries removed.",
+	new_name = calloc(1, dirent->rec_len + 1);
+	if (new_name == NULL) {
+		ret = OCFS2_ET_NO_MEMORY;
+		com_err(whoami, ret, "while trying to generate a new name "
+			"for duplicate file name '%.*s' in dir inode "
+			"%"PRIu64, dirent->name_len, dirent->name,
 			dbe->e_ino);
+		goto out;
+	}
 
-	*dups_in_block = 1;
+	/* just simple mangling for now */ 
+	memcpy(new_name, dirent->name, dirent->name_len);
+	was_set = 1;
+	/* append '_' to free space in the dirent until it's unique */
+	for (i = dirent->name_len ; was_set && i < dirent->rec_len; i++){
+		new_name[i] = '_';
+		if (!o2fsck_strings_exists(strings, new_name, strlen(new_name)))
+			was_set = 0;
+	}
+
+	/* rename characters at the end to '_' until it's unique */
+	for (i = dirent->name_len - 1 ; was_set && i >= 0; i--) {
+		new_name[i] = '_';
+		if (!o2fsck_strings_exists(strings, new_name, strlen(new_name)))
+			was_set = 0;
+	}
+
+	if (was_set) {
+		printf("Directory inode %"PRIu64" contains a duplicate "
+		       "occurance " "of the file name '%.*s' but fsck was "
+		       "unable to come up with a unique name so this duplicate "
+		       "name will not be dealt with.\n.",
+			dbe->e_ino, dirent->name_len, dirent->name);
+		goto out;
+	}
+
+	if (!prompt(ost, PY, PR_DIRENT_DUPLICATE,
+		    "Directory inode %"PRIu64" contains a duplicate occurance "
+		    "of the file name '%.*s'.  Replace this duplicate name "
+		    "with '%s'?", dbe->e_ino, dirent->name_len, dirent->name,
+		    new_name)) {
+		/* we don't really care that we leak new_name's recording
+		 * in strings, it'll be freed later */
+		goto out;
+	}
+
+	ret = o2fsck_strings_insert(strings, new_name, strlen(new_name),
+				    NULL);
+	if (ret) {
+		com_err(whoami, ret, "while allocating space to track "
+			"duplicates of a newly renamed dirent");
+		goto out;
+	}
+
+	dirent->name_len = strlen(new_name);
+	memcpy(dirent->name, new_name, dirent->name_len);
+	*flags |= OCFS2_DIRENT_CHANGED;
+
 out:
+	if (new_name != NULL)
+		free(new_name);
 	return ret;
 }
 
@@ -479,8 +542,6 @@
 	struct dirblock_data *dd = priv_data;
 	struct ocfs2_dir_entry *dirent, *prev = NULL;
 	unsigned int offset = 0, ret_flags = 0;
-	o2fsck_strings strings;
-	int dups_in_block = 0;
 	errcode_t ret;
 
 	if (!o2fsck_test_inode_allocated(dd->ost, dbe->e_ino)) {
@@ -490,7 +551,10 @@
 		return 0;
 	}
 
-	o2fsck_strings_init(&strings);
+	if (dbe->e_ino != dd->last_ino) {
+		o2fsck_strings_free(&dd->strings);
+		dd->last_ino = dbe->e_ino;
+	}
 
  	ret = ocfs2_read_dir_block(dd->fs, dbe->e_blkno, dd->buf);
 	if (ret && ret != OCFS2_ET_DIR_CORRUPTED) {
@@ -572,8 +636,8 @@
 		if (dirent->inode == 0)
 			goto next;
 
-		ret = fix_dirent_dups(dd->ost, dbe, dirent, &strings,
-				      &dups_in_block, &ret_flags);
+		ret = fix_dirent_dups(dd->ost, dbe, dirent, &dd->strings,
+				      &ret_flags);
 		if (ret)
 			goto out;
 		if (dirent->inode == 0)
@@ -596,7 +660,6 @@
 		}
 	}
 
-	o2fsck_strings_free(&strings);
 out:
 	if (ret)
 		dd->ret = ret;
@@ -610,10 +673,13 @@
 	struct dirblock_data dd = {
 		.ost = ost,
 		.fs = ost->ost_fs,
+		.last_ino = 0,
 	};
 
 	printf("Pass 2: Checking directory entries.\n");
 
+	o2fsck_strings_init(&dd.strings);
+
 	retval = ocfs2_malloc_block(ost->ost_fs->fs_io, &dd.buf);
 	if (retval)
 		return retval;
@@ -636,6 +702,7 @@
 
 	o2fsck_dir_block_iterate(&ost->ost_dirblocks, pass2_dir_block_iterate, 
 			 	 &dd);
+	o2fsck_strings_free(&dd.strings);
 	ocfs2_free(&dd.buf);
 	return 0;
 }

Modified: trunk/fsck.ocfs2/strings.c
===================================================================
--- trunk/fsck.ocfs2/strings.c	2004-12-29 19:22:32 UTC (rev 520)
+++ trunk/fsck.ocfs2/strings.c	2004-12-30 00:17:56 UTC (rev 521)
@@ -40,15 +40,46 @@
 	char		s_string[0]; /* null terminated */
 };
 
+/* I'm too lazy to share code with _insert right now */
+int o2fsck_strings_exists(o2fsck_strings *strings, char *string,
+			  size_t strlen)
+{	struct rb_node ** p = &strings->s_root.rb_node;
+	struct rb_node * parent = NULL;
+	struct string_entry *se;
+	int cmp;
+
+	while (*p)
+	{
+		parent = *p;
+		se = rb_entry(parent, struct string_entry, s_node);
+
+		/* we don't actually care about lexicographical sorting */
+		cmp = strlen - se->s_strlen;
+		if (cmp == 0)
+			cmp = memcmp(string, se->s_string, strlen);
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else if (cmp > 0)
+			p = &(*p)->rb_right;
+		else {
+			return 1;
+		}
+	}
+	return 0;
+}
+
 errcode_t o2fsck_strings_insert(o2fsck_strings *strings, char *string,
 			   size_t strlen, int *is_dup)
 {
 	struct rb_node ** p = &strings->s_root.rb_node;
 	struct rb_node * parent = NULL;
 	struct string_entry *se;
+	size_t bytes;
 	int cmp;
 
-	*is_dup = 0;
+	if (is_dup)
+		*is_dup = 0;
 
 	while (*p)
 	{
@@ -71,15 +102,19 @@
 		else if (cmp > 0)
 			p = &(*p)->rb_right;
 		else {
-			*is_dup = 1;
+			if (is_dup)
+				*is_dup = 1;
 			return 0;
 		}
 	}
 
-	se = malloc(offsetof(struct string_entry, s_string[strlen]));
+	bytes = offsetof(struct string_entry, s_string[strlen]);
+	se = malloc(bytes);
 	if (se == NULL)
 		return OCFS2_ET_NO_MEMORY;
 
+	strings->s_allocated += bytes;
+
 	se->s_strlen = strlen;
 	memcpy(se->s_string, string, strlen);
 
@@ -104,4 +139,11 @@
 		rb_erase(node, &strings->s_root);
 		free(se);
 	}
+
+	strings->s_allocated = 0;
 }
+
+size_t o2fsck_strings_bytes_allocated(o2fsck_strings *strings)
+{
+	return strings->s_allocated;
+}



More information about the Ocfs2-tools-commits mailing list