[Ocfs2-tools-commits] zab commits r351 - in trunk/fsck.ocfs2: . include
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Thu Oct 28 19:46:40 CDT 2004
Author: zab
Date: 2004-10-28 19:46:38 -0500 (Thu, 28 Oct 2004)
New Revision: 351
Modified:
trunk/fsck.ocfs2/fsck.c
trunk/fsck.ocfs2/include/util.h
trunk/fsck.ocfs2/pass0.c
trunk/fsck.ocfs2/pass4.c
Log:
strengthen inode allocator checking
o check and fix more blkno fields
o more forcefully fix descs into chains instead of orphaning them
o write back modified inodes and descs
o total up chain bits in inode
o properly fix up count/free in the chain rec
Modified: trunk/fsck.ocfs2/fsck.c
===================================================================
--- trunk/fsck.ocfs2/fsck.c 2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/fsck.c 2004-10-29 00:46:38 UTC (rev 351)
@@ -25,14 +25,6 @@
* Roughly o2fsck performs the following operations. Each pass' file has
* more details.
*
- * - replay the journals if needed
- * - walk the journal extents looking for simple inconsistencies
- * - loops, doubly referenced blocks
- * - need this code later anyway for verifying files
- * and i_clusters/i_size
- * - prompt to proceed if errors (mention backup superblock)
- * - ignore entirely or partially replay?
- *
* - pass0: clean up the inode allocators
* - kill loops, chains can't share groups
* - move local allocs back to the global or something?
Modified: trunk/fsck.ocfs2/include/util.h
===================================================================
--- trunk/fsck.ocfs2/include/util.h 2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/include/util.h 2004-10-29 00:46:38 UTC (rev 351)
@@ -44,6 +44,12 @@
exit(FSCK_ERROR); \
} while (0)
+#define maybe_fatal(errcode, fmt...) do { \
+ errcode_t _err = errcode; \
+ if (_err == OCFS2_ET_NO_MEMORY || _err == OCFS2_ET_IO) \
+ fatal_error(_err, fmt); \
+} while (0)
+
void o2fsck_write_inode(ocfs2_filesys *fs, uint64_t blkno, ocfs2_dinode *di);
#endif /* __O2FSCK_UTIL_H__ */
Modified: trunk/fsck.ocfs2/pass0.c
===================================================================
--- trunk/fsck.ocfs2/pass0.c 2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/pass0.c 2004-10-29 00:46:38 UTC (rev 351)
@@ -21,12 +21,19 @@
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*
- * Pass 0 verifies that the inode suballocators can be iterated over by
- * latter passes without risk of running into corruption. Usually our passes
- * are analagous to ext{2,3} but ocfs2's allocation is more dynamic. As
- * pass 0 it only makes sure that basic iteration in pass 1 will work. After
- * building a set of active inodes Pass 1 will call back into us to sync the
- * bitmaps with the active inodes.
+ * Pass 0 verifies that the inode suballocators can be iterated over by later
+ * passes without risk of running into corruption. This is so the passes can
+ * build up state without having to worry about tearing it down half way
+ * through to clean up the suballocators. For now fsck treats failure to find
+ * and verify the suballocator inodes themselves as fatal. It will only clean
+ * up the data they point to.
+ *
+ * pass0 updates group descriptor chains on disk.
+ *
+ * XXX
+ * track used blocks that iteration won't see?
+ * verify more inode fields?
+ * use prompt to mark soft errors
*/
#include <string.h>
@@ -55,13 +62,16 @@
struct chain_state *cs, ocfs2_group_desc *bg,
uint64_t blkno)
{
+ int changed = 0;
+
verbosef("checking desc at %"PRIu64"; blkno %"PRIu64" size %u bits %u "
"free_bits %u chain %u generation %u\n", blkno, bg->bg_blkno,
bg->bg_size, bg->bg_bits, bg->bg_free_bits_count,
bg->bg_chain, bg->bg_generation);
- /* We'll only consider this a valid descriptor if its signature,
- * parent inode, and generation all check out */
+ /* Once we think it's a valid group desc we aggressively tie it
+ * into the inode that pointed to it for fear of losing any
+ * descriptors. */
if (memcmp(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE,
strlen(OCFS2_GROUP_DESC_SIGNATURE))) {
printf("Group descriptor at block %"PRIu64" has an invalid "
@@ -72,39 +82,66 @@
/* XXX maybe for advanced pain we could check to see if these
* kinds of descs have valid generations for the inodes they
* reference */
- if (bg->bg_parent_dinode != di->i_blkno) {
- printf("Group descriptor at block %"PRIu64" is referenced by "
- "inode %"PRIu64" but thinks its parent inode is "
- "%"PRIu64"\n", blkno, di->i_blkno,
- bg->bg_parent_dinode);
- return -1;
+ if ((bg->bg_parent_dinode != di->i_blkno) &&
+ prompt(ost, PY, "Group descriptor at block %"PRIu64" is "
+ "referenced by inode %"PRIu64" but thinks its parent inode "
+ "is %"PRIu64". Fix the descriptor's parent inode?", blkno,
+ di->i_blkno, bg->bg_parent_dinode)) {
+ bg->bg_parent_dinode = di->i_blkno;
+ changed = 1;
}
- if (bg->bg_generation != di->i_generation) {
- printf("Group descriptor at block %"PRIu64" is referenced by "
- "inode %"PRIu64" who has a generation of %u, but "
- "the descriptor has a generation of %u\n",blkno,
- di->i_blkno, di->i_generation, bg->bg_generation);
- return -1;
+ if ((bg->bg_generation != di->i_generation) &&
+ prompt(ost, PY, "Group descriptor at block %"PRIu64" is "
+ "referenced by inode %"PRIu64" who has a generation of "
+ "%u, but the descriptor has a generation of %u. Update "
+ "the descriptor's generation?", blkno, di->i_blkno,
+ di->i_generation, bg->bg_generation)) {
+ bg->bg_generation = di->i_generation;
+ changed = 1;
}
- /* XXX check bg_blkno */
+ if ((bg->bg_blkno != blkno) &&
+ prompt(ost, PY, "Group descriptor read from block %"PRIu64" "
+ "claims to be located at block %"PRIu64". Update its "
+ "recorded block location?", blkno, di->i_blkno)) {
+ bg->bg_blkno = blkno;
+ changed = 1;
+ }
- /* XXX check bg_chain */
+ if ((bg->bg_chain != cs->cs_chain_no) &&
+ prompt(ost, PY, "Group descriptor at block %"PRIu64" was "
+ "found in chain %u but it claims to be in chain %u. Update "
+ "the descriptor's recorded chain?", blkno, cs->cs_chain_no,
+ bg->bg_chain)) {
+ bg->bg_chain = cs->cs_chain_no;
+ changed = 1;
+ }
- /* XXX check _chain and worry about cpg/bpc lining up with bg_bits.
- * ah, bpc/cpg changes between the global bitmap and inode allocators,
- * not within an inode allocator. and its variable for clustersize/
- * blocksize. */
+ if ((bg->bg_free_bits_count > bg->bg_bits) &&
+ prompt(ost, PY, "Group descriptor at block %"PRIu64" claims to "
+ "have %u free bits which is more than its %u total bits. "
+ "Drop its free bit count down to the total?", blkno,
+ bg->bg_free_bits_count, bg->bg_bits)) {
+ bg->bg_free_bits_count = bg->bg_bits;
+ changed = 1;
+ }
-#if 0
- /* XXX hmm, do we care about these checks? if we want to be able
- * to use the allocator, I think so. This means walking them and
- * fixing up the bitmaps. maybe we'll fix them up after we've
- * iterated through inodes but before we start allocating? */
- if (bg->bg_bits != (u32)chain->cl_cpg * (u32)chain->cl_bpc) {
+ /* XXX check bg_bits vs cpg/bpc. */
+
+ if (changed) {
+ errcode_t ret;
+ /* XXX maybe a helper.. */
+ ret = ocfs2_write_group_desc(ost->ost_fs, bg->bg_blkno,
+ (char *)bg);
+ if (ret) {
+ fatal_error(ret, "while writing a group descriptor to "
+ "block %"PRIu64" somewhere in chain %d in "
+ "group allocator inode %"PRIu64,
+ bg->bg_blkno, cs->cs_chain_no,
+ di->i_blkno);
+ }
}
-#endif
cs->cs_total_bits += bg->bg_bits;
cs->cs_free_bits += bg->bg_free_bits_count;
@@ -112,12 +149,14 @@
return 0;
}
+/* returns non-zero if the chain_rec was updated */
static int check_chain(o2fsck_state *ost, ocfs2_dinode *di,
struct chain_state *cs, ocfs2_chain_rec *chain,
char *buf1, char *buf2)
{
ocfs2_group_desc *bg1 = (ocfs2_group_desc *)buf1;
ocfs2_group_desc *bg2 = (ocfs2_group_desc *)buf2;
+ ocfs2_group_desc *write_bg = NULL;
uint64_t blkno = chain->c_blkno;
errcode_t ret;
int rc;
@@ -125,84 +164,121 @@
verbosef("free %u total %u blkno %"PRIu64"\n", chain->c_free,
chain->c_total, chain->c_blkno);
- if (ocfs2_block_out_of_range(ost->ost_fs, blkno))
+ if (chain->c_blkno == 0)
return 0;
+ if (ocfs2_block_out_of_range(ost->ost_fs, blkno)) {
+ if (!prompt(ost, PY, "Chain record %d in group allocator inode "
+ "%"PRIu64" points to block %"PRIu64" which is out "
+ "of range. fsck can't continue without deleting "
+ "this chain. Delete it?", cs->cs_chain_no,
+ di->i_blkno, blkno))
+ exit(FSCK_ERROR);
+
+ chain->c_blkno = 0;
+ return 1;
+ }
+
ret = ocfs2_read_group_desc(ost->ost_fs, blkno, buf1);
if (ret) {
- /* trans or persis io error hmm. */
- rc = -1;
+ maybe_fatal(ret, "while reading a group descriptor from block "
+ "%"PRIu64" as pointed to by chain record %d in "
+ "group allocator inode %"PRIu64, blkno,
+ cs->cs_chain_no, di->i_blkno);
+ if (!prompt(ost, PY, "fsck can't continue without deleting "
+ "this chain. Delete it?"))
+ exit(FSCK_ERROR);
+
+ chain->c_blkno = 0;
+ return 1;
}
rc = check_group_desc(ost, di, cs, bg1, blkno);
- if (rc < 0 && prompt(ost, PY, "Chain %d in group allocator inode "
+ if (rc < 0) {
+ if (!prompt(ost, PY, "Chain %d in group allocator inode "
"%"PRIu64" points to an invalid descriptor block "
- "at %"PRIu64". Truncate this chain by removing "
- " this reference?", cs->cs_chain_no, di->i_blkno,
- blkno)) {
- /* this essentially frees this chain. */
- chain->c_free = 0;
- chain->c_total = 0;
+ "at %"PRIu64". fsck can't continue without "
+ "deleting this chain. Delete it?",
+ cs->cs_chain_no, di->i_blkno, blkno))
+ exit(FSCK_ERROR);
+
chain->c_blkno = 0;
return 1;
}
- if (rc > 1) {
- /* XXX write */
- }
+ /* read in each group desc and check it. if we see an error we try
+ * to truncate the list after the last good desc */
while (bg1->bg_next_group) {
ret = ocfs2_read_group_desc(ost->ost_fs, bg1->bg_next_group,
buf2);
if (ret) {
- /* trans or persis io error hmm. */
- rc = -1;
+ maybe_fatal(ret, "while reading a group descriptor "
+ "from block %"PRIu64" as pointed to by "
+ "chain record %d in group allocator inode "
+ "%"PRIu64, bg1->bg_next_group,
+ cs->cs_chain_no, di->i_blkno);
+ } else {
+ rc = check_group_desc(ost, di, cs, bg2,
+ bg1->bg_next_group);
+ if (rc == 0) {
+ memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);
+ continue;
+ }
+ /* fall through if check_group_desc fails */
}
- rc = check_group_desc(ost, di, cs, bg2, bg1->bg_next_group);
- if (rc > 1) {
- /* XXX write */
- }
+ if (!prompt(ost, PY, "fsck can't continue without truncating "
+ "this chain by removing the link to the offending "
+ "block. Truncate it?"))
+ exit(FSCK_ERROR);
- if (rc == 0) {
- blkno = bg1->bg_next_group;
- memcpy(buf1, buf2, ost->ost_fs->fs_blocksize);
+ bg1->bg_next_group = 0;
+ write_bg = bg1;
+ break;
+ }
- continue;
+ if (write_bg) {
+ ret = ocfs2_write_group_desc(ost->ost_fs, write_bg->bg_blkno,
+ (char *)write_bg);
+ if (ret) {
+ fatal_error(ret, "while writing a group descriptor to "
+ "block %"PRIu64" somewhere in chain %d in "
+ "group allocator inode %"PRIu64,
+ write_bg->bg_blkno, cs->cs_chain_no,
+ di->i_blkno);
}
-
- if (prompt(ost, PY, "Desc %"PRIu64" points to an invalid "
- "descriptor at block %"PRIu64". Truncate this "
- "chain by removing this reference?", blkno,
- bg1->bg_next_group)) {
- bg1->bg_next_group = 0;
- /* XXX write */
- return 1;
- }
}
+ /* XXX exit if it isn't updated? */
if (cs->cs_total_bits != chain->c_total ||
cs->cs_free_bits != chain->c_free) {
if (prompt(ost, PY, "Chain %d in allocator inode %"PRIu64" "
"has %u bits marked free out of %d total bits "
"but the block groups in the chain have %u "
- "recorded out of %u total. Fix this by updating "
+ "free out of %u total. Fix this by updating "
"the chain record?", cs->cs_chain_no, di->i_blkno,
chain->c_free, chain->c_total, cs->cs_free_bits,
cs->cs_total_bits)) {
chain->c_total = cs->cs_total_bits;
chain->c_free = cs->cs_free_bits;
+ return 1;
}
}
return 0;
}
+/* If this returns 0 then the inode allocator had better be amenable to
+ * iteration. */
static errcode_t verify_inode_alloc(o2fsck_state *ost, ocfs2_dinode *di,
char *buf1, char *buf2)
{
struct chain_state cs = {0, };
ocfs2_chain_list *cl;
- uint16_t i, max_chain_rec;
+ uint16_t i, max_count;
+ ocfs2_chain_rec *cr;
+ uint32_t free = 0, total = 0;
+ int changed = 0;
errcode_t ret;
if (memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
@@ -231,33 +307,88 @@
verbosef("cl count %u next %u\n", cl->cl_count, cl->cl_next_free_rec);
- max_chain_rec = (ost->ost_fs->fs_blocksize -
- offsetof(ocfs2_dinode, id2.i_chain.cl_recs)) /
- sizeof(ocfs2_chain_rec);
+ max_count = ocfs2_chain_recs_per_inode(ost->ost_fs->fs_blocksize);
- if (cl->cl_next_free_rec > max_chain_rec) {
- if (prompt(ost, PY, "Allocator inode %"PRIu64" claims %u "
- "as the next free chain record, but it can only "
- "have %u total. Set the next record value?",
- di->i_blkno, cl->cl_next_free_rec, max_chain_rec)) {
- cl->cl_next_free_rec = max_chain_rec;
- }
- } else
- max_chain_rec = cl->cl_next_free_rec;
+ if (cl->cl_count > max_count) {
+ if (!prompt(ost, PY, "Allocator inode %"PRIu64" claims to "
+ "have %u chains, but the maximum is %u. Fix the "
+ "inode's count and keep checking?", di->i_blkno,
+ cl->cl_count, max_count))
+ exit(FSCK_ERROR);
- for (i = 0; i < max_chain_rec; i++) {
- /* clear it for each run */
+ cl->cl_count = max_count;
+ changed = 1;
+ }
+
+ if (cl->cl_next_free_rec > cl->cl_count) {
+ if (!prompt(ost, PY, "Allocator inode %"PRIu64" claims %u "
+ "as the next free chain record, but the inode only "
+ "has %u chains. Clamp the next record value and "
+ "keep checking?",
+ di->i_blkno, cl->cl_next_free_rec, cl->cl_count))
+ exit(FSCK_ERROR);
+
+ cl->cl_next_free_rec = cl->cl_count;
+ changed = 1;
+ }
+
+ for (i = 0; i < cl->cl_next_free_rec; i++) {
+ cr = &cl->cl_recs[i];
+
+ /* reset for each run */
cs = (struct chain_state) {
.cs_chain_no = i,
};
- ret = check_chain(ost, di, &cs, &cl->cl_recs[i], buf1, buf2);
- /* XXX do things :) */
+ changed |= check_chain(ost, di, &cs, cr, buf1, buf2);
+
+ /* replace this deleted chain with the last valid one, if
+ * present, and this 'i' again. If there isn't one to move
+ * in place the loop will terminate */
+ if (cr->c_blkno == 0) {
+ if (i < (cl->cl_next_free_rec - 1)) {
+ cl->cl_next_free_rec--;
+ *cr = cl->cl_recs[cl->cl_next_free_rec];
+ changed = 1;
+ i--;
+ }
+ continue;
+ }
+
+ free += cs.cs_free_bits;
+ total += cs.cs_total_bits;
}
+ if (di->id1.bitmap1.i_total != total ||
+ (di->id1.bitmap1.i_used != total - free)) {
+ if (prompt(ost, PY, "Allocator inode %"PRIu64" has %u bits "
+ "marked used out of %d total bits but the chains "
+ "have %u used out of %u total. Fix this by "
+ "updating the inode counts?", di->i_blkno,
+ di->id1.bitmap1.i_used, di->id1.bitmap1.i_total,
+ total - free, total)) {
+ di->id1.bitmap1.i_used = total - free;
+ di->id1.bitmap1.i_total = total;
+ changed = 1;
+ }
+ }
+
+ if (changed) {
+ /* if we're writing it anyway, we might as well clear the
+ * unused chain entries */
+ if (cl->cl_next_free_rec != max_count)
+ memset(&cl->cl_recs[cl->cl_next_free_rec], 0,
+ (max_count - cl->cl_next_free_rec) *
+ sizeof(ocfs2_chain_rec));
+
+ ret = ocfs2_write_inode(ost->ost_fs, di->i_blkno, (char *)di);
+ if (ret)
+ fatal_error(ret, "while writing inode alloc inode "
+ "%"PRIu64, di->i_blkno);
+ }
+
return 0;
}
-
errcode_t o2fsck_pass0(o2fsck_state *ost)
{
errcode_t ret;
@@ -267,7 +398,7 @@
ocfs2_filesys *fs = ost->ost_fs;
int i, type;
- printf("Pass 1: Checking allocation structures\n");
+ printf("Pass 0: Checking allocation structures\n");
ret = ocfs2_malloc_blocks(fs->fs_io, 3, &blocks);
if (ret) {
@@ -305,13 +436,19 @@
(ost->ost_fs->fs_blocksize * 2));
/* XXX maybe helped by the alternate super block */
- if (ret) {
- }
+ if (ret)
+ goto out;
type = INODE_ALLOC_SYSTEM_INODE;
} while (++i < OCFS2_RAW_SB(fs->fs_super)->s_max_nodes);
out:
+ /* errors are only returned to this guy if they're fatal -- memory
+ * alloc or IO errors. the.. returnee had the responsibility of
+ * describing the error at the source. */
+ if (ret)
+ exit(FSCK_ERROR);
+
if (di)
ocfs2_free(&di);
if (blocks)
Modified: trunk/fsck.ocfs2/pass4.c
===================================================================
--- trunk/fsck.ocfs2/pass4.c 2004-10-28 21:29:36 UTC (rev 350)
+++ trunk/fsck.ocfs2/pass4.c 2004-10-29 00:46:38 UTC (rev 351)
@@ -64,7 +64,7 @@
/* XXX offer to remove files/dirs with no data? */
if (prompt(ost, PY, "Inode %"PRIu64" isn't referenced "
"by any directory entries. Move it to "
- "lost+found?")) {
+ "lost+found?", ino)) {
o2fsck_reconnect_file(ost, ino);
refs = o2fsck_icount_get(ost->ost_icount_refs,
ino);
More information about the Ocfs2-tools-commits mailing list