[Ocfs2-commits] mfasheh commits r2391 - in trunk: . fs/ocfs2
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Mon Jun 13 17:58:45 CDT 2005
Author: mfasheh
Signed-off-by: jlbec
Date: 2005-06-13 17:58:43 -0500 (Mon, 13 Jun 2005)
New Revision: 2391
Modified:
trunk/Config.make.in
trunk/configure.in
trunk/fs/ocfs2/Makefile
trunk/fs/ocfs2/inode.c
trunk/fs/ocfs2/inode.h
trunk/fs/ocfs2/journal.c
trunk/fs/ocfs2/super.c
trunk/fs/ocfs2/vote.c
Log:
* Don't set i_nlink in the vote thread anymore. Instead provide our own
drop_inode which can clear it when no other processes have references.
* Document our delete_inode path better.
* ocfs2_recover_orphans might have taken a reference on an inode which has
already passed through the vote thread in which case we weren't clearing the
state setup there.
* The atomic_inc / atomic_dec in ocfs2_delete_inode is no longer necessary
so remove that.
* Set checkpointing information on inodes in ocfs2_delete_inode so that if
the operation fails, we don't give up the lock without a checkpoint.
Signed-off-by: jlbec
Modified: trunk/Config.make.in
===================================================================
--- trunk/Config.make.in 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/Config.make.in 2005-06-13 22:58:43 UTC (rev 2391)
@@ -51,6 +51,7 @@
EXTRA_CFLAGS += @KAPI_COMPAT_CFLAGS@
MISSING_SOCK_CREATE_LITE = @MISSING_SOCK_CREATE_LITE@
+MISSING_GENERIC_DROP_INODE = @MISSING_GENERIC_DROP_INODE@
JOURNAL_ACCESS_WITH_CREDITS = @JOURNAL_ACCESS_WITH_CREDITS@
BACKING_DEV_CAPABILITIES = @BACKING_DEV_CAPABILITIES@
IDR_GET_NEW_RETURNS_ID = @IDR_GET_NEW_RETURNS_ID@
Modified: trunk/configure.in
===================================================================
--- trunk/configure.in 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/configure.in 2005-06-13 22:58:43 UTC (rev 2391)
@@ -212,6 +212,10 @@
OCFS2_CHECK_KERNEL(sock_create_lite, net.h,, MISSING_SOCK_CREATE_LITE=yes)
AC_SUBST(MISSING_SOCK_CREATE_LITE)
+MISSING_GENERIC_DROP_INODE=
+OCFS2_CHECK_KERNEL(generic_drop_inode, fs.h,, MISSING_GENERIC_DROP_INODE=yes)
+AC_SUBST(MISSING_GENERIC_DROP_INODE)
+
OCFS2_KERNEL_COMPAT(assert_spin_locked, spinlock.h)
OCFS2_CHECK_KERNEL([kref_init with release callback], kref.h,
Modified: trunk/fs/ocfs2/Makefile
===================================================================
--- trunk/fs/ocfs2/Makefile 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/fs/ocfs2/Makefile 2005-06-13 22:58:43 UTC (rev 2391)
@@ -26,6 +26,10 @@
EXTRA_CFLAGS += -DJOURNAL_ACCESS_WITH_CREDITS
endif
+ifdef MISSING_GENERIC_DROP_INODE
+EXTRA_CFLAGS += -DMISSING_GENERIC_DROP_INODE
+endif
+
#
# Since SUBDIRS means something to kbuild, define them safely. Do not
# include trailing slashes.
Modified: trunk/fs/ocfs2/inode.c
===================================================================
--- trunk/fs/ocfs2/inode.c 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/fs/ocfs2/inode.c 2005-06-13 22:58:43 UTC (rev 2391)
@@ -488,7 +488,7 @@
* the node who's doing the actual deleting should handle it
* anyway. */
if (current == osb->vote_task) {
- mlog(0, "Skipping delete of %lu because we're currently"
+ mlog(0, "Skipping delete of %lu because we're currently "
"in process_vote\n", inode->i_ino);
goto bail;
}
@@ -504,12 +504,7 @@
goto bail;
}
- /* ocfs2_meta_lock and friends might igrab / iput this guy, so we
- * take an extra ref. to avoid recursive calls to
- * delete_inode. */
- atomic_inc(&inode->i_count);
status = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
- atomic_dec(&inode->i_count);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
@@ -638,6 +633,11 @@
mlog_errno(status);
goto bail_unblock;
}
+ /* Set the locking information, even though we're wiping the
+ * inode - if we error before completing the wipe, we'll want
+ * to checkpoint our progress so other nodes get an up-to-date
+ * picture. */
+ ocfs2_set_inode_lock_trans(osb->journal, inode);
status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
orphan_dir_bh);
@@ -707,7 +707,7 @@
if (!inode)
goto bail;
- mlog(0, "Clearing inode: %"MLFu64", nlink = %u)\n",
+ mlog(0, "Clearing inode: %"MLFu64", nlink = %u\n",
OCFS2_I(inode)->ip_blkno, inode->i_nlink);
mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
@@ -715,7 +715,10 @@
/* We very well may get a clear_inode before all an inodes
* metadata has hit disk. Of course, we can't drop any cluster
- * locks until the journal has finished with it. */
+ * locks until the journal has finished with it. The only
+ * exception here are successfully wiped inodes - their
+ * metadata can now be considered to be part of the system
+ * inodes from which it came. */
if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED))
ocfs2_checkpoint_inode(inode);
@@ -772,6 +775,51 @@
mlog_exit_void();
}
+#ifdef MISSING_GENERIC_DROP_INODE
+/* This is *only* a hack for kernels which don't have
+ * generic_drop_inode exported yet. */
+static void generic_drop_inode(struct inode *inode)
+{
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+ if (inode->i_nlink) {
+ /* Nasty, nasty we have i_nlink, but since we can't
+ * get to generic_forget_inode, we are forced to call
+ * generic_delete_inode but want to tell
+ * ocfs2_delete_inode to skip the deletion path. */
+ spin_lock(&oi->ip_lock);
+ oi->ip_flags |= OCFS2_INODE_SKIP_DELETE;
+ spin_unlock(&oi->ip_lock);
+ }
+ generic_delete_inode(inode);
+}
+#endif
+
+/* Called under inode_lock, with no more references on the
+ * struct inode, so it's safe here to check the flags field
+ * and to manipulate i_nlink without any other locks. */
+void ocfs2_drop_inode(struct inode *inode)
+{
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+ mlog_entry_void();
+
+ mlog(0, "Drop inode %"MLFu64", nlink = %u, ip_flags = 0x%x\n",
+ oi->ip_blkno, inode->i_nlink, oi->ip_flags);
+
+ /* Testing ip_orphaned_slot here wouldn't work because we may
+ * not have gotten a delete_inode vote from any other nodes
+ * yet. */
+ if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) {
+ mlog(0, "Inode was orphaned on another node, clearing nlink.\n");
+ inode->i_nlink = 0;
+ }
+
+ generic_drop_inode(inode);
+
+ mlog_exit_void();
+}
+
/*
* TODO: this should probably be merged into ocfs2_get_block
*
Modified: trunk/fs/ocfs2/inode.h
===================================================================
--- trunk/fs/ocfs2/inode.h 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/fs/ocfs2/inode.h 2005-06-13 22:58:43 UTC (rev 2391)
@@ -75,16 +75,32 @@
/*
* Flags for the ip_flags field
*/
-/* Special types of inodes */
+/* System file inodes */
#define OCFS2_INODE_SYSTEM_FILE 0x00000001
#define OCFS2_INODE_JOURNAL 0x00000002
#define OCFS2_INODE_BITMAP 0x00000004
-/* Has the inode been deleted yet? */
+/* This inode has been wiped from disk */
#define OCFS2_INODE_DELETED 0x00000008
/* Another node is deleting, so our delete is a nop */
#define OCFS2_INODE_SKIP_DELETE 0x00000010
+/*
+ * Has the inode been orphaned on another node?
+ *
+ * This hints to ocfs2_drop_inode that it should clear i_nlink before
+ * continuing.
+ *
+ * We *only* set this on unlink vote from another node. If the inode
+ * was locally orphaned, then we're sure of the state and don't need
+ * to twiddle i_nlink later - it's either zero or not depending on
+ * whether our unlink succeeded. Otherwise we got this from a node
+ * whose intention was to orphan the inode, however he may have
+ * crashed, failed etc, so we let ocfs2_drop_inode zero the value and
+ * rely on ocfs2_delete_inode to sort things out under the proper
+ * cluster locks.
+ */
+#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
/* Does someone have the file open O_DIRECT */
-#define OCFS2_INODE_OPEN_DIRECT 0x00000020
+#define OCFS2_INODE_OPEN_DIRECT 0x00000040
static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
{
@@ -100,6 +116,7 @@
int *err, int reada);
void ocfs2_clear_inode(struct inode *inode);
void ocfs2_delete_inode(struct inode *inode);
+void ocfs2_drop_inode(struct inode *inode);
struct inode *ocfs2_iget(ocfs2_super *osb, u64 feoff);
struct inode *ocfs2_ilookup(ocfs2_super *osb, u64 feoff);
int ocfs2_inode_init_private(struct inode *inode);
Modified: trunk/fs/ocfs2/journal.c
===================================================================
--- trunk/fs/ocfs2/journal.c 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/fs/ocfs2/journal.c 2005-06-13 22:58:43 UTC (rev 2391)
@@ -1366,6 +1366,7 @@
struct buffer_head *bh = NULL;
struct ocfs2_dir_entry *de;
struct super_block *sb = osb->sb;
+ struct ocfs2_inode_info *oi;
mlog(0, "Recover inodes from orphan dir in slot %d\n", slot);
@@ -1463,15 +1464,24 @@
orphan_dir_inode = NULL;
while (inode) {
- mlog(0, "iput orphan %"MLFu64"\n",
- OCFS2_I(inode)->ip_blkno);
+ oi = OCFS2_I(inode);
+ mlog(0, "iput orphan %"MLFu64"\n", oi->ip_blkno);
- iter = OCFS2_I(inode)->ip_next_orphan;
+ iter = oi->ip_next_orphan;
- spin_lock(&OCFS2_I(inode)->ip_lock);
- OCFS2_I(inode)->ip_orphaned_slot = slot;
- spin_unlock(&OCFS2_I(inode)->ip_lock);
+ spin_lock(&oi->ip_lock);
+ /* Delete voting may have set these on the assumption
+ * that the other node would wipe them successfully.
+ * If they are still in the node's orphan dir, we need
+ * to reset that state. */
+ oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
+ /* Set the proper information to get us going into
+ * ocfs2_delete_inode. */
+ oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+ oi->ip_orphaned_slot = slot;
+ spin_unlock(&oi->ip_lock);
+
iput(inode);
inode = iter;
Modified: trunk/fs/ocfs2/super.c
===================================================================
--- trunk/fs/ocfs2/super.c 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/fs/ocfs2/super.c 2005-06-13 22:58:43 UTC (rev 2391)
@@ -113,6 +113,7 @@
.statfs = ocfs2_statfs,
.alloc_inode = ocfs2_alloc_inode,
.destroy_inode = ocfs2_destroy_inode,
+ .drop_inode = ocfs2_drop_inode,
.clear_inode = ocfs2_clear_inode,
.delete_inode = ocfs2_delete_inode,
.sync_fs = ocfs2_sync_fs,
Modified: trunk/fs/ocfs2/vote.c
===================================================================
--- trunk/fs/ocfs2/vote.c 2005-06-13 18:37:09 UTC (rev 2390)
+++ trunk/fs/ocfs2/vote.c 2005-06-13 22:58:43 UTC (rev 2391)
@@ -164,8 +164,7 @@
mlog(0, "DELETE vote on inode %lu, read lnk_cnt = %u, slot = %d\n",
inode->i_ino, inode->i_nlink, *orphaned_slot);
- /* force this as ours may be out of date. */
- inode->i_nlink = 0;
+ spin_lock(&OCFS2_I(inode)->ip_lock);
/* Whatever our vote response is, we want to make sure that
* the orphaned slot is recorded properly on this node *and*
@@ -174,8 +173,6 @@
* respond with BUSY he doesn't actually need the orphaned
* slot, but it doesn't hurt to do it here anyway. */
if ((*orphaned_slot) != OCFS2_INVALID_SLOT) {
- spin_lock(&OCFS2_I(inode)->ip_lock);
-
mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot !=
OCFS2_INVALID_SLOT &&
OCFS2_I(inode)->ip_orphaned_slot !=
@@ -190,19 +187,14 @@
OCFS2_I(inode)->ip_blkno, *orphaned_slot);
OCFS2_I(inode)->ip_orphaned_slot = *orphaned_slot;
- spin_unlock(&OCFS2_I(inode)->ip_lock);
} else {
- spin_lock(&OCFS2_I(inode)->ip_lock);
-
mlog(0, "Sending back orphaned slot %d for inode %"MLFu64"\n",
OCFS2_I(inode)->ip_orphaned_slot,
OCFS2_I(inode)->ip_blkno);
*orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
- spin_unlock(&OCFS2_I(inode)->ip_lock);
}
- spin_lock(&OCFS2_I(inode)->ip_lock);
/* vote no if the file is still open. */
if (OCFS2_I(inode)->ip_open_count) {
mlog(0, "open count = %u\n",
@@ -249,7 +241,10 @@
response = OCFS2_RESPONSE_OK;
/* We set the SKIP_DELETE flag on the inode so we don't try to
- * delete it in delete_inode ourselves. */
+ * delete it in delete_inode ourselves, thus avoiding
+ * unecessary lock pinging. If the other node failed to wipe
+ * the inode as a result of a crash, then recovery will pick
+ * up the slack. */
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_DELETE;
spin_unlock(&OCFS2_I(inode)->ip_lock);
@@ -330,11 +325,21 @@
dput(dentry);
}
- /* for rename, we don't change link counts */
+ /* rename votes don't send link counts */
if (!rename) {
mlog(0, "new_nlink = %u\n", new_nlink);
- inode->i_nlink = new_nlink;
+ /* We don't have the proper locks here to directly
+ * change i_nlink and besides, the vote is sent
+ * *before* the operation so it may have failed on the
+ * other node. This passes a hint to ocfs2_drop_inode
+ * to force ocfs2_delete_inode, who will take the
+ * proper cluster locks to sort things out. */
+ if (new_nlink == 0) {
+ spin_lock(&OCFS2_I(inode)->ip_lock);
+ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
+ }
}
}
More information about the Ocfs2-commits
mailing list