[Ocfs2-commits] mfasheh commits r2467 - trunk/fs/ocfs2
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Mon Jul 11 16:43:23 CDT 2005
Author: mfasheh
Signed-off-by: jlbec
Date: 2005-07-11 16:43:21 -0500 (Mon, 11 Jul 2005)
New Revision: 2467
Modified:
trunk/fs/ocfs2/Makefile
trunk/fs/ocfs2/dlmglue.c
trunk/fs/ocfs2/file.c
trunk/fs/ocfs2/inode.c
trunk/fs/ocfs2/vote.c
Log:
* Add some workaround code for a problem where delete_inode calls
truncate_inode_pages before OCFS2 can check whether to actually wipe the
inode. This is enabled right now by default as the kernel patch to fix
this is still under review.
* Fix up delete_inode to separate out the parts which do a cluster query
from the parts which actually handle the removal of an inode from the
system.
Signed-off-by: jlbec
Modified: trunk/fs/ocfs2/Makefile
===================================================================
--- trunk/fs/ocfs2/Makefile 2005-07-11 19:03:00 UTC (rev 2466)
+++ trunk/fs/ocfs2/Makefile 2005-07-11 21:43:21 UTC (rev 2467)
@@ -26,6 +26,8 @@
EXTRA_CFLAGS += -DJOURNAL_ACCESS_WITH_CREDITS
endif
+EXTRA_CFLAGS += -DOCFS2_DELETE_INODE_WORKAROUND
+
#
# Since SUBDIRS means something to kbuild, define them safely. Do not
# include trailing slashes.
Modified: trunk/fs/ocfs2/dlmglue.c
===================================================================
--- trunk/fs/ocfs2/dlmglue.c 2005-07-11 19:03:00 UTC (rev 2466)
+++ trunk/fs/ocfs2/dlmglue.c 2005-07-11 21:43:21 UTC (rev 2467)
@@ -1436,23 +1436,24 @@
{
int status = 0;
u32 trustable_clusters = 0;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres;
ocfs2_dinode *fe;
mlog_entry_void();
- spin_lock(&OCFS2_I(inode)->ip_lock);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
+ spin_lock(&oi->ip_lock);
+ if (oi->ip_flags & OCFS2_INODE_DELETED) {
mlog(0, "Orphaned inode %"MLFu64" was deleted while we "
"were waiting on a lock. ip_flags = 0x%x\n",
- OCFS2_I(inode)->ip_blkno, OCFS2_I(inode)->ip_flags);
- spin_unlock(&OCFS2_I(inode)->ip_lock);
+ oi->ip_blkno, oi->ip_flags);
+ spin_unlock(&oi->ip_lock);
status = -ENOENT;
goto bail;
}
- spin_unlock(&OCFS2_I(inode)->ip_lock);
+ spin_unlock(&oi->ip_lock);
- lockres = &OCFS2_I(inode)->ip_meta_lockres;
+ lockres = &oi->ip_meta_lockres;
if (!ocfs2_should_refresh_lock_res(lockres))
goto bail;
@@ -1469,9 +1470,8 @@
} else {
/* Boo, we have to go to disk. */
/* read bh, cast, ocfs2_refresh_inode */
- status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
- OCFS2_I(inode)->ip_blkno, bh,
- OCFS2_BH_CACHED, inode);
+ status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno,
+ bh, OCFS2_BH_CACHED, inode);
if (status < 0) {
mlog_errno(status);
goto bail_refresh;
@@ -1491,13 +1491,12 @@
le32_to_cpu(fe->i_generation),
"Invalid dinode %"MLFu64" disk generation: %u "
"inode->i_generation: %u\n",
- OCFS2_I(inode)->ip_blkno,
- le32_to_cpu(fe->i_generation),
+ oi->ip_blkno, le32_to_cpu(fe->i_generation),
inode->i_generation);
mlog_bug_on_msg(fe->i_dtime || !(fe->i_flags & OCFS2_VALID_FL),
"Stale dinode %"MLFu64" dtime: %"MLFu64" "
- "flags: 0x%x\n", OCFS2_I(inode)->ip_blkno,
- fe->i_dtime, fe->i_flags);
+ "flags: 0x%x\n", oi->ip_blkno, fe->i_dtime,
+ fe->i_flags);
ocfs2_refresh_inode(inode, fe);
}
@@ -1512,6 +1511,21 @@
ocfs2_set_local_seq_from_lvb(lockres);
ocfs2_reset_meta_lvb_values(inode);
+#ifdef OCFS2_DELETE_INODE_WORKAROUND
+ /* We might as well check this here - since the inode is now
+ * locked, an up to date view will indicate whether this was
+ * never actually orphaned -- i_nlink should be zero for an
+ * orphaned inode. */
+ spin_lock(&oi->ip_lock);
+ if (inode->i_nlink &&
+ oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) {
+ mlog(0, "Inode %"MLFu64": clearing maybe_orphaned flag\n",
+ oi->ip_blkno);
+ oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
+ }
+ spin_unlock(&oi->ip_lock);
+#endif
+
status = 0;
bail_refresh:
ocfs2_complete_lock_res_refresh(lockres, status);
Modified: trunk/fs/ocfs2/file.c
===================================================================
--- trunk/fs/ocfs2/file.c 2005-07-11 19:03:00 UTC (rev 2466)
+++ trunk/fs/ocfs2/file.c 2005-07-11 21:43:21 UTC (rev 2467)
@@ -102,6 +102,17 @@
file->f_dentry->d_name.name);
spin_lock(&oi->ip_lock);
+#ifdef OCFS2_DELETE_INODE_WORKAROUND
+ /* Do the sync *before* decrementing ip_open_count as
+ * otherwise the voting code might allow this inode to be
+ * wiped. */
+ if (oi->ip_open_count == 1 &&
+ oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) {
+ spin_unlock(&oi->ip_lock);
+ write_inode_now(inode, 1);
+ spin_lock(&oi->ip_lock);
+ }
+#endif
if (!--oi->ip_open_count)
oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
spin_unlock(&oi->ip_lock);
Modified: trunk/fs/ocfs2/inode.c
===================================================================
--- trunk/fs/ocfs2/inode.c 2005-07-11 19:03:00 UTC (rev 2466)
+++ trunk/fs/ocfs2/inode.c 2005-07-11 21:43:21 UTC (rev 2467)
@@ -441,44 +441,165 @@
return status;
}
-void ocfs2_delete_inode(struct inode *inode)
+static int ocfs2_remove_inode(struct inode *inode,
+ struct buffer_head *di_bh,
+ struct inode *orphan_dir_inode,
+ struct buffer_head *orphan_dir_bh)
{
- int status = 0;
- int unlock = 0;
- int orphaned_slot;
- struct inode *orphan_dir_inode = NULL;
+ int status;
struct inode *inode_alloc_inode = NULL;
- ocfs2_journal_handle *handle = NULL;
+ struct buffer_head *inode_alloc_bh = NULL;
+ ocfs2_journal_handle *handle;
ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct buffer_head *orphan_dir_bh = NULL;
- struct buffer_head *inode_alloc_bh = NULL;
- struct buffer_head *fe_bh = NULL;
- ocfs2_dinode *fe;
- sigset_t blocked, oldset;
+ ocfs2_dinode *di = (ocfs2_dinode *) di_bh->b_data;
- mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
+ inode_alloc_inode =
+ ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
+ le16_to_cpu(di->i_suballoc_slot));
+ if (!inode_alloc_inode) {
+ status = -EEXIST;
+ mlog_errno(status);
+ goto bail;
+ }
- if (is_bad_inode(inode))
+ down(&inode_alloc_inode->i_sem);
+ status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1);
+ if (status < 0) {
+ up(&inode_alloc_inode->i_sem);
+
+ mlog_errno(status);
goto bail;
+ }
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
- mlog(0, "Skipping system file delete.\n");
+ handle = ocfs2_start_trans(osb, NULL, OCFS2_DELETE_INODE_CREDITS);
+ if (handle == NULL) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail_unlock;
+ }
+
+ ocfs2_set_inode_lock_trans(osb->journal, inode_alloc_inode);
+ ocfs2_set_inode_lock_trans(osb->journal, orphan_dir_inode);
+ /* Set the inode locking information, even though we're wiping
+ * the inode - if we error before completing the wipe, we'll
+ * want to checkpoint our progress so other nodes get an
+ * up-to-date picture. */
+ ocfs2_set_inode_lock_trans(osb->journal, inode);
+
+ status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
+ orphan_dir_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
+
+ /* set the inodes dtime */
+ status = ocfs2_journal_access(handle, inode, di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
+
+ di->i_dtime = CURRENT_TIME.tv_sec;
+ di->i_flags &= (~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
+
+ status = ocfs2_journal_dirty(handle, di_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
+
+ ocfs2_remove_from_cache(inode, di_bh);
+
+ status = ocfs2_free_dinode(handle, inode_alloc_inode,
+ inode_alloc_bh, di);
+ if (status < 0)
+ mlog_errno(status);
+
+bail_commit:
+ ocfs2_commit_trans(handle);
+bail_unlock:
+ ocfs2_meta_unlock(inode_alloc_inode, 1);
+ up(&inode_alloc_inode->i_sem);
+ brelse(inode_alloc_bh);
+bail:
+ iput(inode_alloc_inode);
+
+ return status;
+}
+
+static int ocfs2_wipe_inode(struct inode *inode,
+ struct buffer_head *di_bh)
+{
+ int status, orphaned_slot;
+ struct inode *orphan_dir_inode = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ /* We've already voted on this so it should be readonly - no
+ * spinlock needed. */
+ orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
+ orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+ ORPHAN_DIR_SYSTEM_INODE,
+ orphaned_slot);
+ if (!orphan_dir_inode) {
+ status = -EEXIST;
+ mlog_errno(status);
goto bail;
}
- if (inode == osb->root_inode) {
- mlog(0, "Skipping root inode delete.\n");
+ /* Lock the orphan dir. The lock will be held for the entire
+ * delete_inode operation. We do this now to avoid races with
+ * recovery completion on other nodes. */
+ down(&orphan_dir_inode->i_sem);
+ status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
+ if (status < 0) {
+ up(&orphan_dir_inode->i_sem);
+
+ mlog_errno(status);
goto bail;
}
- spin_lock(&OCFS2_I(inode)->ip_lock);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_DELETE) {
- spin_unlock(&OCFS2_I(inode)->ip_lock);
- mlog(0, "Skipping delete of %lu because another node "
- "has done this for us.\n", inode->i_ino);
+ /* we do this while holding the orphan dir lock because we
+ * don't want recovery being run from another node to vote for
+ * an inode delete on us -- this will result in two nodes
+ * truncating the same file! */
+ status = ocfs2_truncate_for_delete(osb, inode, di_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_unlock_dir;
+ }
+
+ status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
+ orphan_dir_bh);
+ if (status < 0)
+ mlog_errno(status);
+
+bail_unlock_dir:
+ ocfs2_meta_unlock(orphan_dir_inode, 1);
+ up(&orphan_dir_inode->i_sem);
+ brelse(orphan_dir_bh);
+bail:
+ iput(orphan_dir_inode);
+
+ return status;
+}
+
+/* There is a series of simple checks that should be done before a
+ * vote is even considered. Encapsulate those in this function. */
+static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
+{
+ int ret = 0;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ /* We shouldn't be getting here for the root directory
+ * inode.. */
+ if (inode == osb->root_inode) {
+ mlog(ML_ERROR, "Skipping delete of root inode.\n");
goto bail;
}
- spin_unlock(&OCFS2_I(inode)->ip_lock);
/* If we're coming from process_vote we can't go into our own
* voting [hello, deadlock city!], so unforuntately we just
@@ -491,211 +612,215 @@
goto bail;
}
- /* We want to blocks signals in delete_inode as the lock and
- * messaging paths may return us -ERESTARTSYS. This however
- * could result in inodes being orphaned forever. */
- sigfillset(&blocked);
-
- status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
+ spin_lock(&oi->ip_lock);
+ /* OCFS2 *never* deletes system files. This should technically
+ * never get here as system file inodes should always have a
+ * positive link count. */
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ mlog(ML_ERROR, "Skipping delete of system file %"MLFu64".\n",
+ oi->ip_blkno);
+ goto bail_unlock;
}
- status = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
- if (status < 0) {
- if (status != -ENOENT)
- mlog_errno(status);
- goto bail_unblock;
- }
- unlock = 1;
-
- /* While we were waiting for the lock, another node might have
- * asked to delete the inode. Recheck our flags to catch this
- * race and just clear_inode instead.*/
- spin_lock(&OCFS2_I(inode)->ip_lock);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_DELETE) {
- spin_unlock(&OCFS2_I(inode)->ip_lock);
+ /* If we have voted "yes" on the wipe of this inode for
+ * another node, it will be marked here so we can safely skip
+ * it. Recovery will cleanup any inodes we might inadvertantly
+ * skip here. */
+ if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) {
mlog(0, "Skipping delete of %lu because another node "
"has done this for us.\n", inode->i_ino);
- goto bail_unblock;
+ goto bail_unlock;
}
- spin_unlock(&OCFS2_I(inode)->ip_lock);
- status = ocfs2_request_delete_vote(inode);
- if (status < 0) {
- /* EBUSY here is assumed to mean that other nodes are
- * still using the inode. We're done here though, so
- * avoid doing anything on disk and let them worry
- * about deleting it. */
- if (status != -EBUSY)
- mlog_errno(status);
- goto bail_unblock;
- }
+ ret = 1;
+bail_unlock:
+ spin_unlock(&oi->ip_lock);
+bail:
+ return ret;
+}
- spin_lock(&OCFS2_I(inode)->ip_lock);
- orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
- spin_unlock(&OCFS2_I(inode)->ip_lock);
+/* Query the cluster to determine whether we should wipe an inode from
+ * disk or not.
+ *
+ * Requires the inode to have the cluster lock. */
+static int ocfs2_query_inode_wipe(struct inode *inode,
+ struct buffer_head *di_bh,
+ int *wipe)
+{
+ int status = 0;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ ocfs2_dinode *di;
- if (orphaned_slot == OCFS2_INVALID_SLOT) {
- /* Nobody knew which slot this inode was orphaned
- * into. This may happen during node death and
- * recovery knows how to clean it up so we can safely
- * ignore this inode for now on. */
- mlog(0, "Nobody knew where inode %"MLFu64" was orphaned!\n",
- OCFS2_I(inode)->ip_blkno);
+ *wipe = 0;
- /* XXX: Is this really necessary? */
- spin_lock(&OCFS2_I(inode)->ip_lock);
- OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
- spin_unlock(&OCFS2_I(inode)->ip_lock);
- goto bail_unblock;
+ /* While we were waiting for the cluster lock in
+ * ocfs2_delete_inode, another node might have asked to delete
+ * the inode. Recheck our flags to catch this. */
+ if (!ocfs2_inode_is_valid_to_delete(inode)) {
+ mlog(0, "Skipping delete of %"MLFu64" because flags changed\n",
+ oi->ip_blkno);
+ goto bail;
}
- mlog(0, "Inode %"MLFu64" is ok to wipe from orphan dir slot %d\n",
- OCFS2_I(inode)->ip_blkno, orphaned_slot);
+ /* Now that we have an up to date inode, we can double check
+ * the link count. */
+ if (inode->i_nlink) {
+ mlog(0, "Skipping delete of %"MLFu64" because nlink = %u\n",
+ oi->ip_blkno, inode->i_nlink);
+ goto bail;
+ }
- fe = (ocfs2_dinode *) fe_bh->b_data;
- if (!(fe->i_flags & OCFS2_ORPHANED_FL)) {
+ /* Do some basic inode verification... */
+ di = (ocfs2_dinode *) di_bh->b_data;
+ if (!(di->i_flags & OCFS2_ORPHANED_FL)) {
/* for lack of a better error? */
status = -EEXIST;
mlog(ML_ERROR,
"Inode %"MLFu64" (on-disk %"MLFu64") not orphaned!\n",
OCFS2_I(inode)->ip_blkno,
- fe->i_blkno);
- goto bail_unblock;
+ di->i_blkno);
+ goto bail;
}
/* has someone already deleted us?! baaad... */
- if (fe->i_dtime) {
+ if (di->i_dtime) {
status = -EEXIST;
mlog_errno(status);
- goto bail_unblock;
+ goto bail;
}
- if (fe->i_links_count) {
- status = -EBUSY;
- mlog_errno(status);
- goto bail_unblock;
+ status = ocfs2_request_delete_vote(inode);
+ /* -EBUSY means that other nodes are still using the
+ * inode. We're done here though, so avoid doing anything on
+ * disk and let them worry about deleting it. */
+ if (status == -EBUSY) {
+ status = 0;
+ mlog(0, "Skipping delete of %"MLFu64" because it is in use on"
+ "other nodes\n", oi->ip_blkno);
+ goto bail;
}
-
- /* Oop, lets be carefull of lock / trans ordering here... */
- handle = ocfs2_alloc_handle(osb);
- if (handle == NULL) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail_unblock;
- }
-
- orphan_dir_inode = ocfs2_get_system_file_inode(osb,
- ORPHAN_DIR_SYSTEM_INODE,
- orphaned_slot);
- if (!orphan_dir_inode) {
- status = -EEXIST;
- mlog_errno(status);
- goto bail_unblock;
- }
- ocfs2_handle_add_inode(handle, orphan_dir_inode);
- status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
if (status < 0) {
mlog_errno(status);
- goto bail_unblock;
+ goto bail;
}
- /* we do this while holding the orphan dir lock because we
- * don't want recovery being run from another node to vote for
- * an inode delete on us -- this will result in two nodes
- * truncating the same file! */
- status = ocfs2_truncate_for_delete(osb, inode, fe_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail_unblock;
+ spin_lock(&oi->ip_lock);
+ if (oi->ip_orphaned_slot == OCFS2_INVALID_SLOT) {
+ /* Nobody knew which slot this inode was orphaned
+ * into. This may happen during node death and
+ * recovery knows how to clean it up so we can safely
+ * ignore this inode for now on. */
+ mlog(0, "Nobody knew where inode %"MLFu64" was orphaned!\n",
+ oi->ip_blkno);
+ } else {
+ *wipe = 1;
+
+ mlog(0, "Inode %"MLFu64" is ok to wipe from orphan dir %d\n",
+ oi->ip_blkno, oi->ip_orphaned_slot);
}
+ spin_unlock(&oi->ip_lock);
- inode_alloc_inode =
- ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
- le16_to_cpu(fe->i_suballoc_slot));
- if (!inode_alloc_inode) {
- status = -EEXIST;
- mlog_errno(status);
- goto bail_unblock;
+bail:
+ return status;
+}
+
+/* Support function for ocfs2_delete_inode. Will help us keep the
+ * inode data in a consistent state for clear_inode. Always truncates
+ * pages, optionally sync's them first. */
+static void ocfs2_cleanup_delete_inode(struct inode *inode,
+ int sync_data)
+{
+ mlog(0, "Cleanup inode %"MLFu64", sync = %d\n",
+ OCFS2_I(inode)->ip_blkno, sync_data);
+#ifndef OCFS2_DELETE_INODE_WORKAROUND
+ if (sync_data)
+ write_inode_now(inode, 1);
+ truncate_inode_pages(&inode->i_data, 0);
+#endif
+}
+
+void ocfs2_delete_inode(struct inode *inode)
+{
+ int wipe, status;
+ sigset_t blocked, oldset;
+ struct buffer_head *di_bh = NULL;
+
+ mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
+
+ if (is_bad_inode(inode)) {
+ mlog(0, "Skipping delete of bad inode\n");
+ goto bail;
}
- ocfs2_handle_add_inode(handle, inode_alloc_inode);
- status = ocfs2_meta_lock(inode_alloc_inode, handle, &inode_alloc_bh,
- 1);
- if (status < 0) {
- mlog_errno(status);
- goto bail_unblock;
- }
- handle = ocfs2_start_trans(osb, handle, OCFS2_DELETE_INODE_CREDITS);
- if (handle == NULL) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail_unblock;
+ if (!ocfs2_inode_is_valid_to_delete(inode)) {
+ /* It's probably not necessary to truncate_inode_pages
+ * here but we do it for safety anyway (it will most
+ * likely be a no-op anyway) */
+ ocfs2_cleanup_delete_inode(inode, 0);
+ goto bail;
}
- /* Set the locking information, even though we're wiping the
- * inode - if we error before completing the wipe, we'll want
- * to checkpoint our progress so other nodes get an up-to-date
- * picture. */
- ocfs2_set_inode_lock_trans(osb->journal, inode);
- status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
- orphan_dir_bh);
+ /* We want to block signals in delete_inode as the lock and
+ * messaging paths may return us -ERESTARTSYS. Which would
+ * cause us to exit early, resulting in inodes being orphaned
+ * forever. */
+ sigfillset(&blocked);
+ status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
if (status < 0) {
mlog_errno(status);
- goto bail_unblock;
+ ocfs2_cleanup_delete_inode(inode, 1);
+ goto bail;
}
- /* set the inodes dtime */
- status = ocfs2_journal_access(handle, inode, fe_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ /* Lock down the inode. This gives us an up to date view of
+ * it's metadata (for verification), and allows us to
+ * serialize delete_inode votes. */
+ status = ocfs2_meta_lock(inode, NULL, &di_bh, 1);
if (status < 0) {
- mlog_errno(status);
+ if (status != -ENOENT)
+ mlog_errno(status);
+ ocfs2_cleanup_delete_inode(inode, 0);
goto bail_unblock;
}
- fe->i_dtime = CURRENT_TIME.tv_sec;
- fe->i_flags &= (~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
+ /* Query the cluster. This will be the final decision made
+ * before we go ahead and wipe the inode. */
+ status = ocfs2_query_inode_wipe(inode, di_bh, &wipe);
+ if (!wipe || status < 0) {
+ /* Error and inode busy vote both mean we won't be
+ * removing the inode, so they take almost the same
+ * path. */
+ if (status < 0)
+ mlog_errno(status);
- status = ocfs2_journal_dirty(handle, fe_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail_unblock;
+ /* Someone in the cluster has voted to not wipe this
+ * inode, or it was never completely orphaned. Write
+ * out the pages and exit now. */
+ ocfs2_cleanup_delete_inode(inode, 1);
+ goto bail_unlock_inode;
}
- ocfs2_remove_from_cache(inode, fe_bh);
+ ocfs2_cleanup_delete_inode(inode, 0);
- status = ocfs2_free_dinode(handle, inode_alloc_inode,
- inode_alloc_bh, fe);
+ status = ocfs2_wipe_inode(inode, di_bh);
if (status < 0) {
mlog_errno(status);
- goto bail_unblock;
+ goto bail_unlock_inode;
}
+ /* Mark the inode as successfully deleted. This is important
+ * for ocfs2_clear_inode as it will check this flag and skip
+ * any checkpointing work */
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
+
+bail_unlock_inode:
+ ocfs2_meta_unlock(inode, 1);
+ brelse(di_bh);
bail_unblock:
status = sigprocmask(SIG_SETMASK, &oldset, NULL);
if (status < 0)
mlog_errno(status);
-
bail:
- if (handle)
- ocfs2_commit_trans(handle);
- if (unlock)
- ocfs2_meta_unlock(inode, 1);
- if (orphan_dir_bh)
- brelse(orphan_dir_bh);
- if (inode_alloc_bh)
- brelse(inode_alloc_bh);
- if (fe_bh)
- brelse(fe_bh);
- if (orphan_dir_inode)
- iput(orphan_dir_inode);
- if (inode_alloc_inode)
- iput(inode_alloc_inode);
-
- /* we must clear inode. */
clear_inode(inode);
mlog_exit_void();
}
Modified: trunk/fs/ocfs2/vote.c
===================================================================
--- trunk/fs/ocfs2/vote.c 2005-07-11 19:03:00 UTC (rev 2466)
+++ trunk/fs/ocfs2/vote.c 2005-07-11 21:43:21 UTC (rev 2467)
@@ -295,6 +295,7 @@
{
struct dentry *dentry = NULL;
struct list_head *p;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
mlog(0, "parent %"MLFu64", namelen = %u, name = %.*s\n", parent_blkno,
namelen, namelen, name);
@@ -336,8 +337,23 @@
* to force ocfs2_delete_inode, who will take the
* proper cluster locks to sort things out. */
if (new_nlink == 0) {
- spin_lock(&OCFS2_I(inode)->ip_lock);
- OCFS2_I(inode)->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+ spin_lock(&oi->ip_lock);
+ oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+
+#ifdef OCFS2_DELETE_INODE_WORKAROUND
+ /* Do a sync now as we can't be sure whether
+ * the inode will actually be orphaned or
+ * not. We condition this on the open count as
+ * otherwise, ocfs2_file_release will handle
+ * it for us. */
+ if (!oi->ip_open_count) {
+ spin_unlock(&oi->ip_lock);
+ write_inode_now(inode, 1);
+ /* strange indentation past the
+ * 'else', but I want to keep the non
+ * hack code purty :) */
+ } else
+#endif
spin_unlock(&OCFS2_I(inode)->ip_lock);
}
}
More information about the Ocfs2-commits
mailing list