[Ocfs2-commits] mfasheh commits r2389 - in trunk/fs/ocfs2: . cluster
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Fri Jun 10 17:08:42 CDT 2005
Author: mfasheh
Signed-off-by: manish
Date: 2005-06-10 17:08:41 -0500 (Fri, 10 Jun 2005)
New Revision: 2389
Modified:
trunk/fs/ocfs2/cluster/masklog.c
trunk/fs/ocfs2/cluster/masklog.h
trunk/fs/ocfs2/dcache.c
trunk/fs/ocfs2/dir.c
trunk/fs/ocfs2/dir.h
trunk/fs/ocfs2/dlmglue.c
trunk/fs/ocfs2/dlmglue.h
trunk/fs/ocfs2/file.c
trunk/fs/ocfs2/inode.c
trunk/fs/ocfs2/inode.h
trunk/fs/ocfs2/namei.c
trunk/fs/ocfs2/ocfs2.h
trunk/fs/ocfs2/sysfile.c
trunk/fs/ocfs2/vote.c
trunk/fs/ocfs2/vote.h
Log:
* have ocfs2_lookup handle its own cluster locking and don't drop the lock
until we've completed our iget and d_add. Otherwise we race unlink of that
name and can't rely on it still being valid.
* open and the vote thread were racing on the check / set of the
INODE_DELETED flag. Fix this.
* unlink / rename votes were naively calling d_prune_aliases, which doesn't
catch all of the dentry races we might have in a cluster. Pass through
some unique identifying characteristics and find the exact dentry to give to
d_delete instead.
* unsurprisingly, clustered rename has the same deadlock issues that local
rename has. Mimic the VFS locking here and introduce a cluster rename lock.
* ocfs2_rename was bugging on a condition which we could just cleanly error
out on.
* ocfs2_meta_lock was only warning on getting a lock for a wiped inode.
Realizing that this is perfectly valid, change the code there to cleanly
error out with -ENOENT.
* we weren't setting dentry ops on all dentries, which meant our
d_revalidate callback wasn't being called for new names.
* introduce some dcache.c specific tracing
* proper locking around the i_nlink changes in the vote thread
* add some more tracing for certain conditions in dlmglue.
Signed-off-by: manish
Modified: trunk/fs/ocfs2/cluster/masklog.c
===================================================================
--- trunk/fs/ocfs2/cluster/masklog.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/cluster/masklog.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -208,6 +208,7 @@
set_a_string(NAMEI);
set_a_string(INODE);
set_a_string(VOTE);
+ set_a_string(DCACHE);
set_a_string(ERROR);
set_a_string(NOTICE);
set_a_string(KTHREAD);
Modified: trunk/fs/ocfs2/cluster/masklog.h
===================================================================
--- trunk/fs/ocfs2/cluster/masklog.h 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/cluster/masklog.h 2005-06-10 22:08:41 UTC (rev 2389)
@@ -105,6 +105,7 @@
#define ML_NAMEI 0x0000000000400000ULL /* ocfs2 directory / namespace */
#define ML_INODE 0x0000000000800000ULL /* ocfs2 inode manipulation */
#define ML_VOTE 0x0000000001000000ULL /* ocfs2 node messaging */
+#define ML_DCACHE 0x0000000002000000ULL /* ocfs2 dcache operations */
/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
Modified: trunk/fs/ocfs2/dcache.c
===================================================================
--- trunk/fs/ocfs2/dcache.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/dcache.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -28,7 +28,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
-#define MLOG_MASK_PREFIX ML_NAMEI
+#define MLOG_MASK_PREFIX ML_DCACHE
#include <cluster/masklog.h>
#include "ocfs2.h"
@@ -48,8 +48,12 @@
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
- if (inode == NULL)
+ /* Never trust a negative dentry - force a new lookup. */
+ if (inode == NULL) {
+ mlog(0, "negative dentry: %.*s\n", dentry->d_name.len,
+ dentry->d_name.name);
goto bail;
+ }
osb = OCFS2_SB(inode->i_sb);
@@ -58,7 +62,7 @@
if (inode != osb->root_inode) {
spin_lock(&OCFS2_I(inode)->ip_lock);
/* did we or someone else delete this inode? */
- if (INODE_DELETED(inode)) {
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
spin_unlock(&OCFS2_I(inode)->ip_lock);
mlog(0, "inode (%"MLFu64") deleted, returning false\n",
OCFS2_I(inode)->ip_blkno);
Modified: trunk/fs/ocfs2/dir.c
===================================================================
--- trunk/fs/ocfs2/dir.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/dir.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -203,29 +203,20 @@
/*
* NOTE: this should always be called with parent dir i_sem taken.
*/
-int ocfs2_find_files_on_disk(ocfs2_super *osb, const char *name,
- int namelen, u64 *blkno,
- struct inode *inode, int take_lock,
+int ocfs2_find_files_on_disk(const char *name,
+ int namelen,
+ u64 *blkno,
+ struct inode *inode,
struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent)
{
int status = -ENOENT;
- int lock_acq = 0;
+ ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry("(osb=%p, parent=%"MLFu64", name='%.*s', blkno=%p, "
"inode=%p)\n",
osb, OCFS2_I(inode)->ip_blkno, namelen, name, blkno, inode);
- if (take_lock) {
- status = ocfs2_meta_lock(inode, NULL, NULL, 0);
- if (status < 0) {
- if (status != -ENOENT)
- mlog_errno(status);
- goto leave;
- }
- lock_acq = 1;
- }
-
*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
if (!*dirent_bh || !*dirent) {
status = -ENOENT;
@@ -236,10 +227,6 @@
status = 0;
leave:
-
- if (take_lock && lock_acq)
- ocfs2_meta_unlock(inode, 0);
-
if (status < 0) {
*dirent = NULL;
if (*dirent_bh) {
Modified: trunk/fs/ocfs2/dir.h
===================================================================
--- trunk/fs/ocfs2/dir.h 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/dir.h 2005-06-10 22:08:41 UTC (rev 2389)
@@ -30,9 +30,10 @@
const char *name,
int namelen);
int ocfs2_empty_dir(struct inode *inode); /* FIXME: to namei.c */
-int ocfs2_find_files_on_disk(ocfs2_super *osb, const char *name,
- int namelen, u64 *blkno,
- struct inode *inode, int take_lock,
+int ocfs2_find_files_on_disk(const char *name,
+ int namelen,
+ u64 *blkno,
+ struct inode *inode,
struct buffer_head **dirent_bh,
struct ocfs2_dir_entry **dirent);
int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
Modified: trunk/fs/ocfs2/dlmglue.c
===================================================================
--- trunk/fs/ocfs2/dlmglue.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/dlmglue.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -66,9 +66,10 @@
static spinlock_t clean_buffer_lock = SPIN_LOCK_UNLOCKED;
static char ocfs2_lock_type_char[OCFS2_NUM_LOCK_TYPES] = {
- [OCFS2_TYPE_META] = 'M',
- [OCFS2_TYPE_DATA] = 'D',
- [OCFS2_TYPE_SUPER] = 'S'
+ [OCFS2_TYPE_META] = 'M',
+ [OCFS2_TYPE_DATA] = 'D',
+ [OCFS2_TYPE_SUPER] = 'S',
+ [OCFS2_TYPE_RENAME] = 'R'
};
static int ocfs2_build_lock_name(enum ocfs2_lock_type type,
@@ -82,6 +83,10 @@
static void ocfs2_super_ast_func(void *opaque);
static void ocfs2_super_bast_func(void *opaque,
int level);
+static void ocfs2_rename_ast_func(void *opaque);
+static void ocfs2_rename_bast_func(void *opaque,
+ int level);
+
/* so far, all locks have gotten along with the same unlock ast */
static void ocfs2_unlock_ast_func(void *opaque,
dlm_status status);
@@ -91,8 +96,8 @@
int *requeue);
static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
int *requeue);
-static int ocfs2_unblock_super(struct ocfs2_lock_res *lockres,
- int *requeue);
+static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
+ int *requeue);
typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
static int ocfs2_generic_unblock_lock(ocfs2_super *osb,
struct ocfs2_lock_res *lockres,
@@ -107,29 +112,36 @@
};
static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
- .ast = ocfs2_inode_ast_func,
- .bast = ocfs2_inode_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_meta,
+ .ast = ocfs2_inode_ast_func,
+ .bast = ocfs2_inode_bast_func,
+ .unlock_ast = ocfs2_unlock_ast_func,
+ .unblock = ocfs2_unblock_meta,
};
static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
int blocking);
static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
- .ast = ocfs2_inode_ast_func,
- .bast = ocfs2_inode_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_data,
+ .ast = ocfs2_inode_ast_func,
+ .bast = ocfs2_inode_bast_func,
+ .unlock_ast = ocfs2_unlock_ast_func,
+ .unblock = ocfs2_unblock_data,
};
static struct ocfs2_lock_res_ops ocfs2_super_lops = {
- .ast = ocfs2_super_ast_func,
- .bast = ocfs2_super_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_super,
+ .ast = ocfs2_super_ast_func,
+ .bast = ocfs2_super_bast_func,
+ .unlock_ast = ocfs2_unlock_ast_func,
+ .unblock = ocfs2_unblock_osb_lock,
};
+static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
+ .ast = ocfs2_rename_ast_func,
+ .bast = ocfs2_rename_bast_func,
+ .unlock_ast = ocfs2_unlock_ast_func,
+ .unblock = ocfs2_unblock_osb_lock,
+};
+
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
return lockres->l_type == OCFS2_TYPE_META ||
@@ -141,9 +153,15 @@
return lockres->l_type == OCFS2_TYPE_SUPER;
}
+static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres)
+{
+ return lockres->l_type == OCFS2_TYPE_RENAME;
+}
+
static inline ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres)
{
- OCFS2_ASSERT(ocfs2_is_super_lock(lockres));
+ BUG_ON(!ocfs2_is_super_lock(lockres)
+ && !ocfs2_is_rename_lock(lockres));
return (ocfs2_super *) lockres->l_priv;
}
@@ -201,7 +219,7 @@
int new_level);
static int ocfs2_meta_lock_update(struct inode *inode,
struct buffer_head **bh);
-static void ocfs2_drop_super_lock(ocfs2_super *osb);
+static void ocfs2_drop_osb_locks(ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static int __ocfs2_downconvert_lock(ocfs2_super *osb,
struct ocfs2_lock_res *lockres,
@@ -395,6 +413,29 @@
return status;
}
+int ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
+ ocfs2_super *osb)
+{
+ enum ocfs2_lock_type type = OCFS2_TYPE_RENAME;
+ int status;
+
+ mlog_entry_void();
+
+ ocfs2_lock_res_init_once(res);
+
+ status = ocfs2_build_lock_name(type, 0, 0, &res->l_name);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ ocfs2_lock_res_init_common(res, type, &ocfs2_rename_lops, osb);
+
+bail:
+ mlog_exit(status);
+ return status;
+}
+
void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
{
mlog_entry_void();
@@ -674,6 +715,11 @@
ocfs2_generic_handle_downconvert_action(lockres);
break;
default:
+ mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
+ "lockres flags = 0x%lx, unlock action: %u\n",
+ lockres->l_name, lockres->l_action, lockres->l_flags,
+ lockres->l_unlock_action);
+
BUG();
}
@@ -752,21 +798,15 @@
mlog_exit_void();
}
-static void ocfs2_super_ast_func(void *opaque)
+static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres)
{
- struct ocfs2_lock_res *lockres = opaque;
- dlm_lockstatus *lksb;
+ dlm_lockstatus *lksb = &lockres->l_lksb;
- mlog_entry_void();
+ spin_lock(&lockres->l_lock);
- mlog(0, "Superblock AST fired\n");
-
- OCFS2_ASSERT(ocfs2_is_super_lock(lockres));
-
- spin_lock(&lockres->l_lock);
- lksb = &(lockres->l_lksb);
if (lksb->status != DLM_NORMAL) {
- mlog(ML_ERROR, "lksb status value of %u!\n", lksb->status);
+ mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n",
+ lockres->l_name, lksb->status);
spin_unlock(&lockres->l_lock);
return;
}
@@ -784,30 +824,32 @@
default:
BUG();
}
+
/* set it to something invalid so if we get called again we
* can catch it. */
lockres->l_action = OCFS2_AST_INVALID;
spin_unlock(&lockres->l_lock);
+
wake_up(&lockres->l_event);
-
- mlog_exit_void();
}
-static void ocfs2_super_bast_func(void *opaque, int level)
+static void ocfs2_generic_bast_func(ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres,
+ int level,
+ int ignore_refresh)
{
- struct ocfs2_lock_res *lockres = opaque;
- ocfs2_super *osb;
int needs_downconvert;
mlog_entry_void();
- osb = ocfs2_lock_res_super(lockres);
- mlog(0, "Superblock BAST fired\n");
-
spin_lock(&lockres->l_lock);
needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
if (needs_downconvert)
ocfs2_schedule_blocked_lock(osb, lockres);
+
+ if (ignore_refresh)
+ lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
+
spin_unlock(&lockres->l_lock);
ocfs2_kick_vote_thread(osb);
@@ -816,6 +858,68 @@
mlog_exit_void();
}
+static void ocfs2_super_ast_func(void *opaque)
+{
+ struct ocfs2_lock_res *lockres = opaque;
+
+ mlog_entry_void();
+ mlog(0, "Superblock AST fired\n");
+
+ BUG_ON(!ocfs2_is_super_lock(lockres));
+ ocfs2_generic_ast_func(lockres);
+
+ mlog_exit_void();
+}
+
+static void ocfs2_super_bast_func(void *opaque,
+ int level)
+{
+ struct ocfs2_lock_res *lockres = opaque;
+ ocfs2_super *osb;
+
+ mlog_entry_void();
+ mlog(0, "Superblock BAST fired\n");
+
+ BUG_ON(!ocfs2_is_super_lock(lockres));
+ osb = ocfs2_lock_res_super(lockres);
+ ocfs2_generic_bast_func(osb, lockres, level, 0);
+
+ mlog_exit_void();
+}
+
+static void ocfs2_rename_ast_func(void *opaque)
+{
+ struct ocfs2_lock_res *lockres = opaque;
+
+ mlog_entry_void();
+
+ mlog(0, "Rename AST fired\n");
+
+ BUG_ON(!ocfs2_is_rename_lock(lockres));
+
+ ocfs2_generic_ast_func(lockres);
+
+ mlog_exit_void();
+}
+
+static void ocfs2_rename_bast_func(void *opaque,
+ int level)
+{
+ struct ocfs2_lock_res *lockres = opaque;
+ ocfs2_super *osb;
+
+ mlog_entry_void();
+
+ mlog(0, "Rename BAST fired\n");
+
+ BUG_ON(!ocfs2_is_rename_lock(lockres));
+
+ osb = ocfs2_lock_res_super(lockres);
+ ocfs2_generic_bast_func(osb, lockres, level, 1);
+
+ mlog_exit_void();
+}
+
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
int convert)
{
@@ -1030,6 +1134,10 @@
}
if (level > lockres->l_level) {
+ if (lockres->l_action != OCFS2_AST_INVALID)
+ mlog(ML_ERROR, "lockres %s has action %u pending\n",
+ lockres->l_name, lockres->l_action);
+
lockres->l_action = OCFS2_AST_CONVERT;
lockres->l_requested = level;
lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
@@ -1470,7 +1578,7 @@
mlog_entry_void();
spin_lock(&OCFS2_I(inode)->ip_lock);
- if (INODE_DELETED(inode)) {
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
mlog(0, "Orphaned inode %"MLFu64" was deleted while we "
"were waiting on a lock. ip_flags = 0x%x\n",
OCFS2_I(inode)->ip_blkno, OCFS2_I(inode)->ip_flags);
@@ -1698,7 +1806,7 @@
{
int status;
int level = ex ? LKM_EXMODE : LKM_PRMODE;
- struct ocfs2_lock_res *lockres = &osb->super_lockres;
+ struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
struct buffer_head *bh;
ocfs2_slot_info *si = osb->slot_info;
@@ -1740,11 +1848,30 @@
int ex)
{
int level = ex ? LKM_EXMODE : LKM_PRMODE;
- struct ocfs2_lock_res *lockres = &osb->super_lockres;
+ struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
ocfs2_cluster_unlock(osb, lockres, level);
}
+int ocfs2_rename_lock(ocfs2_super *osb)
+{
+ int status;
+ struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
+
+ status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, NULL, 0);
+ if (status < 0)
+ mlog_errno(status);
+
+ return status;
+}
+
+void ocfs2_rename_unlock(ocfs2_super *osb)
+{
+ struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
+
+ ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
+}
+
int ocfs2_dlm_init(ocfs2_super *osb)
{
int status;
@@ -1778,9 +1905,16 @@
osb->dlm = dlm;
- status = ocfs2_super_lock_res_init(&osb->super_lockres, osb);
+ status = ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ status = ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
if (status < 0)
mlog_errno(status);
+
bail:
mlog_exit(status);
@@ -1791,14 +1925,16 @@
{
mlog_entry_void();
- ocfs2_drop_super_lock(osb);
+ ocfs2_drop_osb_locks(osb);
if (osb->vote_task) {
kthread_stop(osb->vote_task);
osb->vote_task = NULL;
}
- ocfs2_lock_res_free(&osb->super_lockres);
+ ocfs2_lock_res_free(&osb->osb_super_lockres);
+ ocfs2_lock_res_free(&osb->osb_rename_lockres);
+
dlm_unregister_domain(osb->dlm);
osb->dlm = NULL;
@@ -1878,6 +2014,7 @@
lockres);
if (status != DLM_NORMAL) {
ocfs2_log_dlm_error("dlmunlock", status, lockres);
+ mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
dlm_print_one_lock(lockres->l_lksb.lockid);
BUG();
}
@@ -1931,16 +2068,20 @@
return __ocfs2_drop_lock(osb, lockres);
}
-static void ocfs2_drop_super_lock(ocfs2_super *osb)
+static void ocfs2_drop_osb_locks(ocfs2_super *osb)
{
int status;
mlog_entry_void();
- status = ocfs2_drop_lock(osb, &osb->super_lockres, NULL);
+ status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL);
if (status < 0)
mlog_errno(status);
+ status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL);
+ if (status < 0)
+ mlog_errno(status);
+
mlog_exit(status);
}
@@ -2372,14 +2513,18 @@
return status;
}
-static int ocfs2_unblock_super(struct ocfs2_lock_res *lockres,
- int *requeue)
+/* Generic unblock function for any lockres whose private data is an
+ * ocfs2_super pointer. */
+static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
+ int *requeue)
{
int status;
ocfs2_super *osb;
mlog_entry_void();
+ mlog(0, "Unblock lockres %s\n", lockres->l_name);
+
osb = ocfs2_lock_res_super(lockres);
status = ocfs2_generic_unblock_lock(osb,
Modified: trunk/fs/ocfs2/dlmglue.h
===================================================================
--- trunk/fs/ocfs2/dlmglue.h 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/dlmglue.h 2005-06-10 22:08:41 UTC (rev 2389)
@@ -113,6 +113,8 @@
int ex);
void ocfs2_super_unlock(ocfs2_super *osb,
int ex);
+int ocfs2_rename_lock(ocfs2_super *osb);
+void ocfs2_rename_unlock(ocfs2_super *osb);
/* for the vote thread */
void ocfs2_process_blocked_lock(ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
Modified: trunk/fs/ocfs2/file.c
===================================================================
--- trunk/fs/ocfs2/file.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/file.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -88,13 +88,24 @@
mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
file->f_dentry->d_name.len, file->f_dentry->d_name.name);
- status = -EACCES;
+ spin_lock(&oi->ip_lock);
- spin_lock(&oi->ip_lock);
+ /* Check that the inode hasn't been wiped from disk by another
+ * node. If it hasn't then we're safe as long as we hold the
+ * spin lock until our increment of open count. */
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
+ spin_unlock(&oi->ip_lock);
+
+ status = -ENOENT;
+ goto leave;
+ }
+
if (oi->ip_open_count &&
!ocfs2_valid_open(mode,
oi->ip_flags & OCFS2_INODE_OPEN_DIRECT)) {
spin_unlock(&oi->ip_lock);
+
+ status = -EACCES;
goto leave;
}
@@ -480,7 +491,11 @@
fe = (ocfs2_dinode *) fe_bh->b_data;
OCFS2_BUG_ON_INVALID_DINODE(fe);
- OCFS2_ASSERT(fe->i_size == i_size_read(inode));
+ mlog_bug_on_msg(fe->i_size != i_size_read(inode),
+ "Inode %"MLFu64", inode i_size = %"MLFu64" != di "
+ "i_size = %"MLFu64", i_flags = 0x%x\n",
+ OCFS2_I(inode)->ip_blkno, i_size_read(inode),
+ fe->i_size, fe->i_flags);
if (new_i_size > fe->i_size) {
mlog(0, "asked to truncate file with size (%"MLFu64") "
Modified: trunk/fs/ocfs2/inode.c
===================================================================
--- trunk/fs/ocfs2/inode.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/inode.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -554,7 +554,7 @@
/* XXX: Is this really necessary? */
spin_lock(&OCFS2_I(inode)->ip_lock);
- SET_INODE_DELETED(inode);
+ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
spin_unlock(&OCFS2_I(inode)->ip_lock);
goto bail_unblock;
}
@@ -670,7 +670,7 @@
goto bail_unblock;
}
- SET_INODE_DELETED(inode);
+ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
bail_unblock:
status = sigprocmask(SIG_SETMASK, &oldset, NULL);
if (status < 0)
@@ -716,7 +716,7 @@
/* We very well may get a clear_inode before all an inodes
* metadata has hit disk. Of course, we can't drop any cluster
* locks until the journal has finished with it. */
- if (!INODE_DELETED(inode))
+ if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED))
ocfs2_checkpoint_inode(inode);
mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
@@ -847,7 +847,7 @@
osb = OCFS2_SB(inode->i_sb);
spin_lock(&OCFS2_I(inode)->ip_lock);
- if (INODE_DELETED(inode)) {
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
spin_unlock(&OCFS2_I(inode)->ip_lock);
mlog(0, "inode deleted!\n");
status = -ENOENT;
Modified: trunk/fs/ocfs2/inode.h
===================================================================
--- trunk/fs/ocfs2/inode.h 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/inode.h 2005-06-10 22:08:41 UTC (rev 2389)
@@ -91,13 +91,8 @@
return container_of(inode, struct ocfs2_inode_info, vfs_inode);
}
-#define INODE_DELETED(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_DELETED)
-#define SET_INODE_DELETED(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_DELETED)
-#define CLEAR_INODE_DELETED(i) (OCFS2_I(i)->ip_flags &= (~OCFS2_INODE_DELETED))
-
#define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL)
#define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL)
-#define CLEAR_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags &= (~OCFS2_INODE_JOURNAL))
extern kmem_cache_t *ocfs2_inode_cache;
Modified: trunk/fs/ocfs2/namei.c
===================================================================
--- trunk/fs/ocfs2/namei.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/namei.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -129,10 +129,10 @@
struct dentry *dentry,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
- struct buffer_head *insert_bh)
+ struct buffer_head *insert_bh)
{
return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
- dentry->d_name.name, dentry->d_name.len,
+ dentry->d_name.name, dentry->d_name.len,
inode, blkno, parent_fe_bh, insert_bh);
}
@@ -146,9 +146,7 @@
u64 blkno;
struct buffer_head *dirent_bh = NULL;
struct inode *inode = NULL;
- struct super_block *sb = dir->i_sb;
struct dentry *ret;
- ocfs2_super *osb = OCFS2_SB(sb);
struct ocfs2_dir_entry *dirent;
mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
@@ -159,26 +157,43 @@
goto bail;
}
- mlog(0, "about to call find_files_on_disk with inode=%p\n", dir);
+ mlog(0, "find name %.*s in directory %"MLFu64"\n", dentry->d_name.len,
+ dentry->d_name.name, OCFS2_I(dir)->ip_blkno);
- status = ocfs2_find_files_on_disk(osb, dentry->d_name.name,
+ status = ocfs2_meta_lock(dir, NULL, NULL, 0);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ ret = ERR_PTR(status);
+ goto bail;
+ }
+
+ status = ocfs2_find_files_on_disk(dentry->d_name.name,
dentry->d_name.len, &blkno,
- dir, 1, &dirent_bh, &dirent);
+ dir, &dirent_bh, &dirent);
if (status < 0)
goto bail_add;
-
- inode = ocfs2_iget(osb, blkno);
+
+ inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
if (!inode) {
- mlog(ML_ERROR, "Could not create inode!\n");
+ mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
ret = ERR_PTR(-EACCES);
- goto bail;
+ goto bail_unlock;
}
bail_add:
+
dentry->d_op = &ocfs2_dentry_ops;
d_add(dentry, inode);
ret = NULL;
+bail_unlock:
+ /* Don't drop the cluster lock until *after* the d_add --
+ * unlink on another node will message us to remove that
+ * dentry under this lock so otherwise we can race this with
+ * the vote thread and have a stale dentry. */
+ ocfs2_meta_unlock(dir, 0);
+
bail:
if (dirent_bh)
brelse(dirent_bh);
@@ -195,28 +210,35 @@
struct dentry *parent;
struct inode *inode;
struct inode *dir = child->d_inode;
- struct super_block *sb = dir->i_sb;
- ocfs2_super *osb = OCFS2_SB(sb);
struct buffer_head *dirent_bh = NULL;
struct ocfs2_dir_entry *dirent;
mlog_entry("(0x%p, '%.*s')\n", child,
child->d_name.len, child->d_name.name);
- mlog(0, "about to call find_files_on_disk with inode=%p\n", dir);
+ mlog(0, "find parent of directory %"MLFu64"\n",
+ OCFS2_I(dir)->ip_blkno);
- status = ocfs2_find_files_on_disk(osb, "..", 2, &blkno,
- dir, 1, &dirent_bh, &dirent);
+ status = ocfs2_meta_lock(dir, NULL, NULL, 0);
if (status < 0) {
- parent = ERR_PTR(-ENOENT);
+ if (status != -ENOENT)
+ mlog_errno(status);
+ parent = ERR_PTR(status);
goto bail;
}
- inode = ocfs2_iget(osb, blkno);
+ status = ocfs2_find_files_on_disk("..", 2, &blkno, dir, &dirent_bh,
+ &dirent);
+ if (status < 0) {
+ parent = ERR_PTR(-ENOENT);
+ goto bail_unlock;
+ }
+
+ inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
if (!inode) {
- mlog(ML_ERROR, "Could not create inode!\n");
+ mlog(ML_ERROR, "Unable to create inode %"MLFu64"\n", blkno);
parent = ERR_PTR(-EACCES);
- goto bail;
+ goto bail_unlock;
}
parent = d_alloc_anon(inode);
@@ -225,10 +247,13 @@
parent = ERR_PTR(-ENOMEM);
}
-bail:
+bail_unlock:
+ ocfs2_meta_unlock(dir, 0);
+
if (dirent_bh)
brelse(dirent_bh);
+bail:
mlog_exit_ptr(parent);
return parent;
@@ -310,7 +335,6 @@
{
int status = 0;
struct buffer_head *parent_fe_bh = NULL;
- u64 file_off;
ocfs2_journal_handle *handle = NULL;
ocfs2_super *osb;
ocfs2_dinode *fe = NULL;
@@ -428,8 +452,6 @@
if (status < 0)
mlog_errno(status);
- file_off = fe->i_blkno << dir->i_sb->s_blocksize_bits;
-
if (S_ISDIR(mode)) {
status = ocfs2_fill_new_dir(osb, handle, dir, inode,
new_fe_bh, data_ac);
@@ -461,8 +483,8 @@
}
insert_inode_hash(inode);
+ dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
-
status = 0;
leave:
if (handle)
@@ -737,6 +759,7 @@
}
atomic_inc(&inode->i_count);
+ dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
bail:
if (handle)
@@ -797,10 +820,9 @@
goto leave;
}
- status = ocfs2_find_files_on_disk(osb, dentry->d_name.name,
+ status = ocfs2_find_files_on_disk(dentry->d_name.name,
dentry->d_name.len, &blkno,
- dir, 0, &dirent_bh,
- &dirent);
+ dir, &dirent_bh, &dirent);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
@@ -843,7 +865,7 @@
else
inode->i_nlink--;
- status = ocfs2_request_unlink_vote(inode,
+ status = ocfs2_request_unlink_vote(inode, dentry,
(unsigned int) inode->i_nlink);
if (status < 0) {
/* This vote should succeed under all normal
@@ -1009,7 +1031,7 @@
struct inode *new_dir,
struct dentry *new_dentry)
{
- int status = 0;
+ int status = 0, rename_lock = 0;
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
ocfs2_dinode *newfe = NULL;
@@ -1053,6 +1075,26 @@
}
}
+ /* Assume a directory heirarchy thusly:
+ * a/b/c
+ * a/d
+ * a,b,c, and d are all directories.
+ *
+ * from cwd of 'a' on both nodes:
+ * node1: mv b/c d
+ * node2: mv d b/c
+ *
+ * And that's why, just like the VFS, we need a file system
+ * rename lock. */
+ if (old_dentry != new_dentry) {
+ status = ocfs2_rename_lock(osb);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ rename_lock = 1;
+ }
+
handle = ocfs2_alloc_handle(osb);
if (handle == NULL) {
status = -ENOMEM;
@@ -1093,7 +1135,7 @@
goto bail;
}
- status = ocfs2_request_rename_vote(old_inode);
+ status = ocfs2_request_rename_vote(old_inode, old_dentry);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1115,7 +1157,7 @@
} else {
/* Ah, the simple case - we're a file so just send a
* message. */
- status = ocfs2_request_rename_vote(old_inode);
+ status = ocfs2_request_rename_vote(old_inode, old_dentry);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1140,10 +1182,10 @@
/* check if the target already exists (in which case we need
* to delete it */
- status = ocfs2_find_files_on_disk(osb, new_dentry->d_name.name,
+ status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
new_dentry->d_name.len,
- &newfe_blkno, new_dir, 0,
- &new_de_bh, &new_de);
+ &newfe_blkno, new_dir, &new_de_bh,
+ &new_de);
/* The only error we allow here is -ENOENT because the new
* file not existing is perfectly valid. */
if ((status < 0) && (status != -ENOENT)) {
@@ -1160,10 +1202,22 @@
/* In case we need to overwrite an existing file, we blow it
* away first */
if (new_de) {
- BUG_ON(!new_inode);
+ /* VFS didn't think there existed an inode here, but
+ * someone else in the cluster must have raced our
+ * rename to create one. Today we error cleanly, in
+ * the future we should consider calling iget to build
+ * a new struct inode for this entry. */
+ if (!new_inode) {
+ status = -EACCES;
+ mlog(0, "We found an inode for name %.*s but VFS "
+ "didn't give us one.\n", new_dentry->d_name.len,
+ new_dentry->d_name.name);
+ goto bail;
+ }
+
if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
- status = -ENOENT;
+ status = -EACCES;
mlog(0, "Inode blkno (%"MLFu64") and dir (%"MLFu64") "
"disagree. ip_flags = %x\n",
@@ -1184,7 +1238,8 @@
else
links_count = (unsigned int) (new_inode->i_nlink - 1);
- status = ocfs2_request_unlink_vote(new_inode, links_count);
+ status = ocfs2_request_unlink_vote(new_inode, new_dentry,
+ links_count);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1208,7 +1263,7 @@
}
}
} else {
- OCFS2_ASSERT(new_dentry->d_parent->d_inode == new_dir);
+ BUG_ON(new_dentry->d_parent->d_inode != new_dir);
status = ocfs2_check_dir_for_entry(new_dir,
new_dentry->d_name.name,
@@ -1343,7 +1398,7 @@
status = ocfs2_journal_dirty(handle, new_dir_bh);
}
}
-
+
if (old_dir_nlink != old_dir->i_nlink) {
if (!old_dir_bh) {
mlog(ML_ERROR, "need to change nlink for old dir "
@@ -1364,6 +1419,9 @@
status = 0;
bail:
+ if (rename_lock)
+ ocfs2_rename_unlock(osb);
+
if (handle)
ocfs2_commit_trans(handle);
@@ -1668,6 +1726,7 @@
}
insert_inode_hash(inode);
+ dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
bail:
if (handle)
@@ -1723,7 +1782,7 @@
const char *name, int namelen,
struct inode *inode, u64 blkno,
struct buffer_head *parent_fe_bh,
- struct buffer_head *insert_bh)
+ struct buffer_head *insert_bh)
{
unsigned long offset;
unsigned short rec_len;
@@ -1821,6 +1880,7 @@
while (i < bh->b_size) {
if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
status = -EIO;
+ mlog_errno(status);
goto bail;
}
if (de == de_del) {
@@ -1828,6 +1888,7 @@
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
status = -EIO;
+ mlog_errno(status);
goto bail;
}
if (pde)
@@ -1902,8 +1963,6 @@
return ret;
}
-
-
struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
@@ -1929,6 +1988,7 @@
if (start >= nblocks)
start = 0;
block = start;
+
restart:
do {
/*
@@ -2141,8 +2201,7 @@
status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
OCFS2_ORPHAN_NAMELEN, inode,
OCFS2_I(inode)->ip_blkno,
- orphan_dir_bh,
- de_bh);
+ orphan_dir_bh, de_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
Modified: trunk/fs/ocfs2/ocfs2.h
===================================================================
--- trunk/fs/ocfs2/ocfs2.h 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/ocfs2.h 2005-06-10 22:08:41 UTC (rev 2389)
@@ -73,6 +73,7 @@
OCFS2_TYPE_META = 0,
OCFS2_TYPE_DATA,
OCFS2_TYPE_SUPER,
+ OCFS2_TYPE_RENAME,
OCFS2_NUM_LOCK_TYPES
};
@@ -230,7 +231,8 @@
char dev_str[20]; /* "major,minor" of the device */
dlm_ctxt *dlm;
- struct ocfs2_lock_res super_lockres;
+ struct ocfs2_lock_res osb_super_lockres;
+ struct ocfs2_lock_res osb_rename_lockres;
wait_queue_head_t recovery_event;
Modified: trunk/fs/ocfs2/sysfile.c
===================================================================
--- trunk/fs/ocfs2/sysfile.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/sysfile.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -110,9 +110,9 @@
sizeof(namebuf),
type, slot);
- status = ocfs2_find_files_on_disk(osb, namebuf, strlen(namebuf),
+ status = ocfs2_find_files_on_disk(namebuf, strlen(namebuf),
&blkno, osb->sys_root_inode,
- 0, &dirent_bh, &de);
+ &dirent_bh, &de);
if (status < 0) {
goto bail;
}
Modified: trunk/fs/ocfs2/vote.c
===================================================================
--- trunk/fs/ocfs2/vote.c 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/vote.c 2005-06-10 22:08:41 UTC (rev 2389)
@@ -63,6 +63,9 @@
u32 h_node_num; /* node sending this particular message. */
} ocfs2_msg_hdr;
+/* OCFS2_MAX_FILENAME_LEN is 255 characters, but we want to align this
+ * for the network. */
+#define OCFS2_VOTE_FILENAME_LEN 256
typedef struct _ocfs2_vote_msg
{
ocfs2_msg_hdr v_hdr;
@@ -71,6 +74,9 @@
s32 v_orphaned_slot; /* Used during delete votes */
u32 v_nlink; /* Used during unlink votes */
} md1; /* Message type dependant 1 */
+ u32 v_unlink_namelen;
+ u64 v_unlink_parent;
+ u8 v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN];
} ocfs2_vote_msg;
/* Responses are given these values to maintain backwards
@@ -198,7 +204,7 @@
spin_lock(&OCFS2_I(inode)->ip_lock);
/* vote no if the file is still open. */
- if (OCFS2_I(inode)->ip_open_count > 0) {
+ if (OCFS2_I(inode)->ip_open_count) {
mlog(0, "open count = %u\n",
OCFS2_I(inode)->ip_open_count);
spin_unlock(&OCFS2_I(inode)->ip_lock);
@@ -223,32 +229,111 @@
truncate_inode_pages(inode->i_mapping, 0);
ocfs2_extent_map_trunc(inode, 0);
+ spin_lock(&OCFS2_I(inode)->ip_lock);
+ /* double check open count - someone might have raced this
+ * thread into ocfs2_file_open while we were writing out
+ * data. If we're to allow a wipe of this inode now, we *must*
+ * hold the spinlock until we've marked it. */
+ if (OCFS2_I(inode)->ip_open_count) {
+ mlog(0, "Raced to wipe! open count = %u\n",
+ OCFS2_I(inode)->ip_open_count);
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
+ goto done;
+ }
+
+ /* Mark the inode as being wiped from disk. */
+ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
+
/* If we get here, then we're voting 'yes', so commit the
* delete on our side. */
response = OCFS2_RESPONSE_OK;
- spin_lock(&OCFS2_I(inode)->ip_lock);
- SET_INODE_DELETED(inode);
/* We set the SKIP_DELETE flag on the inode so we don't try to
* delete it in delete_inode ourselves. */
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_DELETE;
spin_unlock(&OCFS2_I(inode)->ip_lock);
+ /* Not sure this is necessary anymore. */
d_prune_aliases(inode);
done:
return response;
}
+static int ocfs2_match_dentry(struct dentry *dentry,
+ u64 parent_blkno,
+ unsigned int namelen,
+ const char *name)
+{
+ struct inode *parent;
+
+ if (!dentry->d_parent) {
+ mlog(0, "Detached from parent.\n");
+ return 0;
+ }
+
+ parent = dentry->d_parent->d_inode;
+ /* Negative parent dentry? */
+ if (!parent)
+ return 0;
+
+ /* Name is in a different directory. */
+ if (OCFS2_I(parent)->ip_blkno != parent_blkno)
+ return 0;
+
+ if (dentry->d_name.len != namelen)
+ return 0;
+
+ /* comparison above guarantees this is safe. */
+ if (memcmp(dentry->d_name.name, name, namelen))
+ return 0;
+
+ return 1;
+}
+
static void ocfs2_process_dentry_request(struct inode *inode,
int rename,
- unsigned int new_nlink)
+ unsigned int new_nlink,
+ u64 parent_blkno,
+ unsigned int namelen,
+ const char *name)
{
- d_prune_aliases(inode);
+ struct dentry *dentry = NULL;
+ struct list_head *p;
+ mlog(0, "parent %"MLFu64", namelen = %u, name = %.*s\n", parent_blkno,
+ namelen, namelen, name);
+
+ spin_lock(&dcache_lock);
+
+ /* Another node is removing this name from the system. It is
+ * up to us to find the corresponding dentry and if it exists,
+ * unhash it from the dcache. */
+ list_for_each(p, &inode->i_dentry) {
+ dentry = list_entry(p, struct dentry, d_alias);
+
+ if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) {
+ mlog(0, "dentry found: %.*s\n",
+ dentry->d_name.len, dentry->d_name.name);
+
+ dget_locked(dentry);
+ break;
+ }
+
+ dentry = NULL;
+ }
+
+ spin_unlock(&dcache_lock);
+
+ if (dentry) {
+ d_delete(dentry);
+ dput(dentry);
+ }
+
/* for rename, we don't change link counts */
if (!rename) {
mlog(0, "new_nlink = %u\n", new_nlink);
+
inode->i_nlink = new_nlink;
}
}
@@ -259,8 +344,8 @@
int net_status, vote_response;
int orphaned_slot = 0;
int rename = 0;
- unsigned int node_num, generation, new_nlink;
- u64 blkno;
+ unsigned int node_num, generation, new_nlink, namelen;
+ u64 blkno, parent_blkno;
enum ocfs2_vote_request request;
struct inode *inode = NULL;
ocfs2_msg_hdr *hdr = &msg->v_hdr;
@@ -341,9 +426,13 @@
rename = 1;
/* fall through */
case OCFS2_VOTE_REQ_UNLINK:
+ parent_blkno = be64_to_cpu(msg->v_unlink_parent);
+ namelen = ntohl(msg->v_unlink_namelen);
/* new_nlink will be ignored in case of a rename vote */
new_nlink = ntohl(msg->md1.v_nlink);
- ocfs2_process_dentry_request(inode, rename, new_nlink);
+ ocfs2_process_dentry_request(inode, rename, new_nlink,
+ parent_blkno, namelen,
+ msg->v_unlink_dirent);
break;
default:
mlog(ML_ERROR, "node %u, invalid request: %u\n",
@@ -634,37 +723,47 @@
return status;
}
-static int ocfs2_do_request_vote(ocfs2_super *osb,
- u64 blkno,
- unsigned int generation,
- enum ocfs2_vote_request type,
- u32 priv,
- struct ocfs2_net_response_cb *callback)
+static ocfs2_vote_msg * ocfs2_new_vote_request(ocfs2_super *osb,
+ u64 blkno,
+ unsigned int generation,
+ enum ocfs2_vote_request type,
+ u32 priv)
{
- int status, response;
- unsigned int response_id;
- ocfs2_vote_msg *request = NULL;
+ ocfs2_vote_msg *request;
ocfs2_msg_hdr *hdr;
- OCFS2_ASSERT(ocfs2_is_valid_vote_request(type));
+ BUG_ON(!ocfs2_is_valid_vote_request(type));
request = kcalloc(1, sizeof(*request), GFP_KERNEL);
if (!request) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
+ mlog_errno(-ENOMEM);
+ } else {
+ hdr = &request->v_hdr;
+ hdr->h_node_num = htonl((unsigned int) osb->node_num);
+ hdr->h_request = htonl(type);
+ hdr->h_blkno = cpu_to_be64(blkno);
+ hdr->h_generation = htonl(generation);
+
+ request->md1.v_generic1 = htonl(priv);
}
- hdr = &request->v_hdr;
+ return request;
+}
+/* Complete the buildup of a new vote request and process the
+ * broadcast return value. */
+static int ocfs2_do_request_vote(ocfs2_super *osb,
+ ocfs2_vote_msg *request,
+ struct ocfs2_net_response_cb *callback)
+{
+ int status, response;
+ unsigned int response_id;
+ ocfs2_msg_hdr *hdr;
+
response_id = ocfs2_new_response_id(osb);
+ hdr = &request->v_hdr;
hdr->h_response_id = htonl(response_id);
- hdr->h_request = htonl(type);
- hdr->h_blkno = cpu_to_be64(blkno);
- hdr->h_generation = htonl(generation);
- hdr->h_node_num = htonl((unsigned int) osb->node_num);
- request->md1.v_generic1 = htonl(priv);
status = ocfs2_broadcast_vote(osb, request, response_id, &response,
callback);
@@ -675,15 +774,12 @@
status = response;
bail:
- if (request)
- kfree(request);
return status;
}
static int ocfs2_request_vote(struct inode *inode,
- enum ocfs2_vote_request type,
- u32 priv,
+ ocfs2_vote_msg *request,
struct ocfs2_net_response_cb *callback)
{
int status;
@@ -706,12 +802,7 @@
status = 0;
if (!ocfs2_node_map_is_only(osb, &osb->mounted_map,
osb->node_num))
- status = ocfs2_do_request_vote(osb,
- OCFS2_I(inode)->ip_blkno,
- inode->i_generation,
- type,
- priv,
- callback);
+ status = ocfs2_do_request_vote(osb, request, callback);
ocfs2_super_unlock(osb, 0);
}
@@ -752,8 +843,10 @@
int ocfs2_request_delete_vote(struct inode *inode)
{
- int orphaned_slot;
+ int orphaned_slot, status;
struct ocfs2_net_response_cb delete_cb;
+ ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ ocfs2_vote_msg *request;
spin_lock(&OCFS2_I(inode)->ip_lock);
orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
@@ -765,66 +858,153 @@
mlog(0, "Inode %"MLFu64", we start thinking orphaned slot is %d\n",
OCFS2_I(inode)->ip_blkno, orphaned_slot);
- return ocfs2_request_vote(inode,
- OCFS2_VOTE_REQ_DELETE,
- orphaned_slot,
- &delete_cb);
+ status = -ENOMEM;
+ request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
+ inode->i_generation,
+ OCFS2_VOTE_REQ_DELETE, orphaned_slot);
+ if (request) {
+ status = ocfs2_request_vote(inode, request, &delete_cb);
+
+ kfree(request);
+ }
+
+ return status;
}
+static void ocfs2_setup_unlink_vote(ocfs2_vote_msg *request,
+ struct dentry *dentry)
+{
+ struct inode *parent = dentry->d_parent->d_inode;
+
+ /* We need some values which will uniquely identify a dentry
+ * on the other nodes so that they can find it and run
+ * d_delete against it. Parent directory block and full name
+ * should suffice. */
+
+ mlog(0, "unlink/rename request: parent: %"MLFu64" name: %.*s\n",
+ OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
+ dentry->d_name.name);
+
+ request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
+ request->v_unlink_namelen = htonl(dentry->d_name.len);
+ memcpy(request->v_unlink_dirent, dentry->d_name.name,
+ dentry->d_name.len);
+}
+
int ocfs2_request_unlink_vote(struct inode *inode,
+ struct dentry *dentry,
unsigned int nlink)
{
- return ocfs2_request_vote(inode,
- OCFS2_VOTE_REQ_UNLINK,
- nlink,
- NULL);
+ int status;
+ ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ ocfs2_vote_msg *request;
+
+ if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
+ return -ENAMETOOLONG;
+
+ status = -ENOMEM;
+ request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
+ inode->i_generation,
+ OCFS2_VOTE_REQ_UNLINK, nlink);
+ if (request) {
+ ocfs2_setup_unlink_vote(request, dentry);
+
+ status = ocfs2_request_vote(inode, request, NULL);
+
+ kfree(request);
+ }
+ return status;
}
-int ocfs2_request_rename_vote(struct inode *inode)
+int ocfs2_request_rename_vote(struct inode *inode,
+ struct dentry *dentry)
{
- return ocfs2_request_vote(inode,
- OCFS2_VOTE_REQ_RENAME,
- 0,
- NULL);
+ int status;
+ ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ ocfs2_vote_msg *request;
+
+ if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
+ return -ENAMETOOLONG;
+
+ status = -ENOMEM;
+ request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
+ inode->i_generation,
+ OCFS2_VOTE_REQ_RENAME, 0);
+ if (request) {
+ ocfs2_setup_unlink_vote(request, dentry);
+
+ status = ocfs2_request_vote(inode, request, NULL);
+
+ kfree(request);
+ }
+ return status;
}
int ocfs2_request_mount_vote(ocfs2_super *osb)
{
int status;
+ ocfs2_vote_msg *request = NULL;
+ request = ocfs2_new_vote_request(osb, 0ULL, 0,
+ OCFS2_VOTE_REQ_MOUNT, 0);
+ if (!request) {
+ status = -ENOMEM;
+ goto bail;
+ }
+
status = -EAGAIN;
while (status == -EAGAIN) {
- if (signal_pending(current))
- return -ERESTARTSYS;
+ if (signal_pending(current)) {
+ status = -ERESTARTSYS;
+ goto bail;
+ }
if (ocfs2_node_map_is_only(osb, &osb->mounted_map,
- osb->node_num))
- return 0;
+ osb->node_num)) {
+ status = 0;
+ goto bail;
+ }
- status = ocfs2_do_request_vote(osb, 0ULL, 0,
- OCFS2_VOTE_REQ_MOUNT,
- 0, NULL);
+ status = ocfs2_do_request_vote(osb, request, NULL);
}
+
+bail:
+ if (request)
+ kfree(request);
+
return status;
}
int ocfs2_request_umount_vote(ocfs2_super *osb)
{
int status;
+ ocfs2_vote_msg *request = NULL;
+ request = ocfs2_new_vote_request(osb, 0ULL, 0,
+ OCFS2_VOTE_REQ_UMOUNT, 0);
+ if (!request) {
+ status = -ENOMEM;
+ goto bail;
+ }
+
status = -EAGAIN;
while (status == -EAGAIN) {
/* Do not check signals on this vote... We really want
* this one to go all the way through. */
if (ocfs2_node_map_is_only(osb, &osb->mounted_map,
- osb->node_num))
- return 0;
+ osb->node_num)) {
+ status = 0;
+ goto bail;
+ }
- status = ocfs2_do_request_vote(osb, 0ULL, 0,
- OCFS2_VOTE_REQ_UMOUNT,
- 0, NULL);
+ status = ocfs2_do_request_vote(osb, request, NULL);
}
+
+bail:
+ if (request)
+ kfree(request);
+
return status;
}
Modified: trunk/fs/ocfs2/vote.h
===================================================================
--- trunk/fs/ocfs2/vote.h 2005-06-10 19:33:33 UTC (rev 2388)
+++ trunk/fs/ocfs2/vote.h 2005-06-10 22:08:41 UTC (rev 2389)
@@ -40,8 +40,10 @@
int ocfs2_request_delete_vote(struct inode *inode);
int ocfs2_request_unlink_vote(struct inode *inode,
+ struct dentry *dentry,
unsigned int nlink);
-int ocfs2_request_rename_vote(struct inode *inode);
+int ocfs2_request_rename_vote(struct inode *inode,
+ struct dentry *dentry);
int ocfs2_request_mount_vote(ocfs2_super *osb);
int ocfs2_request_umount_vote(ocfs2_super *osb);
int ocfs2_register_net_handlers(ocfs2_super *osb);
More information about the Ocfs2-commits
mailing list