[Ocfs2-commits] mfasheh commits r1596 - branches/dlm-glue/src
svn-commits at oss.oracle.com
svn-commits at oss.oracle.com
Wed Oct 27 21:14:31 CDT 2004
Author: mfasheh
Date: 2004-10-27 21:14:29 -0500 (Wed, 27 Oct 2004)
New Revision: 1596
Removed:
branches/dlm-glue/src/lockres.c
branches/dlm-glue/src/lockres.h
Modified:
branches/dlm-glue/src/Makefile
branches/dlm-glue/src/alloc.c
branches/dlm-glue/src/aops.c
branches/dlm-glue/src/dcache.c
branches/dlm-glue/src/dir.c
branches/dlm-glue/src/dlmglue.c
branches/dlm-glue/src/dlmglue.h
branches/dlm-glue/src/file.c
branches/dlm-glue/src/heartbeat.c
branches/dlm-glue/src/heartbeat.h
branches/dlm-glue/src/inode.c
branches/dlm-glue/src/journal.c
branches/dlm-glue/src/localalloc.c
branches/dlm-glue/src/namei.c
branches/dlm-glue/src/ocfs.h
branches/dlm-glue/src/ocfs2_disk_dlm.h
branches/dlm-glue/src/ocfs2_fs.h
branches/dlm-glue/src/ocfs_journal.h
branches/dlm-glue/src/ocfs_log.h
branches/dlm-glue/src/proc.c
branches/dlm-glue/src/suballoc.c
branches/dlm-glue/src/super.c
Log:
* update from my local tree. Things compile now, though don't look for
it to even load yet. I think almost everything has been stubbed out
so far...
Modified: branches/dlm-glue/src/Makefile
===================================================================
--- branches/dlm-glue/src/Makefile 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/Makefile 2004-10-28 02:14:29 UTC (rev 1596)
@@ -70,7 +70,7 @@
buffer_head_io.c \
dcache.c \
dir.c \
- dlm.c \
+ dlmglue.c \
extent_map.c \
file.c \
heartbeat.c \
@@ -78,9 +78,7 @@
ioctl.c \
journal.c \
localalloc.c \
- lockres.c \
namei.c \
- nm.c \
proc.c \
suballoc.c \
super.c \
@@ -89,7 +87,6 @@
util.c \
ver.c \
volcfg.c \
- vote.c
HFILES = \
ocfs2_fs.h \
@@ -104,7 +101,7 @@
bitmap.h \
dcache.h \
dir.h \
- dlm.h \
+ dlmglue.h \
extent_map.h \
file.h \
heartbeat.h \
@@ -112,9 +109,7 @@
ioctl.h \
journal.h \
localalloc.h \
- lockres.h \
namei.h \
- nm.h \
proc.h \
suballoc.h \
super.h \
@@ -123,7 +118,6 @@
util.h \
ver.h \
volcfg.h \
- vote.h
VERSION_FILES = $(CFILES) $(HFILES)
VERSION_SRC = ver.c
@@ -197,8 +191,10 @@
BASE_DEFINES = -DMODULE -DLINUX -D__KERNEL__
DEFINES += $(BASE_DEFINES) $(GLOBAL_DEFINES)
-INCLUDES = -I. -I$(KERNELINC) -I$(GCCINC)
+CLUSTERINC = /home/mfasheh/src/cluster-support/src/
+INCLUDES = -I. -I$(KERNELINC) -I$(GCCINC) -I$(CLUSTERINC)
+
CFLAGS = $(OPTS) $(MACH_CFLAGS) -pipe -nostdinc -fno-strict-aliasing \
-fno-common -fomit-frame-pointer $(MODVERSIONS) $(WARNINGS)
LDADD = $(MACH_LDADD) -nostdlib
Modified: branches/dlm-glue/src/alloc.c
===================================================================
--- branches/dlm-glue/src/alloc.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/alloc.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -36,7 +36,7 @@
#include "alloc.h"
#include "bitmap.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "localalloc.h"
Modified: branches/dlm-glue/src/aops.c
===================================================================
--- branches/dlm-glue/src/aops.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/aops.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -83,7 +83,7 @@
/* We don't use the page cache to create symlink data, so if
* need be, copy it over from the buffer cache. */
- if (!buffer_uptodate(bh_result) && ocfs_inode_is_new(osb, inode)) {
+ if (!buffer_uptodate(bh_result) && ocfs_inode_is_new(inode)) {
buffer_cache_bh = sb_getblk(osb->sb,
fe->id2.i_list.l_recs[0].e_blkno + iblock);
if (!buffer_cache_bh) {
@@ -96,7 +96,7 @@
* the bh, even if it commits while we're doing the
* copy, the data is still good. */
if (buffer_jbd(buffer_cache_bh)
- && ocfs_inode_is_new(osb, inode)) {
+ && ocfs_inode_is_new(inode)) {
kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
if (!kaddr) {
LOG_ERROR_ARGS("couldn't kmap!\n");
Modified: branches/dlm-glue/src/dcache.c
===================================================================
--- branches/dlm-glue/src/dcache.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/dcache.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -38,7 +38,6 @@
#include "alloc.h"
#include "dcache.h"
#include "file.h"
-#include "vote.h"
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DCACHE
@@ -72,13 +71,16 @@
goto bail;
}
+ spin_lock(&OCFS_I(inode)->ip_lock);
/* did we or someone else delete this inode? */
if (INODE_DELETED(inode)) {
+ spin_unlock(&OCFS_I(inode)->ip_lock);
LOG_TRACE_ARGS("dentry_revalidate: inode (%llu) deleted, "
"returning false\n",
OCFS_I(inode)->ip_blkno);
goto bail;
}
+ spin_unlock(&OCFS_I(inode)->ip_lock);
#warning "should we do this for all files?"
if (S_ISDIR(inode->i_mode) && (!inode->i_nlink)) {
@@ -88,18 +90,6 @@
goto bail;
}
- if (ocfs_node_map_is_only(osb, &osb->publ_map, osb->node_num)) {
- LOG_TRACE_STR ("Only node alive. revalidate=true.");
- ret = 1;
- goto bail;
- }
-
- /* if I hold cache lock, no revalidate needed */
- if (ocfs_is_local_cache_lock(osb, inode)) {
- ret = 1;
- goto bail;
- }
-
ret = 1;
bail:
Modified: branches/dlm-glue/src/dir.c
===================================================================
--- branches/dlm-glue/src/dir.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/dir.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -49,7 +49,7 @@
#include "alloc.h"
#include "dir.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "inode.h"
@@ -85,7 +85,6 @@
struct inode *inode = filp->f_dentry->d_inode;
struct super_block * sb = inode->i_sb;
int have_disk_lock = 0;
- ocfs_super *osb = OCFS_SB(sb);
LOG_SET_CONTEXT(READDIR);
@@ -201,11 +200,8 @@
stored = 0;
bail:
- if (have_disk_lock) {
- error = ocfs2_meta_unlock(inode, 0);
- if (error < 0)
- LOG_ERROR_STATUS (error);
- }
+ if (have_disk_lock)
+ ocfs2_meta_unlock(inode, 0);
LOG_EXIT_STATUS(stored);
LOG_CLEAR_CONTEXT();
@@ -224,7 +220,6 @@
struct ocfs2_dir_entry **dirent)
{
int status = -ENOENT;
- int tmpstat;
int lock_acq = 0;
LOG_ENTRY_ARGS ("(osb=%p, parent=%llu, name='%*s', blkno=%p, inode=%p)\n",
@@ -254,13 +249,7 @@
leave:
if (take_lock && lock_acq)
- {
- tmpstat = ocfs2_meta_unlock(inode, 0);
- if (tmpstat < 0) {
- LOG_ERROR_STATUS (tmpstat);
- /* Volume should be disabled in this case */
- }
- }
+ ocfs2_meta_unlock(inode, 0);
if (status < 0) {
*dirent = NULL;
Modified: branches/dlm-glue/src/dlmglue.c
===================================================================
--- branches/dlm-glue/src/dlmglue.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/dlmglue.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -23,9 +23,33 @@
* Boston, MA 021110-1307, USA.
*/
+#include "ocfs_compat.h"
-#include "middle.h"
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/smp_lock.h>
+#include <dlmutil.h>
+#include <dlmcommon.h>
+#include <dlmmod.h>
+#include <dlmnm.h>
+
+#include "ocfs_log.h"
+#include "ocfs.h"
+#include "ocfs2.h"
+
+#include "alloc.h"
+#include "dlmglue.h"
+#include "extent_map.h"
+#include "inode.h"
+#include "util.h"
+
+#include "ocfs_journal.h"
+#include "buffer_head_io.h"
+
+#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_DLMGLUE
+
/* lock ids are made up in the following manner:
* name[0] --> type
* name[1-6] --> 6 pad characters, reserved for now
@@ -35,49 +59,83 @@
#define OCFS2_LOCK_ID_MAX_LEN 32
#define OCFS2_LOCK_ID_PAD "000000"
-enum ocfs2_lock_type {
- OCFS_TYPE_META = 0,
- OCFS_TYPE_DATA,
- OCFS_NUM_LOCK_TYPES
-};
-
static char ocfs2_lock_type_char[OCFS_NUM_LOCK_TYPES] = {
[OCFS_TYPE_META] 'M',
[OCFS_TYPE_DATA] 'D'
};
-static int ocfs2_build_lock_name(enum ocfs_lock_type type,
+static int ocfs2_build_lock_name(enum ocfs2_lock_type type,
u64 blkno,
u32 generation,
char **ret);
static void ocfs2_ast_func(void *opaque);
/* so far, all locks have gotten along with the same BAST. */
-static void ocfs2_bast_func(void *opaque, dlm_lock_type type);
+static void ocfs2_bast_func(void *opaque, int level);
-static dlm_astlockfunc_t ocfs2_lock_type_asts[OCFS_NUM_LOCK_TYPES] = {
+static dlm_astlockfunc_t *ocfs2_lock_type_asts[OCFS_NUM_LOCK_TYPES] = {
[OCFS_TYPE_META] ocfs2_ast_func,
[OCFS_TYPE_DATA] ocfs2_ast_func
};
-static dlm_astlockfunc_t ocfs2_lock_type_basts[OCFS_NUM_LOCK_TYPES] = {
+static dlm_bastlockfunc_t *ocfs2_lock_type_basts[OCFS_NUM_LOCK_TYPES] = {
[OCFS_TYPE_META] ocfs2_bast_func,
[OCFS_TYPE_DATA] ocfs2_bast_func
};
-
+static int ocfs2_lock_create(struct inode *inode,
+ ocfs2_lock *lock,
+ int level,
+ int flags);
+static int ocfs2_cluster_lock(struct inode *inode,
+ ocfs2_lock *lock,
+ int level);
+static void ocfs2_unlock_ast_func(void *opaque, dlm_status status);
+static void ocfs2_inc_inode_seq(ocfs_super *osb,
+ struct inode *inode);
+static void ocfs2_schedule_blocked_inode(struct inode *inode);
+static inline void ocfs2_recover_from_dlm_error(ocfs2_lock_res *lockres,
+ ocfs2_lock *lock);
+static void ocfs2_data_vote_on_unlock(struct inode *inode);
/* Called after we refresh our inode, only has any effect if we have
* an EX lock. This populates the LVB with the initial values for our
* change set. */
static void ocfs2_reset_meta_lvb_values(struct inode *inode);
+static void __ocfs2_stuff_meta_lvb(struct inode *inode);
+static void ocfs2_refresh_inode_from_lvb(struct inode *inode);
+static void __ocfs2_lvb_on_downconvert(ocfs2_lock *lock,
+ int new_level);
+static int ocfs2_meta_lock_update(struct inode *inode,
+ struct buffer_head **bh);
+static int ocfs2_vote_thread(void *arg);
+static void ocfs2_vote_thread_do_work(ocfs_super *osb);
+static void ocfs2_process_vote(ocfs_super *osb,
+ ocfs2_vote_msg *msg);
+static int ocfs2_drop_lock(ocfs_super *osb,
+ ocfs2_lock_res *lockres,
+ ocfs2_lock *lock);
+static inline int ocfs2_highest_compat_lock_level(int level);
+static int __ocfs2_downconvert_lock(ocfs2_lock_res *lockres,
+ ocfs2_lock *lock,
+ int new_level,
+ int lvb);
+static void ocfs2_process_blocked_inode(struct inode *inode);
+static int ocfs2_process_blocked_meta(struct inode *inode,
+ int *requeue);
+static int ocfs2_process_blocked_data(struct inode *inode,
+ int *requeue);
+static int ocfs2_do_request_vote(ocfs_super *osb,
+ struct inode *inode,
+ enum ocfs2_vote_request type);
+
static inline int ocfs2_lvb_is_trustable(ocfs2_lock *lock)
{
ocfs2_lvb *lvb = (ocfs2_lvb *) lock->l_lksb.lvb;
int ret = 0;
- spin_lock(&lock->l_lockres.lr_lock);
+ spin_lock(&lock->l_lockres->lr_lock);
if (lvb->lvb_seq &&
lock->l_local_seq == lvb->lvb_seq)
ret = 1;
- spin_unlock(&lock->l_lockres.lr_lock);
+ spin_unlock(&lock->l_lockres->lr_lock);
return ret;
}
@@ -86,10 +144,10 @@
{
ocfs2_lvb *lvb = (ocfs2_lvb *) lock->l_lksb.lvb;
- spin_lock(&lock->l_lockres.lr_lock);
+ spin_lock(&lock->l_lockres->lr_lock);
if (lvb->lvb_seq)
lock->l_local_seq = lvb->lvb_seq;
- spin_unlock(&lock->l_lockres.lr_lock);
+ spin_unlock(&lock->l_lockres->lr_lock);
}
/* fill in new values as we add them to the lvb. */
@@ -103,14 +161,14 @@
spin_lock(&lockres->lr_lock);
OCFS_ASSERT(lock->l_level > LKM_NLMODE);
- lvb = lock->l_lksb.lvb;
+ lvb = (ocfs2_meta_lvb *) lock->l_lksb.lvb;
if (trunc_clusters)
*trunc_clusters = lvb->lvb_trunc_clusters;
spin_unlock(&lockres->lr_lock);
}
-static int ocfs2_build_lock_name(enum ocfs_lock_type type,
+static int ocfs2_build_lock_name(enum ocfs2_lock_type type,
u64 blkno,
u32 generation,
char **ret)
@@ -162,7 +220,7 @@
status = ocfs2_build_lock_name(OCFS_TYPE_META,
OCFS_I(inode)->ip_blkno,
inode->i_generation,
- res->lr_meta.l_name);
+ &res->lr_meta.l_name);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -170,15 +228,17 @@
status = ocfs2_build_lock_name(OCFS_TYPE_DATA,
OCFS_I(inode)->ip_blkno,
inode->i_generation,
- res->lr_data.l_name);
+ &res->lr_data.l_name);
if (status < 0) {
kfree(res->lr_meta.l_name);
res->lr_meta.l_name = NULL;
LOG_ERROR_STATUS(status);
goto bail;
- }
+ }
+ res->lr_meta.l_type = OCFS_TYPE_META;
res->lr_meta.l_level = LKM_IVMODE;
res->lr_meta.l_lockres = res;
+ res->lr_data.l_type = OCFS_TYPE_DATA;
res->lr_data.l_level = LKM_IVMODE;
res->lr_data.l_lockres = res;
bail:
@@ -195,7 +255,7 @@
}
static inline void ocfs2_inc_holders(ocfs2_lock *lock,
- dlm_lock_type level)
+ int level)
{
OCFS_ASSERT(lock);
@@ -212,7 +272,7 @@
}
static inline void ocfs2_dec_holders(ocfs2_lock *lock,
- dlm_lock_type level)
+ int level)
{
OCFS_ASSERT(lock);
@@ -241,7 +301,7 @@
lock->l_blocking = LKM_NLMODE;
lock->l_flags &= ~OCFS2_LOCK_BLOCKED;
lock->l_flags &= ~OCFS2_LOCK_BUSY;
- wake_up_all(&lockres->lr_blocked);
+ wake_up_all(&lock->l_lockres->lr_blocked);
}
static inline void ocfs2_handle_data_convert_action(struct inode *inode,
@@ -254,6 +314,30 @@
lock->l_flags &= ~OCFS2_LOCK_BUSY;
}
+static void ocfs2_inc_inode_seq(ocfs_super *osb,
+ struct inode *inode)
+{
+ atomic_t *seq = GET_INODE_CLEAN_SEQ(inode);
+
+ LOG_TRACE_ARGS("incrementing inode seq... current is %d\n",
+ atomic_read(seq));
+
+ /* wrap to ONE after 13 bits, will need a spinlock */
+ spin_lock (&osb->clean_buffer_lock);
+ if ((atomic_read(&osb->clean_buffer_seq)+1) % STATE_BIT_MAX == 0)
+ atomic_set(&osb->clean_buffer_seq, 1);
+ else
+ atomic_inc(&osb->clean_buffer_seq);
+ spin_unlock (&osb->clean_buffer_lock);
+
+ /* doesn't matter if another process */
+ /* has already incremented the global seq */
+ atomic_set(seq, atomic_read(&osb->clean_buffer_seq));
+
+ LOG_TRACE_ARGS("done incrementing inode seq... new is %d\n",
+ atomic_read(seq));
+}
+
static inline void ocfs2_handle_meta_convert_action(struct inode *inode,
ocfs2_lock *lock)
{
@@ -267,7 +351,7 @@
* *anything* however should mark the inode as needing an
* update. */
if (lock->l_level == LKM_NLMODE) {
- ocfs_inc_inode_seq(osb, inode);
+ ocfs2_inc_inode_seq(osb, inode);
lock->l_flags |= OCFS2_LOCK_NEEDS_REFRESH;
}
@@ -275,28 +359,41 @@
lock->l_flags &= ~OCFS2_LOCK_BUSY;
}
-static inline void ocfs2_handle_attach_action(ocfs2_lock *lock)
+static inline void ocfs2_handle_attach_action(struct inode *inode,
+ ocfs2_lock *lock)
{
+ ocfs_super *osb = OCFS2_SB(inode->i_sb);
+
OCFS_ASSERT(lock->l_flags & OCFS2_LOCK_BUSY);
OCFS_ASSERT(!(lock->l_flags & OCFS2_LOCK_ATTACHED));
+ /* skip the cache thunk for nlmode requests and local (new
+ * inode) locks. */
+ if (lock->l_requested > LKM_NLMODE &&
+ !(lock->l_flags & OCFS2_LOCK_LOCAL)) {
+ ocfs2_inc_inode_seq(osb, inode);
+ lock->l_flags |= OCFS2_LOCK_NEEDS_REFRESH;
+ }
+
lock->l_level = lock->l_requested;
lock->l_flags |= OCFS2_LOCK_ATTACHED;
/* should this part be in ocfs2_ast_func? */
lock->l_flags &= ~OCFS2_LOCK_BUSY;
}
-/* can we get a lock type in this proto to?
- * warning, this ast is for metadata locks only. */
+/* can we get a lock type in this proto too? */
static void ocfs2_ast_func(void *opaque)
{
- ocfs2_lock *lock = data;
+ ocfs2_lock *lock = opaque;
ocfs2_lock_res *lockres = lock->l_lockres;
struct inode *inode = lockres->lr_inode;
dlm_lockstatus *lksb;
printk("AST fired for inode %llu\n", OCFS_I(inode)->ip_blkno);
- OCFS_ASSERT((lock == lockres->lr_meta) || (lock == lockres->lr_data));
+ OCFS_ASSERT(lock == &lockres->lr_meta ||
+ lock == &lockres->lr_data);
+ OCFS_ASSERT(lock->l_type == OCFS_TYPE_META ||
+ lock->l_type == OCFS_TYPE_DATA);
spin_lock(&lockres->lr_lock);
lksb = &(lock->l_lksb);
@@ -309,10 +406,10 @@
switch(lock->l_action) {
case OCFS2_AST_ATTACH:
- ocfs2_handle_attach_action(lock);
+ ocfs2_handle_attach_action(inode, lock);
break;
case OCFS2_AST_CONVERT:
- if (lock == lockres->lr_meta)
+ if (lock->l_type == OCFS_TYPE_META)
ocfs2_handle_meta_convert_action(inode, lock);
else
ocfs2_handle_data_convert_action(inode, lock);
@@ -330,9 +427,9 @@
wake_up_all(&lockres->lr_busy);
}
-static void ocfs2_bast_func(void *opaque, dlm_lock_type type)
+static void ocfs2_bast_func(void *opaque, int level)
{
- ocfs2_lock *lock = data;
+ ocfs2_lock *lock = opaque;
ocfs2_lock_res *lockres = lock->l_lockres;
struct inode *inode = lockres->lr_inode;
ocfs_super *osb = OCFS2_SB(inode->i_sb);
@@ -340,21 +437,32 @@
printk("BAST fired for inode %llu\n", OCFS_I(inode)->ip_blkno);
spin_lock(&lockres->lr_lock);
lock->l_flags |= OCFS2_LOCK_BLOCKED;
- if (type > lock->l_blocking)
- lock->l_blocking = type;
+ if (level > lock->l_blocking)
+ lock->l_blocking = level;
spin_unlock(&lockres->lr_lock);
ocfs2_schedule_blocked_inode(inode);
ocfs2_kick_vote_thread(osb);
}
+static inline void ocfs2_recover_from_dlm_error(ocfs2_lock_res *lockres,
+ ocfs2_lock *lock)
+{
+ spin_lock(&lockres->lr_lock);
+ lock->l_flags &= ~OCFS2_LOCK_BUSY;
+ lock->l_action = OCFS2_AST_INVALID;
+ spin_unlock(&lockres->lr_lock);
+}
+
static int ocfs2_lock_create(struct inode *inode,
- enum ocfs2_lock_type type,
- ocfs2_lock *lock)
+ ocfs2_lock *lock,
+ int level,
+ int flags)
{
int ret = 0;
+ enum ocfs2_lock_type type = lock->l_type;
dlm_status status;
ocfs_super *osb = OCFS2_SB(inode->i_sb);
- ocfs2_lock_res *lockres = OCFS_I(inode)->ip_lockres;
+ ocfs2_lock_res *lockres = &OCFS_I(inode)->ip_lockres;
LOG_ENTRY();
@@ -365,18 +473,18 @@
}
lock->l_action = OCFS2_AST_ATTACH;
- lock->l_requested = LKM_NLMODE;
+ lock->l_requested = level;
lock->l_flags |= OCFS2_LOCK_BUSY;
spin_unlock(&lockres->lr_lock);
status = dlmlock(osb->dlm,
- LKM_NLMODE,
- 0,
+ level,
&lock->l_lksb,
+ flags,
lock->l_name,
- ocfs2_ast_funcs[type],
- ocfs2_bast_funcs[type],
- lock);
+ ocfs2_lock_type_asts[type],
+ lock,
+ ocfs2_lock_type_basts[type]);
if (status != DLM_NORMAL) {
LOG_ERROR_ARGS("Dlm returns %d\n", status);
ret = -ENOENT;
@@ -388,16 +496,7 @@
return ret;
}
-static inline void ocfs2_recover_from_dlm_error(ocfs2_lock_res *lockres,
- ocfs2_lock *lock)
-{
- spin_lock(&lockres->lock);
- lock->l_flags &= ~OCFS2_LOCK_BUSY;
- lock->l_action = OCFS2_AST_INVALID;
- spin_unlock(&lockres->lr_lock);
-}
-
-static inline int ocfs2_check_wait_flag(ocfs2_lock_res lockres,
+static inline int ocfs2_check_wait_flag(ocfs2_lock_res *lockres,
ocfs2_lock *lock,
int flag)
{
@@ -412,7 +511,7 @@
ocfs2_lock *lock)
{
- wait_event_interruptible(&lock->lr_busy,
+ wait_event_interruptible(lockres->lr_busy,
!ocfs2_check_wait_flag(lockres,
lock,
OCFS2_LOCK_BUSY));
@@ -422,7 +521,7 @@
ocfs2_lock *lock)
{
- wait_event_interruptible(&lock->lr_blocked,
+ wait_event_interruptible(lockres->lr_blocked,
!ocfs2_check_wait_flag(lockres,
lock,
OCFS2_LOCK_BLOCKED));
@@ -432,21 +531,21 @@
ocfs2_lock *lock)
{
- wait_event_interruptible(&lock->lr_blocked,
+ wait_event_interruptible(lockres->lr_blocked,
!ocfs2_check_wait_flag(lockres,
lock,
- OCFS2_LOCKC_REFRESHING));
+ OCFS2_LOCK_REFRESHING));
}
static int ocfs2_cluster_lock(struct inode *inode,
- enum ocfs2_lock_type type,
ocfs2_lock *lock,
- dlm_lock_type level)
+ int level)
{
+ int ret;
+ enum ocfs2_lock_type type = lock->l_type;
ocfs_super *osb = OCFS2_SB(inode->i_sb);
- ocfs2_lock_res *lockres = OCFS_I(inode)->ip_lockres;
+ ocfs2_lock_res *lockres = &OCFS_I(inode)->ip_lockres;
dlm_status status;
- int ret;
LOG_ENTRY();
@@ -460,7 +559,7 @@
if (lock->l_flags & OCFS2_LOCK_BUSY) {
/* is someone sitting in dlm_lock? If so, wait on
* them. */
- spin_unlock(lockres->lr_lock);
+ spin_unlock(&lockres->lr_lock);
ocfs2_wait_on_busy_lock(lockres, lock);
goto again;
}
@@ -468,7 +567,7 @@
if (!(lock->l_flags & OCFS2_LOCK_ATTACHED)) {
/* lock has not been created yet. */
spin_unlock(&lockres->lr_lock);
- ret = ocfs_lock_create(inode, lock);
+ ret = ocfs2_lock_create(inode, lock, LKM_NLMODE, 0);
if (ret < 0) {
LOG_ERROR_STATUS(ret);
goto bail;
@@ -488,17 +587,17 @@
lock->l_action = OCFS2_AST_CONVERT;
lock->l_requested = level;
lock->l_flags |= OCFS2_LOCK_BUSY;
- spin_unlock(&lockres->lock);
+ spin_unlock(&lockres->lr_lock);
/* call dlm_lock to upgrade lock now */
status = dlmlock(osb->dlm,
level,
+ &lock->l_lksb,
LKM_CONVERT|LKM_VALBLK,
- &lock->l_lksb,
lock->l_name,
ocfs2_lock_type_asts[type],
- ocfs2_lock_type_basts[type],
- lock);
+ lock,
+ ocfs2_lock_type_basts[type]);
if (status != DLM_NORMAL) {
LOG_ERROR_ARGS("Dlm returns %d\n", status);
ret = -ENOENT;
@@ -511,15 +610,69 @@
}
/* Ok, if we get here then we're good to go. */
- ocfs2_inc_lock_holders(lock, level);
+ ocfs2_inc_holders(lock, level);
spin_unlock(&lockres->lr_lock);
+ ret = 0;
bail:
LOG_EXIT_STATUS(ret);
return ret;
}
+/* Grants us an EX lock on the data and metadata resources, skipping
+ * the normal cluster directory lookup. Use this ONLY on newly created
+ * inodes which other nodes can't possibly see, and which haven't been
+ * hashed in the inode hash yet. This can give us a good performance
+ * increase as it'll skip the network broadcast normally associated
+ * with creating a new lock resource. */
+int ocfs2_create_new_inode_locks(struct inode *inode)
+{
+ int status;
+ ocfs2_lock_res *lockres = &OCFS_I(inode)->ip_lockres;
+ ocfs2_lock *lock;
+
+ OCFS_ASSERT(inode);
+ OCFS_ASSERT(ocfs_inode_is_new(inode));
+
+ LOG_ENTRY();
+
+ /* NOTE: That we don't increment any of the holder counts, nor
+ * do we add anything to a journal handle. Since this is
+ * supposed to be a new inode which the cluster doesn't know
+ * about yet, there is no need to. As far as the LVB handling
+ * is concerned, this is basically like acquiring an EX lock
+ * on a resource which has an invalid one -- we'll set it
+ * valid when we release the EX. */
+
+ lock = &lockres->lr_meta;
+ OCFS_ASSERT(!(lock->l_flags & OCFS2_LOCK_ATTACHED));
+ lock->l_flags |= OCFS2_LOCK_LOCAL;
+
+ status = ocfs2_lock_create(inode, lock, LKM_EXMODE, LKM_LOCAL);
+ lock->l_flags &= ~OCFS2_LOCK_LOCAL;
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ lock = &lockres->lr_data;
+ OCFS_ASSERT(!(lock->l_flags & OCFS2_LOCK_ATTACHED));
+ lock->l_flags |= OCFS2_LOCK_LOCAL;
+
+ status = ocfs2_lock_create(inode, lock, LKM_EXMODE, LKM_LOCAL);
+ lock->l_flags &= ~OCFS2_LOCK_LOCAL;
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = 0;
+bail:
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
int ocfs2_data_lock(struct inode *inode,
int write)
{
@@ -534,7 +687,7 @@
level = write ? LKM_EXMODE : LKM_PRMODE;
- status = ocfs2_cluster_lock(inode, OCFS_TYPE_DATA, lock, level);
+ status = ocfs2_cluster_lock(inode, lock, level);
if (status < 0)
LOG_ERROR_STATUS(status);
@@ -542,10 +695,10 @@
return status;
}
-void ocfs2_data_vote_on_unlock(struct inode *inode)
+static void ocfs2_data_vote_on_unlock(struct inode *inode)
{
ocfs2_lock_res *lockres = &OCFS_I(inode)->ip_lockres;
- ocfs2_lock *lock = lockres->lr_data;
+ ocfs2_lock *lock = &lockres->lr_data;
int kick = 0;
/* Data locking needs to be fast. If we know that another node
@@ -570,11 +723,11 @@
spin_unlock(&lockres->lr_lock);
if (kick)
- kick_vote_thread(OCFS2_SB(inode->i_sb));
+ ocfs2_kick_vote_thread(OCFS2_SB(inode->i_sb));
}
-int ocfs2_data_unlock(struct inode *inode,
- int write)
+void ocfs2_data_unlock(struct inode *inode,
+ int write)
{
int level;
ocfs2_lock *lock;
@@ -586,15 +739,13 @@
ocfs2_dec_holders(lock, level);
ocfs2_data_vote_on_unlock(inode);
-
- return 0;
}
static inline int ocfs2_wait_on_recovery(ocfs_super *osb)
{
/* recovery_count is incremented once for every dead node and
* decremented once when it gets recovered. */
- wait_event_interruptible(&osb->recovery_event,
+ wait_event_interruptible(osb->recovery_event,
!atomic_read(&osb->num_recovery_threads));
if (signal_pending(current))
@@ -606,14 +757,12 @@
/* Call this with the lockres locked. I am reasonably sure we don't
* need ip_lock in this function as anyone who would be changing those
* values is supposed to be blocked in ocfs2_meta_lock right now. */
-void __ocfs2_stuff_meta_lvb(struct inode *inode)
+static void __ocfs2_stuff_meta_lvb(struct inode *inode)
{
ocfs_inode_private *oip = OCFS_I(inode);
- ocfs2_lock_res *lockres = oip->ip_lockres;
- ocfs2_meta_lvb *lvb = lockres->lr_meta.l_lksb.lvb;
+ ocfs2_lock_res *lockres = &oip->ip_lockres;
+ ocfs2_meta_lvb *lvb = (ocfs2_meta_lvb *) lockres->lr_meta.l_lksb.lvb;
- ip_clusters = oip->ip_clusters;
-
lvb->lvb_iclusters = oip->ip_clusters;
lvb->lvb_iuid = inode->i_uid;
lvb->lvb_igid = inode->i_gid;
@@ -625,11 +774,11 @@
lvb->lvb_imtime = ocfs_get_seconds(inode->i_mtime);
}
-void ocfs2_refresh_inode_from_lvb(struct inode *inode)
+static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
{
ocfs_inode_private *oip = OCFS_I(inode);
- ocfs2_lock_res *lockres = oip->ip_lockres;
- ocfs2_meta_lvb *lvb = lockres->lr_meta.l_lksb.lvb;
+ ocfs2_lock_res *lockres = &oip->ip_lockres;
+ ocfs2_meta_lvb *lvb = (ocfs2_meta_lvb *) lockres->lr_meta.l_lksb.lvb;
/* We're safe here without the lockres lock... */
spin_lock(&oip->ip_lock);
@@ -639,8 +788,8 @@
inode->i_size = lvb->lvb_isize;
inode->i_mode = lvb->lvb_imode;
inode->i_nlink = lvb->lvb_inlink;
- inode->i_blocks = (inode->i_size + osb->sb->s_blocksize - 1)
- >> osb->sb->s_blocksize_bits;
+ inode->i_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1)
+ >> inode->i_sb->s_blocksize_bits;
OCFS_SET_INODE_TIME(inode, i_atime, lvb->lvb_iatime);
OCFS_SET_INODE_TIME(inode, i_ctime, lvb->lvb_ictime);
OCFS_SET_INODE_TIME(inode, i_mtime, lvb->lvb_imtime);
@@ -658,13 +807,14 @@
i_clusters = OCFS_I(inode)->ip_clusters;
spin_unlock(&OCFS_I(inode)->ip_lock);
- spin_lock(&lockres.lr_lock);
+ spin_lock(&lockres->lr_lock);
if (lock->l_level == LKM_EXMODE)
- lvb->trunc_clusters = i_clusters;
- spin_unlock(&lockres.lr_lock);
+ lvb->lvb_trunc_clusters = i_clusters;
+ spin_unlock(&lockres->lr_lock);
}
-void __ocfs2_lvb_on_downconvert(ocfs2_lock *lock, dlm_lock_type new_level)
+static void __ocfs2_lvb_on_downconvert(ocfs2_lock *lock,
+ int new_level)
{
ocfs2_lvb *lvb = (ocfs2_lvb *) lock->l_lksb.lvb;
@@ -683,7 +833,8 @@
}
/* may or may not return a bh if it went to disk. */
-int ocfs2_meta_lock_update(struct inode *inode, struct buffer_head **bh)
+static int ocfs2_meta_lock_update(struct inode *inode,
+ struct buffer_head **bh)
{
int status = 0;
u32 trustable_clusters = 0;
@@ -708,7 +859,7 @@
goto bail;
}
ocfs2_wait_on_refreshing_lock(lockres, lock);
- goto refresh_check:
+ goto refresh_check;
}
/* Ok, I'll be the one to refresh this lock. */
@@ -725,13 +876,14 @@
} else {
/* Boo, we have to go to disk. */
/* read bh, cast, ocfs_refresh_inode */
- status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, bh,
+ status = ocfs_read_block(OCFS2_SB(inode->i_sb),
+ OCFS_I(inode)->ip_blkno, bh,
OCFS_BH_CACHED, inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- fe = (ocfs2_inode *) (*bh)->b_data;
+ fe = (ocfs2_dinode *) (*bh)->b_data;
/* This is a good chance to make sure we're not
* locking an invalid object. */
@@ -745,11 +897,11 @@
}
printk("inode %llu, I can only trust %u clusters\n",
- oip->ip_blkno, trustable_clusters);
+ OCFS_I(inode)->ip_blkno, trustable_clusters);
ocfs2_extent_map_trunc(inode, trustable_clusters);
- ocfs2_set_local_seq_from_lvb(&oip->ip_lockres.lr_meta);
+ ocfs2_set_local_seq_from_lvb(&OCFS_I(inode)->ip_lockres.lr_meta);
ocfs2_reset_meta_lvb_values(inode);
spin_lock(&lockres->lr_lock);
@@ -768,6 +920,7 @@
int ex)
{
int status, level;
+ ocfs2_lock_res *lockres;
ocfs2_lock *lock;
ocfs_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *bh = NULL;
@@ -793,7 +946,7 @@
lock = &(OCFS_I(inode)->ip_lockres.lr_meta);
level = ex ? LKM_EXMODE : LKM_PRMODE;
- status = ocfs2_cluster_lock(inode, OCFS_TYPE_META, lock, level);
+ status = ocfs2_cluster_lock(inode, lock, level);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -839,10 +992,10 @@
return status;
}
-int ocfs2_meta_unlock(struct inode *inode,
- int ex)
+void ocfs2_meta_unlock(struct inode *inode,
+ int ex)
{
- int status, level;
+ int level;
ocfs2_lock *lock;
OCFS_ASSERT(inode);
@@ -850,16 +1003,67 @@
lock = &(OCFS_I(inode)->ip_lockres.lr_meta);
level = ex ? LKM_EXMODE : LKM_PRMODE;
ocfs2_dec_holders(lock, level);
- return 0;
}
int ocfs2_dlm_init(ocfs_super *osb)
{
+ int status, pid;
+ char domain[NM_MAX_NAME_LEN];
+ dlm_ctxt *dlm = NULL;
+
+ /* launch vote thread */
+ init_completion (&osb->vote_event_init);
+ pid = kernel_thread(ocfs2_vote_thread, osb,
+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ if (pid < 0) {
+ status = pid;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ wait_for_completion(&osb->vote_event_init);
+
+ memset(domain, 0, NM_MAX_NAME_LEN);
+ memcpy(domain, osb->uuid, MAX_VOL_ID_LENGTH);
+
+ /* where to get group name from? */
+ dlm = dlm_register_domain(domain, "testgroup", 0);
+ if (!dlm) {
+ /* This is a best guess on return value... */
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ osb->dlm = dlm;
/* sets osb->dlm */
+ status = 0;
+bail:
+
+ return status;
}
-void ocfs2_unlock_ast(void *opaque, dlm_status status)
+void ocfs2_dlm_shutdown(ocfs_super *osb)
{
+ int wait_on_vote_task = 0;
+
+ /* needs to be able to deal with the dlm being in many
+ * different states. */
+ spin_lock(&osb->vote_task_lock);
+ if (osb->vote_task) {
+ ocfs2_kick_vote_thread(osb);
+ osb->vote_exit = 1;
+ wait_on_vote_task = 1;
+ }
+ spin_unlock(&osb->vote_task_lock);
+
+ if (wait_on_vote_task)
+ wait_for_completion(&osb->vote_event_complete);
+
+ dlm_unregister_domain(osb->dlm);
+}
+
+static void ocfs2_unlock_ast_func(void *opaque, dlm_status status)
+{
ocfs2_lock *lock = opaque;
ocfs2_lock_res *lockres = lock->l_lockres;
@@ -874,9 +1078,9 @@
wake_up_all(&lockres->lr_busy);
}
-int ocfs2_drop_lock(ocfs_super *osb,
- ocfs2_lock_res *lockres,
- ocfs2_lock *lock)
+static int ocfs2_drop_lock(ocfs_super *osb,
+ ocfs2_lock_res *lockres,
+ ocfs2_lock *lock)
{
int ret = 0;
dlm_status status;
@@ -904,7 +1108,7 @@
status = dlmunlock(osb->dlm,
&lock->l_lksb,
LKM_VALBLK,
- ocfs2_unlock_ast,
+ ocfs2_unlock_ast_func,
lock);
if (status != DLM_NORMAL) {
LOG_ERROR_ARGS("Dlm returns %d\n", status);
@@ -925,14 +1129,16 @@
int ocfs2_drop_inode_locks(struct inode *inode)
{
int status, err;
- ocfs2_lock_res *lockres = OCFS_I(inode)->ip_lockres;
+ ocfs2_lock_res *lockres = &OCFS_I(inode)->ip_lockres;
- err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), lockres->lr_data, lock);
+ err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), lockres,
+ &lockres->lr_data);
if (err < 0)
LOG_ERROR_STATUS(err);
status = err;
- err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), lockres->lr_meta, lock);
+ err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), lockres,
+ &lockres->lr_meta);
if (err < 0)
LOG_ERROR_STATUS(err);
if (err < 0 && !status)
@@ -944,9 +1150,9 @@
/* WARNING: This function lives in a world where the only three lock
* levels are EX, PR, and NL. It *will* have to be adjusted when more
* lock types are added. */
-dlm_lock_type ocfs2_highest_compat_lock_level(dlm_lock_type level)
+static inline int ocfs2_highest_compat_lock_level(int level)
{
- dlm_lock_type new_level = LKM_EXMODE;
+ int new_level = LKM_EXMODE;
if (level == LKM_EXMODE)
new_level = LKM_NLMODE;
@@ -956,49 +1162,50 @@
}
/* called with the spinlock held, and WILL drop it. */
-int __ocfs2_downconvert_lock(ocfs_super *osb,
- ocf2_lock_res *lockres,
- ocfs2_lock *lock,
- dlm_lock_type new_level,
- int lvb)
+static int __ocfs2_downconvert_lock(ocfs2_lock_res *lockres,
+ ocfs2_lock *lock,
+ int new_level,
+ int lvb)
{
int status, flags = LKM_CONVERT;
-
+ struct inode *inode = lockres->lr_inode;
+ enum ocfs2_lock_type type = lock->l_type;
OCFS_ASSERT(lock->l_blocking > LKM_NLMODE);
OCFS_ASSERT(lock->l_level > new_level);
-// new_level = ocfs2_highest_compat_lock_level(lock->l_blocking);
lock->l_action = OCFS2_AST_DOWNCONVERT;
lock->l_requested = new_level;
- lock->flags |= OCFS2_LOCK_BUSY;
+ lock->l_flags |= OCFS2_LOCK_BUSY;
spin_unlock(&lockres->lr_lock);
if (lvb)
flags |= LKM_VALBLK;
- status = dlmlock(osb->dlm,
+ status = dlmlock(OCFS2_SB(inode->i_sb)->dlm,
new_level,
+ &lock->l_lksb,
flags,
- &lock->l_lksb,
lock->l_name,
ocfs2_lock_type_asts[type],
- ocfs2_lock_type_basts[type],
- lock);
+ lock,
+ ocfs2_lock_type_basts[type]);
if (status != DLM_NORMAL) {
LOG_ERROR_ARGS("Dlm returns %d\n", status);
status = -ENOENT;
ocfs2_recover_from_dlm_error(lockres, lock);
goto bail;
}
-
+ status = 0;
+bail:
return status;
}
-int ocfs2_process_blocked_meta(struct inode *inode, int *requeue)
+static int ocfs2_process_blocked_meta(struct inode *inode,
+ int *requeue)
{
- ocfs2_lock_res *lockres = OCFS_I(inode)->ip_lockres;
- ocfs2_lock *lock = lockres->lr_meta;
- dlm_lock_type new_level;
+ ocfs2_lock_res *lockres = &OCFS_I(inode)->ip_lockres;
+ ocfs2_lock *lock = &lockres->lr_meta;
+ int new_level;
int set_lvb = 0;
spin_lock(&lockres->lr_lock);
@@ -1030,13 +1237,13 @@
return 0;
}
-int ocfs2_process_blocked_data(struct inode *inode, int *requeue)
+static int ocfs2_process_blocked_data(struct inode *inode,
+ int *requeue)
{
- int status = 0;
- ocfs2_lock_res *lockres = OCFS_I(inode)->ip_lockres;
- ocfs2_lock *lock = lockres->lr_meta;
- dlm_lock_type blocking;
- dlm_lock_type new_level;
+ ocfs2_lock_res *lockres = &OCFS_I(inode)->ip_lockres;
+ ocfs2_lock *lock = &lockres->lr_meta;
+ int blocking;
+ int new_level;
spin_lock(&lockres->lr_lock);
if (!(lock->l_flags & OCFS2_LOCK_BLOCKED)) {
@@ -1089,10 +1296,10 @@
return __ocfs2_downconvert_lock(lockres, lock, new_level, 0);
}
-void ocfs2_process_blocked_inode(struct inode *inode)
+static void ocfs2_process_blocked_inode(struct inode *inode)
{
int status;
- int reqeue = 0;
+ int requeue = 0;
/* At this point, we've been taken off the blocked_list */
status = ocfs2_process_blocked_meta(inode, &requeue);
@@ -1107,7 +1314,7 @@
ocfs2_schedule_blocked_inode(inode);
}
-void ocfs2_schedule_blocked_inode(struct inode *inode)
+static void ocfs2_schedule_blocked_inode(struct inode *inode)
{
ocfs_super *osb = OCFS2_SB(inode->i_sb);
@@ -1121,32 +1328,6 @@
spin_unlock(&osb->vote_task_lock);
}
-static inline void ocfs2_kick_vote_thread(ocfs_super *osb)
-{
- atomic_set(&osb->vote_event_woken, 1);
- wake_up(&osb->vote_event);
-}
-
-typedef struct _ocfs2_vote_msg
-{
- u32 m_req_node;
- u32 m_request;
- u64 m_blkno;
- u32 m_generation;
-} ocfs2_vote_msg;
-
-typedef struct _ocfs2_vote_work {
- struct list_head w_list;
- ocfs2_vote_msg_hdr w_msg;
-} ocfs2_vote_work;
-
-enum ocfs2_vote_request {
- OCFS2_VOTE_REQ_INVALID = 0
- OCFS2_VOTE_REQ_DELETE = 1,
- OCFS2_VOTE_REQ_DENTRY,
- OCFS2_VOTE_REQ_RENAME
-};
-
static int ocfs2_process_delete_request(struct inode *inode)
{
int response = -EBUSY;
@@ -1168,15 +1349,6 @@
}
spin_unlock(&OCFS_I(inode)->ip_lock);
- /* vote no if someone's extending it. */
- spin_lock(&oin_num_ext_lock);
- if (OCFS_I(inode)->ip_num_extends) {
- spin_unlock(&oin_num_ext_lock);
- LOG_TRACE_PROCESS_VOTE("extends pending\n");
- goto done;
- }
- spin_unlock(&oin_num_ext_lock);
-
/* directories are a bit ugly... What if someone is sitting in
* it? We want to make sure the inode is removed completely as
* a result of the iput in process_vote. */
@@ -1226,14 +1398,16 @@
return 0;
}
-void ocfs2_process_vote(ocfs_super *osb,
- ocfs2_vote_msg *msg)
+static void ocfs2_process_vote(ocfs_super *osb,
+ ocfs2_vote_msg *msg)
{
int vote_response = 0;
int rename = 0;
struct inode *inode = NULL;
- inode = ilookup(osb, msg->m_blkno);
+ OCFS_ASSERT(!memcmp(msg->m_hdr.h_uuid, osb->uuid, MAX_VOL_ID_LENGTH));
+
+ inode = ocfs_ilookup(osb, msg->m_blkno);
if (!inode)
goto respond;
@@ -1245,7 +1419,7 @@
break;
case OCFS2_VOTE_REQ_RENAME:
rename = 1;
- case OCFS2_VOTE_REQ_DENTRY:
+ case OCFS2_VOTE_REQ_UNLINK:
vote_response = ocfs2_process_dentry_request(inode, rename);
break;
default:
@@ -1256,12 +1430,11 @@
respond:
//vote response here...
-bail:
if (inode)
iput(inode);
}
-void ocfs2_vote_thread_do_work(ocfs_super *osb)
+static void ocfs2_vote_thread_do_work(ocfs_super *osb)
{
struct inode *inode = NULL;
ocfs_inode_private *ip;
@@ -1270,10 +1443,10 @@
spin_lock(&osb->vote_task_lock);
processed = osb->blocked_inode_count;
- while (processed--) {
+ while (processed) {
OCFS_ASSERT(!list_empty(&osb->blocked_inode_list));
- ip = list_entry(&osb->blocked_inode_list->next,
+ ip = list_entry(&osb->blocked_inode_list.next,
ocfs_inode_private, ip_blocked_list);
list_del_init(&ip->ip_blocked_list);
osb->blocked_inode_count--;
@@ -1291,23 +1464,23 @@
while (osb->vote_count) {
OCFS_ASSERT(!list_empty(&osb->vote_list));
- work = list_entry(&osb->vote_list->next,
+ work = list_entry(&osb->vote_list.next,
ocfs2_vote_work, w_list);
- list_del(&w->w_list);
+ list_del(&work->w_list);
osb->vote_count--;
spin_unlock(&osb->vote_task_lock);
- ocfs2_process_vote(osb, &w.w_msg);
- kfree(w);
+ ocfs2_process_vote(osb, &work->w_msg);
+ kfree(work);
spin_lock(&osb->vote_task_lock);
}
spin_unlock(&osb->vote_task_lock);
}
-int ocfs2_vote_thread(void *arg)
+static int ocfs2_vote_thread(void *arg)
{
- int status = 0, workit;
+ int status = 0;
ocfs_super *osb = arg;
char proc[16];
@@ -1318,20 +1491,20 @@
osb->vote_task = current;
init_completion (&osb->vote_event_complete);
+ complete(&osb->vote_event_init);
+
while (1) {
spin_unlock(&osb->vote_task_lock);
#define OCFS2_VOTE_THREAD_TIMEOUT (HZ >> 1)
- wait_event_interruptible_timeout(&osb->vote_event,
+ wait_event_interruptible_timeout(osb->vote_event,
atomic_read(
&osb->wake_vote_task),
OCFS2_VOTE_THREAD_TIMEOUT);
- atomic_set(&osb->vote_event_woken, 0);
+ atomic_set(&osb->wake_vote_task, 0);
- status = ocfs2_vote_thread_do_work(osb);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ ocfs2_vote_thread_do_work(osb);
spin_lock(&osb->vote_task_lock);
if (osb->vote_exit &&
@@ -1347,3 +1520,92 @@
return status;
}
+
+static int ocfs2_do_request_vote(ocfs_super *osb,
+ struct inode *inode,
+ enum ocfs2_vote_request type)
+{
+ int status;
+ ocfs2_vote_msg *request = NULL;
+ ocfs2_response_msg *response = NULL;
+
+ OCFS_ASSERT(type == OCFS2_VOTE_REQ_DELETE ||
+ type == OCFS2_VOTE_REQ_UNLINK ||
+ type == OCFS2_VOTE_REQ_RENAME);
+
+ request = kmalloc(sizeof(*request), GFP_KERNEL);
+ if (!request) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ memset(request, 0, sizeof(*request));
+
+ response = kmalloc(sizeof(*response), GFP_KERNEL);
+ if (!response) {
+ status = -ENOMEM;
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ memset(response, 0, sizeof(*response));
+ memcpy(request->m_hdr.h_uuid, osb->uuid, MAX_VOL_ID_LENGTH);
+ request->m_hdr.h_type = OCFS2_MESSAGE_TYPE_VOTE;
+ request->m_req_node = osb->node_num;
+ request->m_request = type;
+ request->m_blkno = OCFS_I(inode)->ip_blkno;
+ request->m_generation = inode->i_generation;
+
+ /* register for the response here */
+ /* send the broadcast request here */
+ /* wait for the response here */
+
+ OCFS_ASSERT(!memcmp(response->r_hdr.h_uuid, request->m_hdr.h_uuid,
+ MAX_VOL_ID_LENGTH));
+ OCFS_ASSERT(response->r_hdr.h_type == OCFS2_MESSAGE_TYPE_RESPONSE);
+ OCFS_ASSERT(response->r_request == request->m_request);
+ OCFS_ASSERT(response->r_blkno == request->m_blkno);
+ OCFS_ASSERT(response->r_generation == request->m_generation);
+
+ status = response->r_response;
+bail:
+ if (request)
+ kfree(request);
+ if (response)
+ kfree(response);
+
+ return status;
+}
+
+static int ocfs2_request_vote(struct inode *inode,
+ enum ocfs2_vote_request type)
+{
+ int status;
+
+ if (ocfs_inode_is_new(inode))
+ return 0;
+
+ status = -EAGAIN;
+ while (status == -EAGAIN) {
+ if (signal_pending(current))
+ return -EINTR;
+
+ status = ocfs2_do_request_vote(OCFS2_SB(inode->i_sb),
+ inode, type);
+ }
+ return status;
+}
+
+int ocfs2_request_delete_vote(struct inode *inode)
+{
+ return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_DELETE);
+}
+
+int ocfs2_request_unlink_vote(struct inode *inode)
+{
+ return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_UNLINK);
+}
+
+int ocfs2_request_rename_vote(struct inode *inode)
+{
+ return ocfs2_request_vote(inode, OCFS2_VOTE_REQ_RENAME);
+}
Modified: branches/dlm-glue/src/dlmglue.h
===================================================================
--- branches/dlm-glue/src/dlmglue.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/dlmglue.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -27,56 +27,6 @@
#ifndef MIDDLE_H
#define MIDDLE_H
-struct _ocfs2_lock_res;
-
-enum ocfs2_ast_action {
- OCFS2_AST_INVALID = 0,
- OCFS2_AST_ATTACH,
- OCFS2_AST_CONVERT
-};
-
-typedef struct _ocfs2_lock {
- struct _ocfs2_lock_res *l_lockres;
- int l_flags;
- char *l_name;
- dlm_lock_type l_level;
- unsigned int l_ro_holders;
- unsigned int l_ex_holders;
- dlm_lockstatus l_lksb;
- u32 l_local_seq;
-
- /* used from AST/BAST funcs. */
- ocfs2_ast_action l_action;
- dlm_lock_type l_requested;
- dlm_lock_type l_blocking;
-} ocfs2_lock;
-
-#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized
- * the lvb */
-#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
- * dlm_lock */
-#define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to
- * downconvert*/
-#define OCFS2_LOCK_NEEDS_REFRESH (0x00000008)
-#define OCFS2_LOCK_REFRESHING (0x00000010)
-
-
-typedef struct _ocfs2_lock_res {
- /* do i need this or can't i just use container_of? */
- struct inode *lr_inode;
- /* actually, should probably just use ip_lock. */
- spinlock_t lr_lock;
-
- ocfs2_lock lr_meta;
- ocfs2_lock lr_data;
-// ocfs2_lock lr_file;
-
- /* should i just make these two a single wait queue? */
- wait_queue_head_t lr_busy;
- wait_queue_head_t lr_blocked;
- wait_queue_head_t lr_refreshing;
-} ocfs2_lock_res;
-
/*
* LVB Sequence number rules:
* local seq and lvb seq are initialized to zero.
@@ -128,20 +78,30 @@
} ocfs2_meta_lvb;
int ocfs2_dlm_init(ocfs_super *osb);
+void ocfs2_dlm_shutdown(ocfs_super *osb);
int ocfs2_lock_res_init(struct inode *inode,
ocfs2_lock_res *res);
void ocfs2_lock_res_free(ocfs2_lock_res *res);
+int ocfs2_create_new_inode_locks(struct inode *inode);
int ocfs2_drop_inode_locks(struct inode *inode);
int ocfs2_data_lock(struct inode *inode,
int write);
-int ocfs2_data_unlock(struct inode *inode,
- int write);
+void ocfs2_data_unlock(struct inode *inode,
+ int write);
int ocfs2_meta_lock(struct inode *inode,
ocfs_journal_handle *handle,
struct buffer_head **ret_bh,
int ex);
-int ocfs2_meta_unlock(struct inode *inode,
- int ex);
+void ocfs2_meta_unlock(struct inode *inode,
+ int ex);
+static inline void ocfs2_kick_vote_thread(ocfs_super *osb)
+{
+ atomic_set(&osb->wake_vote_task, 1);
+ wake_up(&osb->vote_event);
+}
+int ocfs2_request_delete_vote(struct inode *inode);
+int ocfs2_request_unlink_vote(struct inode *inode);
+int ocfs2_request_rename_vote(struct inode *inode);
static inline void ocfs2_lvb_set_trunc_clusters(struct inode *inode,
unsigned int trunc_clusters)
@@ -153,10 +113,48 @@
spin_lock(&lockres->lr_lock);
OCFS_ASSERT(lock->l_level == LKM_EXMODE);
- lvb = lock->l_lksb.lvb;
+ lvb = (ocfs2_meta_lvb *) lock->l_lksb.lvb;
if (lvb->lvb_trunc_clusters > trunc_clusters)
lvb->lvb_trunc_clusters = trunc_clusters;
spin_unlock(&lockres->lr_lock);
}
+typedef struct _ocfs2_msg_hdr
+{
+ u8 h_uuid[MAX_VOL_ID_LENGTH];
+#define OCFS2_MESSAGE_TYPE_VOTE (0x1)
+#define OCFS2_MESSAGE_TYPE_RESPONSE (0x2)
+ u32 h_type;
+} ocfs2_msg_hdr;
+
+typedef struct _ocfs2_vote_msg
+{
+ ocfs2_msg_hdr m_hdr;
+ u32 m_request;
+ u32 m_req_node;
+ u64 m_blkno;
+ u32 m_generation;
+} ocfs2_vote_msg;
+
+typedef struct _ocfs2_response_msg
+{
+ ocfs2_msg_hdr r_hdr;
+ u32 r_request;
+ u64 r_blkno;
+ u32 r_generation;
+ s32 r_response; /* this maps to '0' or a -value in errno.h */
+} ocfs2_response_msg;
+
+typedef struct _ocfs2_vote_work {
+ struct list_head w_list;
+ ocfs2_vote_msg w_msg;
+} ocfs2_vote_work;
+
+enum ocfs2_vote_request {
+ OCFS2_VOTE_REQ_INVALID = 0,
+ OCFS2_VOTE_REQ_DELETE,
+ OCFS2_VOTE_REQ_UNLINK,
+ OCFS2_VOTE_REQ_RENAME
+};
+
#endif
Modified: branches/dlm-glue/src/file.c
===================================================================
--- branches/dlm-glue/src/file.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/file.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -38,7 +38,7 @@
#include "alloc.h"
#include "dir.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
@@ -117,7 +117,7 @@
file->f_dentry->d_name.len,
file->f_dentry->d_name.name);
- status = -EACCESS;
+ status = -EACCES;
if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
LOG_ERROR_STR ("Volume has been shutdown");
goto leave;
@@ -416,9 +416,7 @@
ocfs2_clusters_to_bytes(inode->i_sb,
OCFS_I(inode)->ip_clusters),
newsize);
- status = ocfs2_meta_unlock(inode, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ ocfs2_meta_unlock(inode, 0);
status = ocfs_extend_file(osb, inode, newsize);
if (status < 0) {
@@ -446,9 +444,7 @@
LOG_ERROR_STATUS(status);
ret = status;
- status = ocfs2_meta_unlock(inode, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ ocfs2_meta_unlock(inode, 0);
goto bail;
}
}
@@ -480,11 +476,9 @@
ret = generic_file_write_nolock (filp, buf, count, ppos);
#endif
up_read(&OCFS_I(inode)->ip_alloc_sem);
- if (!do_direct_io) {
- status = ocfs2_data_unlock(inode, 1);
- if (status < 0)
- LOG_ERROR_STATUS(status);
- }
+ if (!do_direct_io)
+ ocfs2_data_unlock(inode, 1);
+
if (writingAtEOF) {
LOG_TRACE_STR
("Generic_file_write ok, asking for OIN update now");
@@ -506,9 +500,8 @@
LOG_ERROR_ARGS("Unable to pre-zero extension of inode (%d)", status);
}
}
- status = ocfs2_meta_unlock(inode, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ ocfs2_meta_unlock(inode, 0);
+
bail:
if (have_i_sem)
up(&inode->i_sem);
@@ -614,19 +607,9 @@
if (ret == -EINVAL)
LOG_ERROR_STR ("Generic_file_read returned -EINVAL");
- if (!do_direct_io) {
- status = ocfs2_data_unlock(inode, 0);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- /* is this ret code correct? */
- ret = status;
- goto bail;
- }
- }
-
- status = ocfs2_meta_unlock(inode, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
+ if (!do_direct_io)
+ ocfs2_data_unlock(inode, 0);
+ ocfs2_meta_unlock(inode, 0);
bail:
LOG_EXIT_INT (ret);
@@ -799,11 +782,7 @@
LOG_ERROR_STATUS(status);
goto bail;
}
- status = ocfs2_data_unlock(inode, 1);
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
+ ocfs2_data_unlock(inode, 1);
/* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the
Modified: branches/dlm-glue/src/heartbeat.c
===================================================================
--- branches/dlm-glue/src/heartbeat.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/heartbeat.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -3,7 +3,8 @@
*
* heartbeat.c
*
- * Keeps track of alive nodes in the cluster.
+ * Register ourselves with the heartbeat service, keep our node maps
+ * up to date, and fire off recovery when needed.
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
*
@@ -37,8 +38,6 @@
#include "alloc.h"
#include "heartbeat.h"
#include "util.h"
-#include "volcfg.h"
-#include "vote.h"
#include "ocfs_journal.h"
#include "buffer_head_io.h"
@@ -46,311 +45,209 @@
/* Tracing */
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_HEARTBEAT
-/*
- * ocfs_nm_heart_beat()
- *
- * @osb: ocfs super block for the volume
- * @flag: type of heart beat
- * @read_publish: if the publish sector needs to be re-read
- *
- * Updates the timestamp in the nodes publish sector.
- * NOTE: must be called while holding publish_lock!
- *
- * Returns 0 if success, < 0 if error.
- */
-int ocfs_nm_heart_beat (ocfs_super * osb, __u32 flag, int read_publish)
+static void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map);
+
+void ocfs2_init_node_maps(ocfs_super *osb)
{
- int status = 0;
- ocfs_publish *publish = NULL;
- int publish_idx = OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num;
- struct buffer_head **pub_bh = &osb->autoconfig_bhs[publish_idx];
+ spin_lock_init(&osb->node_map_lock);
+ ocfs_node_map_init(osb, &osb->node_map);
+ ocfs_node_map_init(osb, &osb->recovery_map);
+}
- LOG_ENTRY_ARGS ("(0x%p, %u, %s)\n", osb, flag,
- read_publish ? "true" : "false");
+/* Most functions here are just stubs for now... */
+int ocfs2_register_hb_callbacks(ocfs_super *osb)
+{
+ ocfs_node_map_set_bit(&osb->node_map, osb->node_num);
+ return 0;
+}
- if (flag & HEARTBEAT_METHOD_DISK) {
- if (pub_bh == NULL && !read_publish)
- BUG();
+void ocfs2_clear_hb_callbacks(ocfs_super *osb)
+{
- if (read_publish) {
- status = ocfs_read_block(osb,
- (osb->publish_blkno + osb->node_num),
- pub_bh, 0, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
- }
+}
- publish = (ocfs_publish *) (*pub_bh)->b_data;
- if ((publish->dirty) && (!osb->publish_dirty)) {
- LOG_TRACE_STR(("NMThread reads the bit as dirty"));
- publish->dirty = 0;
- }
- /* Write the current time in local node's publish sector */
- publish->time = jiffies;
- /* Dissallow 0 */
- if (!publish->time)
- publish->time = 1;
- spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
- publish->comm_seq_num = OcfsGlobalCtxt.comm_seq_num;
- spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
+#warning these node map functions need to be spin locked
+/* special case -1 for now
+ * TODO: should *really* make sure the calling func never passes -1!! */
+static void ocfs_node_map_init(ocfs_super *osb, ocfs_node_map *map)
+{
+ spin_lock(&osb->node_map_lock);
+ map->num_nodes = osb->max_nodes;
+ memset(map->map, 0, BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES) *
+ sizeof(unsigned long));
+ spin_unlock(&osb->node_map_lock);
+}
- status = ocfs_write_block(osb, *pub_bh, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit)
+{
+ if (bit==-1)
+ return;
+ OCFS_ASSERT(bit < map->num_nodes);
+ set_bit(bit, map->map);
+}
+
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit)
+{
+ if (bit==-1)
+ return;
+ OCFS_ASSERT(bit < map->num_nodes);
+ clear_bit(bit, map->map);
+}
+
+#if 0
+// clear all the bits in "target" which are set in "mask"
+static void __ocfs_node_map_clear_bits(ocfs_node_map *target,
+ ocfs_node_map *mask)
+{
+ int bit, prev=0;
+ while (1) {
+ bit = find_next_bit (mask->map, mask->num_nodes, prev);
+ if (bit >= mask->num_nodes)
+ break;
+ ocfs_node_map_clear_bit(target, bit);
+ prev = bit+1;
}
+}
- if (flag & HEARTBEAT_METHOD_IPC) {
- /* Plug this in later... */
+// set all the bits in "target" which are set in "mask"
+void __ocfs_node_map_set_bits(ocfs_node_map *target,
+ ocfs_node_map *mask)
+{
+ int bit, prev=0;
+ while (1) {
+ bit = find_next_bit (mask->map, mask->num_nodes, prev);
+ if (bit >= mask->num_nodes)
+ break;
+ ocfs_node_map_set_bit(target, bit);
+ prev = bit+1;
}
+}
+#endif
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit)
+{
+ if (bit >= map->num_nodes) {
+ LOG_ERROR_ARGS("bit=%d map->num_nodes=%d\n", bit, map->num_nodes);
+ BUG();
+ }
+ return test_bit(bit, map->map);
+}
-finally:
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_nm_heart_beat */
-
-
-/*
- * ocfs_update_publish_map()
- *
- * @osb: ocfs super block for the volume
- * @buffer: publish sectors read in the last round
- * @first_time: if true, the buffer needs to be initialized
- *
- * Reads the publish sectors and compares the timestamp of each node
- * to the one it read in the last round. As long as the timestamp keeps
- * changing, the node is marked alive. Conversely, if the timestamp does
- * not change over time, the node is marked dead. The function marks all
- * the live nodes in the publishmap.
- *
- */
-void ocfs_update_publish_map (ocfs_super * osb, struct buffer_head *bhs[], int first_time)
+static int ocfs_node_map_stringify(ocfs_node_map *map, char **str)
{
- ocfs_publish *publish;
- ocfs_vol_node_map *node_map;
- int i;
- __u16 num_nodes;
+ int i, n;
+ char *s;
- LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u)\n", osb, bhs, first_time);
+ OCFS_ASSERT(map->num_nodes > 0);
- num_nodes = osb->max_nodes;
- node_map = osb->vol_node_map;
+ *str = kmalloc( strlen("123 ") * map->num_nodes, GFP_KERNEL);
+ if (!(*str))
+ return -ENOMEM;
- /* First time thru, update buffer with timestamps for all nodes */
- if (first_time) {
- /* Read the last comm_seq_num */
- publish = (ocfs_publish *) bhs[osb->node_num]->b_data;
- spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
- OcfsGlobalCtxt.comm_seq_num = publish->comm_seq_num + 10;
- spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
+ memset(*str, 0, strlen("123 ") * map->num_nodes);
- /* Refresh local buffers */
- for (i = 0; i < num_nodes; i++) {
- publish = (ocfs_publish *) bhs[i]->b_data;
- node_map[i].time = publish->time;
- if (publish->mounted && i != osb->node_num) {
- printk("ocfs2: Adding %s (node %d) to "
- "clustered device (%u,%u)\n",
- osb->node_cfg_info[i]->node_name, i,
- MAJOR(osb->sb->s_dev),
- MINOR(osb->sb->s_dev));
- node_map[i].miss_cnt = 0;
- ocfs_publish_map_set(&osb->publ_map, i);
+ s = *str;
+ for (i=0; i<map->num_nodes; i++) {
+ if (ocfs_node_map_test_bit(map, i)) {
+ n = sprintf(s, "%3d ", i);
+ if (n != strlen("123 ")) {
+ kfree(*str);
+ return -ENOMEM;
}
+ s += n;
}
- goto bail; /* exit */
}
+ return 0;
+}
- for (i = 0; i < num_nodes; i++) {
- publish = (ocfs_publish *) bhs[i]->b_data;
+int ocfs_node_map_is_empty(ocfs_node_map *map)
+{
+ int bit;
+ OCFS_ASSERT(map->num_nodes > 0);
+ bit = find_next_bit(map->map, map->num_nodes, 0);
+ if (bit < map->num_nodes)
+ return 0;
+ return 1;
+}
- /* Loop if slot is unused */
- if (publish->time == 0ULL)
- continue;
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2)
+{
+ int num_longs, i;
- /* Check if the node is hung or not by comparing the disk */
- /* and memory timestamp values */
- if (node_map[i].time == publish->time) {
- if (ocfs_node_is_alive(&osb->publ_map, i)) {
- char *err_msg = NULL;
- if (atomic_read (&(node_map[i].dismount))) {
- node_map[i].miss_cnt = MISS_COUNT_NODE_DEAD;
- atomic_set (&(node_map[i].dismount), 0);
- ocfs_publish_map_clear(&osb->publ_map, i);
- err_msg = "Received dismount message. Removing %s "
- "(node %d) from clustered device (%u,%u).\n";
- } else {
- (node_map[i].miss_cnt)++;
-
- if (node_map[i].miss_cnt == MISS_COUNT_WARNING)
- err_msg = "warning: %s (node %d) may be ejected from cluster "
- "on device (%u.%u)... %d misses so far\n";
- else if (node_map[i].miss_cnt == MISS_COUNT_EMERGENCY)
- err_msg = "warning: %s (node %d) WILL BE EJECTED from cluster "
- "on device (%u.%u)... %d misses so far\n";
- else if (node_map[i].miss_cnt >= MISS_COUNT_NODE_DEAD)
- err_msg = "Removing %s (node %d) from clustered device "
- "(%u,%u) after %d misses\n";
- }
-
- if (err_msg)
- LOG_ERROR_ARGS(err_msg, osb->node_cfg_info[i]->node_name, i,
- MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev),
- node_map[i].miss_cnt);
- if (node_map[i].miss_cnt >= MISS_COUNT_NODE_DEAD) {
- ocfs_recovery_map_set(osb, i);
- ocfs_publish_map_clear(&osb->publ_map, i);
-
- /* Ok, we'd better recover him now...*/
- ocfs_recovery_thread(osb, i);
- }
- }
- } else {
- if (!ocfs_node_is_alive(&osb->publ_map, i) &&
- (osb->node_num != i))
- printk ("ocfs2: Adding %s (node %d) to clustered device (%u,%u)\n",
- osb->node_cfg_info[i]->node_name, i,
- MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
- node_map[i].miss_cnt = 0;
- node_map[i].time = publish->time;
- ocfs_publish_map_set(&osb->publ_map, i);
-
- }
+ OCFS_ASSERT(map1->num_nodes == map2->num_nodes);
+ OCFS_ASSERT(map1->num_nodes > 0);
+
+ num_longs = BITS_TO_LONGS(map1->num_nodes);
+ for (i=0; i<num_longs; i++) {
+ if (map1->map[i] != map2->map[i])
+ return 0;
}
+ return 1;
+}
-bail:
- LOG_EXIT ();
- return;
-} /* ocfs_update_publish_map */
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask)
+{
+ int num_longs, i;
+ OCFS_ASSERT(target->num_nodes == mask->num_nodes);
+ OCFS_ASSERT(target->num_nodes > 0);
+
+ num_longs = BITS_TO_LONGS(target->num_nodes);
+ for (i=0; i<num_longs; i++)
+ target->map[i] &= mask->map[i];
+}
-/* half a second timeout */
-#define OCFS_HEARTBEAT_JIFFIES (HZ >> 1)
-
-/*
- * ocfs_heartbeat_thread()
- *
- * This function is executed as a kernel thread for each mounted ocfs volume.
- */
-int ocfs_heartbeat_thread (void *arg)
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from)
{
- ocfs_super *osb;
- char proc[16];
- int status = 0;
- __u8 *buffer = NULL;
- ocfs_publish *publish;
- unsigned long j;
- __u16 num_nodes = 0;
- ocfs_node_config_hdr *node_cfg_hdr = NULL;
- __u64 cfg_seq_num;
- struct buffer_head *bh = NULL;
- siginfo_t info;
+ int num_longs, i;
- LOG_ENTRY ();
+ OCFS_ASSERT(target->num_nodes == from->num_nodes);
+ OCFS_ASSERT(target->num_nodes > 0);
- osb = arg;
+ num_longs = BITS_TO_LONGS(target->num_nodes);
+ for (i=0; i<num_longs; i++)
+ target->map[i] = from->map[i];
+}
- sprintf (proc, "ocfs2nm-%d", osb->osb_id);
- ocfs_daemonize (proc, strlen(proc), 1);
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from)
+{
+ OCFS_ASSERT(from->num_nodes > 0);
+ ocfs_node_map_init(osb, target);
+ ocfs_node_map_set(target, from);
+}
- osb->dlm_task = current;
+/* returns 1 if bit is the only bit set in target, 0 otherwise */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit)
+{
+ ocfs_node_map temp;
+ int ret;
- osb->hbt = OCFS_HEARTBEAT_JIFFIES + jiffies;
- /* The delay changes based on multiplier */
- while (!(OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) &&
- !(osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED)) {
+ ocfs_node_map_dup(osb, &temp, target);
+ ocfs_node_map_clear_bit(&temp, bit);
+ ret = ocfs_node_map_is_empty(&temp);
+ return ret;
+}
- buffer = NULL;
+/* update the recovery map here */
+void ocfs_recovery_map_set(ocfs_super *osb, int num)
+{
+ spin_lock(&osb->node_map_lock);
+ ocfs_node_map_set_bit(&osb->recovery_map, num);
+ spin_unlock(&osb->node_map_lock);
+}
- if (!time_after (jiffies, (unsigned long) (osb->hbt)))
- goto again;
+void ocfs_recovery_map_clear(ocfs_super *osb, int num)
+{
+ spin_lock(&osb->node_map_lock);
+ ocfs_node_map_clear_bit(&osb->recovery_map, num);
+ spin_unlock(&osb->node_map_lock);
+}
- /* lock publish to prevent overwrites from vote_req and vote_reset */
- down (&(osb->publish_lock));
+#if 0
+ if (node_map[i].miss_cnt >= MISS_COUNT_NODE_DEAD) {
+ ocfs_recovery_map_set(osb, i);
+ ocfs_publish_map_clear(&osb->publ_map, i);
- /* Read disk for 4 autoconfig blocks + all nodes publish blocks */
- status = ocfs_read_blocks(osb,
- osb->new_autoconfig_blkno,
- osb->total_autoconfig_blocks,
- osb->autoconfig_bhs, 0, NULL);
- if (status < 0) {
- up (&(osb->publish_lock));
- LOG_ERROR_STATUS (status);
- BUG();
- }
-
- bh = osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS + osb->node_num];
- publish = (ocfs_publish *) bh->b_data;
- if ((osb->check_mounted) && (publish->mounted == 0)) {
- printk("ocfs2: Heartbeat timed out, volume has been "
- "recovered from another node!\n");
-
- BUG();
- }
- bh = NULL;
-
- ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 0);
-
- /* release publish lock */
- up (&(osb->publish_lock));
-
- /* If another node was added to the config read and update the cfg */
- node_cfg_hdr =
- (ocfs_node_config_hdr *) osb->autoconfig_bhs[1]->b_data;
- num_nodes = node_cfg_hdr->num_nodes;
- cfg_seq_num = node_cfg_hdr->cfg_seq_num;
-
- if ((osb->cfg_seq_num != cfg_seq_num) ||
- (osb->num_cfg_nodes != num_nodes)) {
- down (&(osb->cfg_lock));
- status = ocfs_chk_update_config (osb);
- up (&(osb->cfg_lock));
- if (status < 0)
- LOG_ERROR_STATUS (status);
- }
-
- num_nodes = osb->max_nodes;
-
- /* Refresh the publish map */
- ocfs_update_publish_map (osb, &(osb->autoconfig_bhs[OCFS_VOLCFG_NEWCFG_SECTORS]), 0);
-
- /* send signal to mount thread to continue */
- if (atomic_read (&osb->nm_init) < OCFS_HEARTBEAT_INIT) {
- atomic_inc (&osb->nm_init);
- } else if (atomic_read(&osb->nm_init) == OCFS_HEARTBEAT_INIT) {
- wake_up (&osb->nm_init_event);
- atomic_inc (&osb->nm_init);
- }
-
- osb->hbt = OCFS_HEARTBEAT_JIFFIES + jiffies;
-
-again:
- status = 0;
-
- if ((OcfsGlobalCtxt.flags & OCFS_FLAG_SHUTDOWN_VOL_THREAD) ||
- (osb->osb_flags & OCFS_OSB_FLAGS_BEING_DISMOUNTED))
- break;
- j = jiffies;
- if (time_after (j, (unsigned long) (osb->hbt))) {
- osb->hbt = OCFS_HEARTBEAT_JIFFIES + j;
- }
- set_current_state (TASK_INTERRUPTIBLE);
- schedule_timeout (osb->hbt - j);
-
- /* ignore the actual signal */
- if (signal_pending(current)) {
- dequeue_signal_lock(current, &current->blocked, &info);
- }
- }
-
- /* Flush all scheduled tasks */
- flush_scheduled_work();
-
- complete (&(osb->dlm_complete));
-
- LOG_EXIT_INT (0);
- return 0;
-} /* ocfs_heartbeat_thread */
+ /* Ok, we'd better recover him now...*/
+ ocfs_recovery_thread(osb, i);
+ }
+#endif
Modified: branches/dlm-glue/src/heartbeat.h
===================================================================
--- branches/dlm-glue/src/heartbeat.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/heartbeat.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -26,9 +26,22 @@
#ifndef OCFS2_HEARTBEAT_H
#define OCFS2_HEARTBEAT_H
-int ocfs_nm_heart_beat(ocfs_super *osb, __u32 flag, int read_publish);
-void ocfs_update_publish_map(ocfs_super *osb, struct buffer_head *bhs[],
- int first_time);
-int ocfs_heartbeat_thread(void *arg);
+void ocfs2_init_node_maps(ocfs_super *osb);
+int ocfs2_register_hb_callbacks(ocfs_super *osb);
+void ocfs2_clear_hb_callbacks(ocfs_super *osb);
+
+void ocfs_node_map_set_bit(ocfs_node_map *map, int bit);
+void ocfs_node_map_clear_bit(ocfs_node_map *map, int bit);
+int ocfs_node_map_test_bit(ocfs_node_map *map, int bit);
+int ocfs_node_map_is_empty(ocfs_node_map *map);
+int ocfs_node_map_is_equal(ocfs_node_map *map1, ocfs_node_map *map2);
+void ocfs_node_map_and(ocfs_node_map *target, ocfs_node_map *mask);
+void ocfs_node_map_set(ocfs_node_map *target, ocfs_node_map *from);
+void ocfs_node_map_dup(ocfs_super *osb, ocfs_node_map *target, ocfs_node_map *from);
+/* returns 1 if bit is the only bit set in target, 0 otherwise */
+int ocfs_node_map_is_only(ocfs_super *osb, ocfs_node_map *target, int bit);
+void ocfs_recovery_map_set(ocfs_super *osb, int num);
+void ocfs_recovery_map_clear(ocfs_super *osb, int num);
+
#endif /* OCFS2_HEARTBEAT_H */
Modified: branches/dlm-glue/src/inode.c
===================================================================
--- branches/dlm-glue/src/inode.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/inode.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -39,25 +39,22 @@
#include "ocfs2.h"
#include "alloc.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "inode.h"
-#include "lockres.h"
#include "namei.h"
#include "suballoc.h"
#include "super.h"
#include "symlink.h"
#include "sysfile.h"
#include "util.h"
-#include "vote.h"
#include "ocfs_journal.h"
#include "buffer_head_io.h"
#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_INODE
-extern struct semaphore recovery_list_sem;
extern struct address_space_operations ocfs_aops;
typedef struct _ocfs_find_inode_args
@@ -306,11 +303,11 @@
memset(i, 0, sizeof(ocfs_inode_private));
i->ip_flags = 0;
+ INIT_LIST_HEAD(&i->ip_blocked_list);
atomic_set(&i->ip_clean_buffer_seq, 0);
i->ip_open_cnt = 0;
spin_lock_init(&i->ip_lock);
ocfs2_extent_map_init(inode);
- INIT_LIST_HEAD(&i->ip_recovery_list);
INIT_LIST_HEAD(&i->ip_handle_list);
i->ip_handle = NULL;
i->ip_next_orphan = NULL;
@@ -506,12 +503,6 @@
if (sysfile)
OCFS_SET_FLAG(OCFS_I(inode)->ip_flags, OCFS_INODE_SYSTEM_FILE);
- ocfs_init_lockres (osb, inode);
- status = ocfs_update_lockres(osb, bh, inode, 0);
- if (status < 0) {
- make_bad_inode(inode);
- goto bail;
- }
status = 0;
bail:
@@ -633,14 +624,19 @@
goto bail;
}
- /* acquire_lock and friends will igrab / iput this guy, so we
+ /* ocfs2_meta_lock and friends might igrab / iput this guy, so we
* take an extra ref. to avoid recursive calls to
* delete_inode. */
atomic_inc(&inode->i_count);
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE, FLAG_FILE_DELETE,
- &fe_bh, inode);
+ status = ocfs2_meta_lock(inode, NULL, &fe_bh, 1);
atomic_set(&inode->i_count, 0);
if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
+ status = ocfs2_request_delete_vote(inode);
+ if (status < 0) {
/* EBUSY here is assumed to mean that other nodes are
* still using the inode. We're done here though, so
* avoid doing anything on disk and let them worry
@@ -782,6 +778,7 @@
*/
void ocfs_clear_inode (struct inode *inode)
{
+ int status;
ocfs_super *osb;
LOG_SET_CONTEXT(CLEAR_INODE);
@@ -820,10 +817,6 @@
if (status < 0)
LOG_ERROR_STATUS(status);
- down(&recovery_list_sem);
- list_del(&OCFS_I(inode)->ip_recovery_list);
- up(&recovery_list_sem);
-
ocfs2_lock_res_free(&OCFS_I(inode)->ip_lockres);
/* clean out the inode private ... why?! */
memset(inode->u.generic_ip, 0, sizeof(ocfs_inode_private));
@@ -929,16 +922,12 @@
}
spin_unlock(&OCFS_I(inode)->ip_lock);
- status = ocfs_meta_lock(inode, NULL, NULL, 0);
+ status = ocfs2_meta_lock(inode, NULL, NULL, 0);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
-
- status = ocfs_meta_unlock(inode, 0);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
+ ocfs2_meta_unlock(inode, 0);
bail:
LOG_EXIT_STATUS(status);
Modified: branches/dlm-glue/src/journal.c
===================================================================
--- branches/dlm-glue/src/journal.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/journal.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -35,17 +35,15 @@
#include "ocfs2.h"
#include "alloc.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "extent_map.h"
+#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
-#include "lockres.h"
#include "namei.h"
-#include "nm.h"
#include "super.h"
#include "util.h"
-#include "vote.h"
#include "sysfile.h"
#include "ocfs_journal.h"
@@ -55,7 +53,6 @@
spinlock_t trans_inc_lock = SPIN_LOCK_UNLOCKED;
-static int ocfs_reset_publish (ocfs_super * osb, __u64 node_num);
static int ocfs_force_read_journal(struct inode *inode);
static int ocfs_recover_node(struct _ocfs_super *osb, int node_num);
static int __ocfs_recovery_thread(void *arg);
@@ -64,6 +61,8 @@
static void ocfs_handle_move_locks(ocfs_journal *journal,
ocfs_journal_handle *handle);
static void ocfs_commit_unstarted_handle(ocfs_journal_handle *handle);
+static int ocfs_journal_toggle_dirty(ocfs_super *osb,
+ int dirty);
/*
* ocfs_commit_cache()
@@ -81,7 +80,6 @@
ocfs_journal_lock *lock = NULL;
struct inode *inode;
ocfs_inode_private *ip;
- ocfs_lock_res *lockres;
LOG_ENTRY();
@@ -183,18 +181,14 @@
#endif
tmpstat = 0;
OCFS_ASSERT(!INODE_DELETED(lock->jl_inode));
- tmpstat = ocfs2_meta_unlock(lock->jl_inode, 1);
- if (tmpstat < 0)
- LOG_ERROR_ARGS("commit_thread: release_lock status is"
- " %d releasing lock on inode %llu!\n",
- tmpstat, OCFS_I(lock->inode)->ip_blkno);
+ ocfs2_meta_unlock(lock->jl_inode, 1);
iput(lock->jl_inode);
atomic_dec(&journal->num_chkpt_locks);
kmem_cache_free(OcfsGlobalCtxt.lock_cache, lock);
}
- kick_vote_thread(osb);
+ ocfs2_kick_vote_thread(osb);
finally:
LOG_EXIT_STATUS (status);
@@ -712,7 +706,8 @@
/*
* Setup the journal using the journal system file
*/
-int ocfs_journal_init(ocfs_super *osb)
+int ocfs_journal_init(ocfs_super *osb,
+ int *dirty)
{
int status = -1;
struct inode *inode = NULL; /* the journal inode */
@@ -804,6 +799,7 @@
INIT_LIST_HEAD(&(osb->journal->committing_inodes));
INIT_LIST_HEAD(&(osb->journal->checkpointing_locks));
+ *dirty = (fe->id1.journal1.i_flags & OCFS2_JOURNAL_DIRTY_FL);
status = 0;
done:
if (status < 0) {
@@ -820,6 +816,32 @@
return(status);
} /* ocfs_journal_init */
+static int ocfs_journal_toggle_dirty(ocfs_super *osb,
+ int dirty)
+{
+ int status;
+ ocfs_journal * journal = osb->journal;
+ struct buffer_head *bh = journal->lockbh;
+ ocfs2_dinode *fe;
+
+ LOG_ENTRY();
+
+ fe = (ocfs2_dinode *) bh->b_data;
+ OCFS_ASSERT(IS_VALID_FILE_ENTRY(fe));
+
+ if (dirty)
+ fe->id1.journal1.i_flags |= OCFS2_JOURNAL_DIRTY_FL;
+ else
+ fe->id1.journal1.i_flags &= ~OCFS2_JOURNAL_DIRTY_FL;
+
+ status = ocfs_write_block(osb, bh, journal->k_inode);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+
+ LOG_EXIT_STATUS(status);
+ return status;
+}
+
/*
if the journal has been ocfs_malloc'd it needs to be freed after this call.
*/
@@ -873,15 +895,17 @@
OCFS_ASSERT(atomic_read(&(osb->journal->num_trans)) == 0);
+ status = ocfs_journal_toggle_dirty(osb, 0);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+
/* Shutdown the kernel journal system */
journal_destroy(journal->k_journal);
OCFS_I(inode)->ip_open_cnt--;
/* unlock our journal */
- status = ocfs2_meta_unlock(inode, 1);
- if (status < 0)
- LOG_ERROR_STATUS (status);
+ ocfs2_meta_unlock(inode, 1);
brelse (journal->lockbh);
journal->lockbh = NULL;
@@ -900,6 +924,7 @@
{
int status = 0;
int olderr = 0;
+ int child_pid;
ocfs_super *osb;
LOG_ENTRY();
@@ -922,6 +947,34 @@
journal_clear_err(journal->k_journal);
}
+ status = ocfs_journal_toggle_dirty(osb, 1);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto done;
+ }
+
+ down(&(osb->osb_res));
+ /* Launch the commit thread */
+ osb->commit = ocfs_malloc(sizeof(ocfs_commit_task));
+ if (osb->commit == NULL) {
+ LOG_ERROR_STATUS(status = -ENOMEM);
+ up (&(osb->osb_res));
+ goto done;
+ }
+ memset(osb->commit, 0, sizeof(ocfs_commit_task));
+ child_pid = kernel_thread (ocfs_commit_thread, osb,
+ CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ if (child_pid < 0) {
+ LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, error=%d",
+ child_pid);
+ up (&(osb->osb_res));
+ status = child_pid;
+ goto done;
+ } else {
+ init_completion (&osb->commit->c_complete);
+ }
+ up (&(osb->osb_res));
+
done:
LOG_EXIT_STATUS(status);
return(status);
@@ -940,7 +993,16 @@
BUG();
status = journal_wipe(journal->k_journal, full);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+ status = ocfs_journal_toggle_dirty(journal->osb, 0);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+
+bail:
LOG_EXIT_STATUS(status);
return(status);
}
@@ -1148,27 +1210,26 @@
status = ocfs2_meta_lock(inode, NULL, &bh, 1);
if (status < 0) {
- LOG_TRACE_ARGS("status returned from acquire_lock=%d\n",
+ LOG_TRACE_ARGS("status returned from ocfs2_meta_lock=%d\n",
status);
if (status != -EINTR)
LOG_ERROR_STR("Could not lock journal!");
goto done;
}
got_lock = 1;
+
+ fe = (ocfs2_dinode *) bh->b_data;
- /* check if that nodes publish sector has been reset (mounted
- * is set false) if so, we can unlock and quit. otherwise we
- * should recover. */
- if (!ocfs_publish_get_mount_state(osb, node_num)) {
+ if (!(fe->id1.journal1.i_flags & OCFS2_JOURNAL_DIRTY_FL)) {
LOG_TRACE_ARGS("No recovery required for node %d\n", node_num);
status = 0;
goto clear_node;
}
+
printk("ocfs2: Recovering node %d from device (%u,%u)\n", node_num,
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
clean_orphans = 1;
- fe = (ocfs2_dinode *) bh->b_data;
OCFS_I(inode)->ip_clusters = fe->i_clusters;
status = ocfs_force_read_journal(inode);
@@ -1203,6 +1264,12 @@
if (status < 0)
LOG_ERROR_STATUS(status);
+ /* mark the node clean. */
+ fe->id1.journal1.i_flags &= ~OCFS2_JOURNAL_DIRTY_FL;
+ status = ocfs_write_block(osb, bh, inode);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
+
if (ocfs_inc_icount(inode) < 0)
BUG();
@@ -1216,26 +1283,17 @@
goto done;
}
- /* clear the publish sector (mark it unmounted and clean) */
- status = ocfs_reset_publish(osb, node_num);
- if (status < 0)
- LOG_ERROR_STATUS(status);
-
status = 0;
clear_node:
ocfs_recovery_map_clear(osb, node_num);
- ocfs_recover_oin_locks(osb, node_num);
done:
if (recovery_lock)
up(&(osb->recovery_lock));
/* drop the lock on this nodes journal */
- if (got_lock) {
- tmpstat = ocfs2_meta_unlock(inode, 1);
- if (tmpstat < 0)
- LOG_ERROR_STATUS(tmpstat);
- }
+ if (got_lock)
+ ocfs2_meta_unlock(inode, 1);
if (inode)
iput(inode);
@@ -1268,7 +1326,6 @@
{
int status = 0;
int have_disk_lock = 0;
- int tmpstat;
struct inode *inode = NULL;
struct inode *iter;
struct inode *orphan_dir_inode = NULL;
@@ -1363,12 +1420,8 @@
}
up(&orphan_dir_inode->i_sem);
- status = ocfs2_meta_unlock(orphan_dir_inode, 0);
+ ocfs2_meta_unlock(orphan_dir_inode, 0);
have_disk_lock = 0;
- if (status < 0) {
- LOG_ERROR_STATUS(status);
- goto bail;
- }
iput(orphan_dir_inode);
orphan_dir_inode = NULL;
@@ -1382,11 +1435,8 @@
bail:
up(&osb->orphan_recovery_lock);
- if (have_disk_lock) {
- tmpstat = ocfs2_meta_unlock(orphan_dir_inode, 0);
- if (tmpstat < 0)
- LOG_ERROR_STATUS(tmpstat);
- }
+ if (have_disk_lock)
+ ocfs2_meta_unlock(orphan_dir_inode, 0);
if (orphan_dir_inode)
iput(orphan_dir_inode);
@@ -1416,57 +1466,6 @@
goto retry;
}
-/*
- * ocfs_reset_publish()
- *
- *
- * called by: old_ocfs_recover_node()
- *
- * NOTE: This function is unused. I keep it here because it may be
- * useful in the future. --Mark (Sept. 22, 2003)
- */
-static int ocfs_reset_publish (ocfs_super * osb, __u64 node_num)
-{
- int status = 0;
- ocfs_publish *publish = NULL;
- struct buffer_head *publish_bh = NULL;
-
- LOG_ENTRY_ARGS("(0x%p, %llu)\n", osb, node_num);
-
- /* take a lock on the publish sector */
- down (&(osb->publish_lock));
-
- /* Read the publish sector */
- status = ocfs_read_block(osb, (osb->publish_blkno + node_num),
- &publish_bh, 0, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
-
- publish = (ocfs_publish *) publish_bh->b_data;
-
- publish->dirty = 0;
- publish->mounted = 0;
-
- /* Write the publish sector */
- status = ocfs_write_block(osb, publish_bh, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
-
-finally:
- /* unlock it */
- up (&(osb->publish_lock));
-
- if (publish_bh)
- brelse(publish_bh);
-
- LOG_EXIT_STATUS (status);
- return (status);
-} /* ocfs_reset_publish */
-
int ocfs_commit_thread(void *arg)
{
int status = 0, misses = 0;
Modified: branches/dlm-glue/src/localalloc.c
===================================================================
--- branches/dlm-glue/src/localalloc.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/localalloc.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -38,7 +38,7 @@
#include "alloc.h"
#include "bitmap.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "localalloc.h"
#include "suballoc.h"
#include "sysfile.h"
Deleted: branches/dlm-glue/src/lockres.c
===================================================================
--- branches/dlm-glue/src/lockres.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/lockres.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -1,128 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * lockres.c
- *
- * lock resource handling
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ocfs_compat.h"
-
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-
-#include "ocfs_log.h"
-#include "ocfs.h"
-
-#include "dlm.h"
-#include "lockres.h"
-#include "util.h"
-#include "vote.h"
-
-#include "buffer_head_io.h"
-
-/* Tracing */
-#define OCFS_DEBUG_CONTEXT OCFS_DEBUG_CONTEXT_LOCKRES
-
-
-/*
- * ocfs_update_lockres()
- *
- * @osb: ocfs super block for the volume
- * @fe: corresponding file entry
- *
- * the lockres is refreshed from the disk.
- *
- * Returns 0 if success, < 0 if error.
- */
-int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh,
- struct inode *inode, int reread)
-{
- int status = 0;
- ocfs2_dinode *fe;
- int flags;
- int drop_bh = 0;
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- LOG_ENTRY_ARGS("(0x%p, %llu, 0x%p, 0x%p)\n", osb,
- OCFS_I(inode)->ip_blkno, lockres, bh);
-
- /* hey, you can't do that! ;) */
- if ((!bh) && !reread)
- BUG();
-
- if (!bh)
- drop_bh = 1;
-
- /* Behavior for process_vote: if you don't pass a buffer, then
- * we'll only read if you're not the master. */
- if ((bh == NULL) && (lockres->master_node_num == osb->node_num))
- goto out;
-
- if (reread) {
- flags = lockres->master_node_num == osb->node_num ?
- OCFS_BH_CACHED : 0;
- status = ocfs_read_block(osb, OCFS_I(inode)->ip_blkno, &bh,
- flags, inode);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto out;
- }
- }
-
- fe = (ocfs2_dinode *) bh->b_data;
- if (!IS_VALID_FILE_ENTRY(fe))
- BUG();
-
- lockres->lock_type = DISK_LOCK(fe)->dl_level;
- lockres->master_node_num = DISK_LOCK(fe)->dl_master;
-
-out:
- if (bh && drop_bh)
- brelse(bh);
-
- LOG_EXIT_STATUS (status);
- return status;
-} /* ocfs_update_lockres */
-
-
-/*
- * ocfs_init_lockres()
- *
- */
-void ocfs_init_lockres (ocfs_super * osb, struct inode *inode)
-{
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
-
- LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", osb, lockres);
-
- lockres->master_node_num = OCFS_INVALID_NODE_NUM;
- lockres->lock_holders = 0;
- lockres->readonly_state = 0;
- lockres->uncommitted_holders = 0;
- lockres->lock_type = OCFS_LKM_NLMODE;
- init_rwsem(&lockres->lock);
- ocfs_node_map_init(osb, &lockres->readonly_map);
-
- LOG_EXIT ();
- return;
-} /* ocfs_init_lockres */
Deleted: branches/dlm-glue/src/lockres.h
===================================================================
--- branches/dlm-glue/src/lockres.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/lockres.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -1,118 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * lockres.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_LOCKRES_H
-#define OCFS2_LOCKRES_H
-
-#include "util.h"
-
-/* lockres.c */
-
-
-/*
- * ocfs_acquire_lockres_write_timeout()
- *
- * @lockres: lockres to acquire
- * @timeout: timeout in ms, 0 == no timeout
- */
-static inline int ocfs_acquire_lockres_write_timeout (struct inode *inode, __u32 timeout)
-{
- unsigned long jif = jiffies + (timeout * HZ / 1000);
- ocfs_lock_res * lockres = GET_INODE_LOCKRES(inode);
-
- while(1) {
- if (down_write_trylock(&lockres->lock))
- return 0;
-
- if (jif < jiffies)
- return -ETIMEDOUT;
-
- ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
- }
-
- return 0;
-}
-
-/*
- * ocfs_acquire_lockres_write()
- */
-static inline int ocfs_acquire_lockres_write (struct inode *inode)
-{
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- down_write(&lockres->lock);
- return 0;
-}
-
-/*
- * ocfs_acquire_lockres_read_timeout()
- *
- * @lockres: lockres to acquire
- * @timeout: timeout in ms, 0 == no timeout
- */
-static inline int ocfs_acquire_lockres_read_timeout (struct inode *inode, __u32 timeout)
-{
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- unsigned long jif = jiffies + (timeout * HZ / 1000);
-
- while(1) {
- if (down_read_trylock(&lockres->lock))
- return 0;
-
- if (jif < jiffies)
- return -ETIMEDOUT;
-
- ocfs_sleep (OCFS_NM_HEARTBEAT_TIME / 10);
- }
-
- return 0;
-}
-
-/*
- * ocfs_acquire_lockres_read()
- */
-static inline int ocfs_acquire_lockres_read (struct inode *inode)
-{
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- down_read(&lockres->lock);
- return 0;
-}
-
-static inline void ocfs_release_lockres_write(struct inode *inode)
-{
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- up_write(&lockres->lock);
-}
-static inline void ocfs_release_lockres_read(struct inode *inode)
-{
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- up_read(&lockres->lock);
-}
-
-void ocfs_init_lockres(ocfs_super *osb, struct inode *inode);
-int ocfs_update_lockres(ocfs_super *osb, struct buffer_head *bh,
- struct inode *inode, int reread);
-
-
-#endif /* OCFS2_LOCKRES_H */
Modified: branches/dlm-glue/src/namei.c
===================================================================
--- branches/dlm-glue/src/namei.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/namei.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -50,16 +50,14 @@
#include "alloc.h"
#include "dcache.h"
#include "dir.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
#include "inode.h"
-#include "lockres.h"
#include "namei.h"
#include "suballoc.h"
#include "util.h"
-#include "vote.h"
#include "ocfs_journal.h"
#include "buffer_head_io.h"
@@ -101,10 +99,8 @@
static int ocfs_double_lock(ocfs_super *osb,
ocfs_journal_handle *handle,
- __u32 type1, __u32 flags1,
struct buffer_head **bh1,
struct inode *inode1,
- __u32 type2, __u32 flags2,
struct buffer_head **bh2,
struct inode *inode2);
@@ -389,12 +385,13 @@
BUG();
}
- file_off = fe->i_blkno << dir->i_sb->s_blocksize_bits;
ocfs_inode_set_new(osb, inode);
+ status = ocfs2_create_new_inode_locks(inode);
+ if (status < 0)
+ LOG_ERROR_STATUS(status);
- ocfs_init_lockres(osb, inode);
+ file_off = fe->i_blkno << dir->i_sb->s_blocksize_bits;
- status = ocfs_update_lockres(osb, new_fe_bh, inode, 0);
if (S_ISDIR (mode)) {
status = ocfs_fill_new_dir(osb, handle, dir, inode,
new_fe_bh, data_ac);
@@ -549,8 +546,6 @@
fe->i_last_eb_blk = 0;
strcpy (fe->i_signature, OCFS2_INODE_SIGNATURE);
fe->i_flags |= OCFS2_VALID_FL;
- DISK_LOCK(fe)->dl_master = osb->node_num;
- DISK_LOCK(fe)->dl_level = OCFS_LKM_EXMODE;
fe->i_atime = fe->i_ctime = fe->i_mtime = OCFS_CURRENT_TIME;
fe->i_dtime = 0;
@@ -800,16 +795,20 @@
if (blkno != OCFS_I(inode)->ip_blkno)
BUG();
- status = ocfs_acquire_lock (osb, OCFS_LKM_EXMODE,
- FLAG_RELEASE_DENTRY, &fe_bh, inode);
+ status = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
if (status < 0) {
if (status != -EINTR)
LOG_ERROR_STATUS (status);
goto leave;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE, FLAG_RELEASE_DENTRY,
- inode);
+ status = ocfs2_request_unlink_vote(inode);
+ if (status < 0) {
+ /* This vote should succeed under all normal circumstances. */
+ LOG_ERROR_STATUS(status);
+ goto leave;
+ }
+
if (S_ISDIR (inode->i_mode)) {
if (!ocfs_empty_dir(inode)) {
status = -ENOTEMPTY;
@@ -931,16 +930,14 @@
*/
static int ocfs_double_lock(ocfs_super *osb,
ocfs_journal_handle *handle,
- __u32 type1, __u32 flags1,
struct buffer_head **bh1,
struct inode *inode1,
- __u32 type2, __u32 flags2,
struct buffer_head **bh2,
struct inode *inode2)
{
- int status = 0;
- __u64 tmpid, id1, id2;
- __u32 tmptype, tmpflags;
+ int status;
+ ocfs_inode_private *oip1 = OCFS_I(inode1);
+ ocfs_inode_private *oip2 = OCFS_I(inode2);
struct buffer_head **tmpbh;
struct inode *tmpinode;
@@ -950,31 +947,16 @@
OCFS_ASSERT(handle);
- id1 = OCFS_I(inode1)->ip_blkno;
- id2 = OCFS_I(inode2)->ip_blkno;
-
if (*bh1)
*bh1 = NULL;
if (*bh2)
*bh2 = NULL;
/* we always want to lock the one with the lower lockid first. */
- if (id1 != id2) {
- if (id1 < id2) {
+ if (oip1->ip_blkno != oip2->ip_blkno) {
+ if (oip1->ip_blkno < oip2->ip_blkno) {
/* switch id1 and id2 around */
LOG_TRACE_STR("switching them around...");
- tmpid = id2;
- id2 = id1;
- id1 = tmpid;
-
- tmptype = type2;
- type2 = type1;
- type1 = tmptype;
-
- tmpflags = flags2;
- flags2 = flags1;
- flags1 = tmpflags;
-
tmpbh = bh2;
bh2 = bh1;
bh1 = tmpbh;
@@ -984,21 +966,18 @@
inode1 = tmpinode;
}
/* lock id2 */
- status = ocfs_acquire_lock(osb, type2, flags2, bh2, inode2);
+ status = ocfs2_meta_lock(inode2, handle, bh2, 1);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
- ocfs_handle_add_lock(handle, type2, flags2, inode2);
}
/* lock id1 */
- status = ocfs_acquire_lock(osb, type1, flags1,
- bh1, inode1);
+ status = ocfs2_meta_lock(inode1, handle, bh1, 1);
if (status < 0) {
LOG_ERROR_STATUS (status);
goto bail;
}
- ocfs_handle_add_lock(handle, type1, flags1, inode1);
bail:
LOG_EXIT_STATUS(status);
return(status);
@@ -1034,7 +1013,6 @@
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh
nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
- struct buffer_head *lockbh = NULL;
LOG_SET_CONTEXT(RENAME);
@@ -1066,11 +1044,7 @@
/* if old and new are the same, this'll just do one lock. */
status = ocfs_double_lock(osb, handle,
- OCFS_LKM_EXMODE,
- 0,
&old_dir_bh, old_dir,
- OCFS_LKM_EXMODE,
- 0,
&new_dir_bh, new_dir);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1094,20 +1068,17 @@
/* Directories actually require metadata updates to
* the directory info so we can't get away with not
* doing node locking on it. */
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- FLAG_RELEASE_DENTRY|FLAG_FILE_RENAME,
- &lockbh, old_inode);
- if (lockbh) {
- brelse(lockbh);
- lockbh = NULL;
+ status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
}
+
+ status = ocfs2_request_rename_vote(old_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_RELEASE_DENTRY|FLAG_FILE_RENAME,
- old_inode);
status = -EIO;
old_inode_de_bh = ocfs_bread (old_inode, 0, &status, 0);
@@ -1125,7 +1096,7 @@
} else {
/* Ah, the simple case - we're a file so just send a
* message. */
- status = ocfs_notify_on_rename(osb, old_inode);
+ status = ocfs2_request_rename_vote(old_inode);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
@@ -1176,16 +1147,18 @@
if (newfe_blkno != OCFS_I(new_inode)->ip_blkno)
BUG();
- status = ocfs_acquire_lock(osb, OCFS_LKM_EXMODE,
- FLAG_RELEASE_DENTRY, &newfe_bh,
- new_inode);
+ status = ocfs2_meta_lock(new_inode, handle, &newfe_bh, 1);
if (status < 0) {
LOG_ERROR_STATUS(status);
goto bail;
}
- ocfs_handle_add_lock(handle, OCFS_LKM_EXMODE,
- FLAG_RELEASE_DENTRY, new_inode);
+ status = ocfs2_request_unlink_vote(new_inode);
+ if (status < 0) {
+ LOG_ERROR_STATUS(status);
+ goto bail;
+ }
+
newfe = (ocfs2_dinode *) newfe_bh->b_data;
LOG_TRACE_ARGS("aha rename over existing... new_de=%p "
@@ -1609,8 +1582,7 @@
}
ocfs_inode_set_new(osb, inode);
- ocfs_init_lockres(osb, inode);
- status = ocfs_update_lockres(osb, new_fe_bh, inode, 0);
+ status = ocfs2_create_new_inode_locks(inode);
if (status < 0)
LOG_ERROR_STATUS(status);
Modified: branches/dlm-glue/src/ocfs.h
===================================================================
--- branches/dlm-glue/src/ocfs.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/ocfs.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -42,6 +42,9 @@
# include <linux/tqueue.h>
#endif
+#include <dlmutil.h>
+#include <dlmcommon.h>
+#include <dlmmod.h>
#define OCFS_BITS_IN_CHUNK(sb) ((sb)->s_blocksize * 8)
@@ -60,35 +63,6 @@
}
ocfs_alloc_bm;
-
-
-enum
-{
- OCFS_VOTE_REQUEST = 1,
- OCFS_VOTE_REPLY,
- OCFS_INFO_DISMOUNT
-};
-
-enum {
- DISK_VOTE,
- COMM_VOTE
-};
-
-enum {
- INVALID_REQUEST, // reply with a NO vote
- UPDATE_OIN_INODE, // update both oin and inode
- DELETE_ACQUIRE,// delete or rename acquire request
- DELETE_RELEASE,// delete or rename release request
- CHANGE_MASTER, // request to change master to requestor
- NOT_MASTER, // I am not master, retry
- REMASTER_THIS, // remaster lock to me
- REMASTER_REQUESTOR, // remaster lock to requestor
- DROP_READONLY, // RO cachelock needs to convert to RW
- READONLY, // a RW or RO cachelock, requesting RO
- RELEASE_DENTRY,
- TRUNCATE_PAGES // truncate page caches of a file
-};
-
#define OCFS_MAX_DLM_PKT_SIZE 256
#define OCFS_DLM_MAX_MSG_SIZE 256
#define OCFS_DLM_MSG_MAGIC 0x79677083
@@ -119,92 +93,14 @@
#define MISS_COUNT_EMERGENCY 40
#define MISS_COUNT_NODE_DEAD 60
-/*
-** The following flag values reflect the operation to be performed
-** by ocfs_create_modify_file
-*/
-// FILEFLAG MASK
-#define FLAG_ACQUIRE_LOCK 0x00000001
-#define FLAG_RELEASE_LOCK 0x00000002
-#define FLAG_FILE_EXTEND 0x00000004
-#define FLAG_FILE_DELETE 0x00000008
-#define FLAG_FILE_RENAME 0x00000010
-#define FLAG_FILE_RECOVERY 0x00000020
-#define FLAG_FILE_UPDATE_OIN 0x00000040
-#define FLAG_RELEASE_DENTRY 0x00000080
-#define FLAG_CHANGE_MASTER 0x00000100
-#define FLAG_DIR 0x00000200
-#define FLAG_REMASTER 0x00000400
-#define FLAG_FAST_PATH_LOCK 0x00000800
-#define FLAG_TRUNCATE_PAGES 0x00001000
-#define FLAG_FILE_TRUNCATE 0x00002000
-#define FLAG_DROP_READONLY 0x00004000
-#define FLAG_READONLY 0x00008000
-#define FLAG_FILE_UNUSED01 0x00010000
-#define FLAG_FILE_UNUSED02 0x00020000
-#define FLAG_FILE_UNUSED03 0x00040000
-#define FLAG_FILE_UNUSED04 0x00080000
-#define FLAG_FILE_UNUSED05 0x00100000
-#define FLAG_FILE_UNUSED06 0x00200000
-#define FLAG_FILE_UNUSED07 0x00400000
-#define FLAG_FILE_UNUSED08 0x00800000
-#define FLAG_FILE_UNUSED09 0x01000000
-#define FLAG_FILE_UNUSED10 0x02000000
-#define FLAG_FILE_UNUSED11 0x04000000
-#define FLAG_FILE_UNUSED12 0x08000000
-#define FLAG_FILE_UNUSED13 0x10000000
-#define FLAG_FILE_UNUSED14 0x20000000
-#define FLAG_FILE_UNUSED15 0x40000000
-#define FLAG_FILE_UNUSED16 0x80000000
-
#define OCFS_MAX_OSB_ID 65536
#define HEARTBEAT_METHOD_DISK (1)
#define HEARTBEAT_METHOD_IPC (2)
-
-enum
-{
- LEFT_NO_OVERLAP,
- LEFT_ADJACENT,
- LEFT_OVERLAP,
- FULLY_CONTAINED,
- FULLY_CONTAINING,
- RIGHT_OVERLAP,
- RIGHT_ADJACENT,
- RIGHT_NO_OVERLAP
-};
-
-
-/*
-** Extents Defines
-*/
-
-typedef enum _ocfs_ext_flag {
- LOCAL_EXT = 1,
- NONLOCAL_EXT = 2
-} ocfs_ext_flag;
-
-
#define OCFS_INVALID_NODE_NUM -1
-/* lockres->lock_state bits */
-enum {
- LOCK_STATE_READONLY,
- LOCK_STATE_READONLY_DROPPING,
- LOCK_STATE_BLOCK_EXCLUSIVE,
- LOCK_STATE_BLOCK_READONLY
-};
-
-enum {
- NO_LOCK=0,
- READ_LOCK,
- WRITE_LOCK
-};
-
-
-
/* osb->osb_flags flags */
#define OCFS_OSB_FLAGS_BEING_DISMOUNTED (0x00000004)
#define OCFS_OSB_FLAGS_SHUTDOWN (0x00000008)
@@ -328,17 +224,6 @@
unsigned long map[BITS_TO_LONGS(OCFS_NODE_MAP_MAX_NODES)];
} ocfs_node_map;
-struct _ocfs_lock_res
-{
- __s16 master_node_num; /* Master Node */
- __u32 lock_holders;
- __u32 uncommitted_holders;
- __u8 lock_type;
- struct rw_semaphore lock;
- unsigned long readonly_state;
- ocfs_node_map readonly_map;
-};
-
struct _ocfs_journal_handle;
/* I hate our includes */
@@ -347,6 +232,63 @@
struct rb_root em_extents;
};
+enum ocfs2_ast_action {
+ OCFS2_AST_INVALID = 0,
+ OCFS2_AST_ATTACH,
+ OCFS2_AST_CONVERT,
+ OCFS2_AST_DOWNCONVERT,
+};
+
+struct _ocfs2_lock_res;
+
+enum ocfs2_lock_type {
+ OCFS_TYPE_META = 0,
+ OCFS_TYPE_DATA,
+ OCFS_NUM_LOCK_TYPES
+};
+
+typedef struct _ocfs2_lock {
+ struct _ocfs2_lock_res *l_lockres;
+ enum ocfs2_lock_type l_type;
+ int l_flags;
+ char *l_name;
+ int l_level;
+ unsigned int l_ro_holders;
+ unsigned int l_ex_holders;
+ dlm_lockstatus l_lksb;
+ u32 l_local_seq;
+
+ /* used from AST/BAST funcs. */
+ enum ocfs2_ast_action l_action;
+ int l_requested;
+ int l_blocking;
+} ocfs2_lock;
+
+#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized
+ * the lvb */
+#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
+ * dlm_lock */
+#define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to
+ * downconvert*/
+#define OCFS2_LOCK_LOCAL (0x00000008) /* newly created inode */
+#define OCFS2_LOCK_NEEDS_REFRESH (0x00000010)
+#define OCFS2_LOCK_REFRESHING (0x00000020)
+
+typedef struct _ocfs2_lock_res {
+ /* do i need this or can't i just use container_of? */
+ struct inode *lr_inode;
+ /* actually, should probably just use ip_lock. */
+ spinlock_t lr_lock;
+
+ ocfs2_lock lr_meta;
+ ocfs2_lock lr_data;
+
+ /* should i just make these two a single wait queue? */
+ wait_queue_head_t lr_busy;
+ wait_queue_head_t lr_blocked;
+ wait_queue_head_t lr_refreshing;
+} ocfs2_lock_res;
+
/* OCFS2 Inode Private Data */
typedef struct _ocfs_inode_private
{
@@ -357,6 +299,7 @@
u64 ip_blkno;
ocfs2_lock_res ip_lockres;
+ struct list_head ip_blocked_list;
/* protects allocation changes on this inode. */
struct rw_semaphore ip_alloc_sem;
@@ -378,9 +321,6 @@
struct list_head ip_handle_list;
struct _ocfs_journal_handle *ip_handle;
- struct list_head ip_recovery_list;/* protected by recovery_list_sem */
- __s32 ip_num_extends; /* protected by oin_num_ext_lock */
-
atomic_t ip_clean_buffer_seq;
__u32 ip_flags; /* see below */
@@ -455,14 +395,6 @@
}
ocfs_vol_state;
-typedef struct _ocfs_vol_node_map
-{
- __u64 time;
- __u32 miss_cnt;
- atomic_t dismount;
-}
-ocfs_vol_node_map;
-
typedef struct _ocfs_commit_task
{
struct completion c_complete;
@@ -505,26 +437,23 @@
struct semaphore osb_res; /* resource to protect the ocfs_super */
struct list_head osb_next; /* list of ocfs_super(s) */
__u32 osb_id; /* id used by the proc interface */
- struct completion dlm_complete;
- struct task_struct *dlm_task;
ocfs_commit_task *commit;
__u32 osb_flags;
- ocfs_node_map publ_map;
struct super_block *sb;
struct inode *root_inode;
struct inode *sys_root_inode;
struct inode *system_inodes[NUM_SYSTEM_INODES];
-
+
+ spinlock_t node_map_lock;
+ ocfs_node_map node_map;
+ ocfs_node_map recovery_map;
+
/* new */
u32 num_clusters;
u64 root_blkno;
u64 system_dir_blkno;
u64 bitmap_blkno;
u32 bitmap_blocks;
- u64 publish_blkno;
- u32 publish_blocks;
- u64 vote_blkno;
- u32 vote_blocks;
u64 autoconfig_blkno;
u32 autoconfig_blocks;
u64 new_autoconfig_blkno;
@@ -540,7 +469,6 @@
spinlock_t s_next_gen_lock;
u32 s_next_generation;
- ocfs_vol_node_map *vol_node_map;
struct semaphore cfg_lock;
BARF_BARF_BARF **node_cfg_info;
__u64 cfg_seq_num;
@@ -550,7 +478,6 @@
u16 num_nodes;
s16 node_num;
int reclaim_id; /* reclaim the original node number*/
- __u32 hbt;
int s_sectsize_bits;
int s_clustersize;
int s_clustersize_bits;
@@ -561,29 +488,22 @@
atomic_t vol_state;
struct semaphore orphan_recovery_lock;
struct semaphore recovery_lock;
- spinlock_t recovery_map_lock;
- ocfs_node_map recovery_map;
int disable_recovery;
atomic_t num_recovery_threads;
struct timer_list lock_timer;
atomic_t lock_stop;
wait_queue_head_t lock_event;
atomic_t lock_event_woken;
- struct semaphore comm_lock; /* protects ocfs_comm_process_vote_reply */
atomic_t nm_init;
wait_queue_head_t nm_init_event;
__u32 prealloc_lock;
struct buffer_head **autoconfig_bhs;
- struct semaphore publish_lock; /* protects r/w to publish sector */
atomic_t node_req_vote; /* set when node's vote req pending */
- int publish_dirty;
wait_queue_head_t flush_event;
atomic_t flush_event_woken;
struct _ocfs_journal *journal;
atomic_t clean_buffer_seq;
spinlock_t clean_buffer_lock;
- struct list_head *lock_recovery_lists;
- __u64 *last_publ_seq_num;
int have_local_alloc;
struct buffer_head *local_alloc_bh;
__u8 check_mounted; /* tell nm to check mounted flag, protected by publish_lock*/
@@ -591,10 +511,6 @@
ocfs_dlm_stats net_reply_stats; /* stats of netdlm vote responses */
ocfs_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
- struct semaphore vote_sem; /* protects calls to ocfs_process_vote */
- struct list_head vote_obj_queue;
- spinlock_t vote_obj_queue_lock;
- wait_queue_head_t open_event;
dlm_ctxt *dlm;
@@ -606,10 +522,12 @@
wait_queue_head_t vote_event;
atomic_t wake_vote_task;
int vote_exit;
- struct list_head blocked_inode_list
+ struct list_head blocked_inode_list;
+ int blocked_inode_count;
struct list_head vote_list;
int vote_count;
struct completion vote_event_complete;
+ struct completion vote_event_init;
};
typedef struct _ocfs_comm_info
@@ -643,127 +561,11 @@
}
ocfs_global_ctxt;
-typedef struct _ocfs_ipc_ctxt
-{
- __u32 dlm_msg_size;
- __u16 version;
- int init;
- struct socket *send_sock;
- struct socket *recv_sock;
- struct completion complete;
- struct task_struct *task;
-}
-ocfs_ipc_ctxt;
-
-
-extern ocfs_ipc_ctxt OcfsIpcCtxt;
-
-typedef struct _ocfs_ipc_dlm_config
-{
- __u16 version;
- __u32 msg_size;
- __u32 num_recv_threads;
-}
-ocfs_ipc_dlm_config;
-
/*
** Globals ...
*/
extern ocfs_global_ctxt OcfsGlobalCtxt;
-
-/*
- * DLM network stuff
- */
-typedef struct _ocfs_dlm_msg_hdr
-{
- __u64 lock_id;
- __u64 lock_seq_num;
- __u32 flags;
- __u8 odmh_pad[4];
- __u32 num_ident; /* number of identical messages, always >= 1 */
-} ocfs_dlm_msg_hdr;
-
-typedef ocfs_dlm_msg_hdr ocfs_dlm_req_master;
-
-typedef struct _ocfs_dlm_reply_master
-{
- ocfs_dlm_msg_hdr h;
- __u32 status;
-}
-ocfs_dlm_reply_master;
-
-typedef struct _ocfs_dlm_msg
-{
- __u32 magic;
- __u32 msg_len;
- __u8 vol_id[MAX_VOL_ID_LENGTH];
- __s16 src_node;
- __s16 dst_node;
- __u32 msg_type;
- __u32 check_sum;
- __u8 msg_buf[0];
-} ocfs_dlm_msg;
-
-typedef struct _ocfs_vote_obj
-{
- struct list_head list;
- wait_queue_head_t voted_event;
- atomic_t voted_event_woken;
- atomic_t refcount;
- spinlock_t lock;
- __u32 vote_state;
- __u32 req_lock_type;
- int vote_status;
- ocfs_node_map req_vote_map;
- ocfs_node_map got_vote_map;
- //ocfs_node_map tmp_openmap;
- __u64 seq_num;
- pid_t pid;
- ocfs_dlm_msg m;
-} ocfs_vote_obj;
-
-enum {
- VOTE_OBJ_STATE_UNSENT,
- VOTE_OBJ_STATE_SENT,
- VOTE_OBJ_STATE_PARTIAL_REPLY,
- VOTE_OBJ_STATE_FULL_REPLY,
- VOTE_OBJ_STATE_DESTROYING
-};
-
-
-
-typedef struct _ocfs_vote_obj_lookup_data ocfs_vote_obj_lookup_data;
-
-struct _ocfs_vote_obj_lookup_data
-{
- union {
- struct {
- __u64 seq_num;
- __u64 lock_id;
- } s;
- struct {
- char *page;
- int *len;
- int max;
- } proc;
- } u;
- int (*func) (ocfs_vote_obj *obj, struct _ocfs_vote_obj_lookup_data *data);
- ocfs_vote_obj **ret;
-};
-
-
-
-
-typedef struct _ocfs_recv_ctxt
-{
- __s32 msg_len;
- __u8 msg[OCFS_MAX_DLM_PKT_SIZE];
- int status;
- struct work_struct ipc_wq;
-}
-ocfs_recv_ctxt;
-
typedef struct _ocfs_cfg_task
{
struct work_struct cfg_wq;
@@ -781,22 +583,6 @@
}
ocfs_volcfg_op;
-typedef struct _ocfs_vote_request_ctxt
-{
- __s16 node_num;
- int status;
- ocfs_dlm_msg *dlm_msg;
-} ocfs_vote_request_ctxt;
-
-typedef struct _ocfs_vote_reply_ctxt
-{
- int reply_method;
- int *status;
- ocfs_node_map *got_vote_map;
- __u32 flags;
- ocfs_dlm_reply_master *reply;
-} ocfs_vote_reply_ctxt;
-
struct ocfs_ioc
{
char name[255]; /* "OCFS" */
@@ -938,15 +724,6 @@
return -EINVAL;
}
-static inline int ocfs_is_local_cache_lock(ocfs_super *osb, struct inode *inode)
-{
- ocfs_lock_res *lockres = GET_INODE_LOCKRES(inode);
- if (lockres->lock_type == OCFS_LKM_EXMODE &&
- lockres->master_node_num == osb->node_num)
- return 1;
- return 0;
-}
-
static inline int ocfs_blocks_for_bits(struct super_block *sb,
unsigned int num_bits)
{
Modified: branches/dlm-glue/src/ocfs2_disk_dlm.h
===================================================================
--- branches/dlm-glue/src/ocfs2_disk_dlm.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/ocfs2_disk_dlm.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -42,6 +42,18 @@
} ocfs_ipc_config_info;
/*
+ * On disk lock structure for OCFS2
+ * Used only for volcfg locking now.
+ */
+typedef struct _ocfs2_disk_lock
+{
+/*00*/ __s16 dl_master; /* Node number of current master */
+ __u8 dl_level; /* Lock level */
+ __u8 dl_reserved1;
+/*04*/
+} ocfs2_disk_lock;
+
+/*
* On-disk structure representing a Global Unique ID for an OCFS2 node.
*
* The GUID has two parts. The host_id is a generally-randomly-unique
Modified: branches/dlm-glue/src/ocfs2_fs.h
===================================================================
--- branches/dlm-glue/src/ocfs2_fs.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/ocfs2_fs.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -96,6 +96,11 @@
#define OCFS2_DLM_FL (0x00000200) /* DLM area */
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
+/*
+ * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
+ */
+#define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */
+
/* Limit of space in ocfs2_dir_entry */
#define OCFS2_MAX_FILENAME_LENGTH 255
@@ -190,7 +195,6 @@
* Convenience casts
*/
#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
-#define DISK_LOCK(dinode) (&((dinode)->i_disk_lock))
#define LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
/* TODO: change these? */
@@ -279,17 +283,6 @@
} ocfs2_extent_block;
/*
- * On disk lock structure for OCFS2
- */
-typedef struct _ocfs2_disk_lock
-{
-/*00*/ __s16 dl_master; /* Node number of current master */
- __u8 dl_level; /* Lock level */
- __u8 dl_reserved1;
-/*04*/
-} ocfs2_disk_lock;
-
-/*
* On disk superblock for OCFS2
* Note that it is contained inside an ocfs2_dinode, so all offsets
* are relative to the start of ocfs2_dinode.id2.
@@ -350,7 +343,7 @@
belongs to */
__u16 i_suballoc_bit; /* Bit offset in suballocater
block group */
-/*10*/ ocfs2_disk_lock i_disk_lock; /* Lock structure */
+ __u32 i_reserved0;
/*14*/ __u32 i_clusters; /* Cluster count */
/*18*/ __u32 i_uid; /* Owner UID */
__u32 i_gid; /* Owning GID */
@@ -378,6 +371,11 @@
__u32 i_total; /* Total bits (clusters)
available */
} bitmap1;
+ struct { /* Info for journal system
+ inodes */
+ __u32 i_flags; /* Mounted, version, etc. */
+ __u32 i_j_pad;
+ } journal1;
} id1; /* Inode type dependant 1 */
/*C0*/ union {
ocfs2_super_block i_super;
Modified: branches/dlm-glue/src/ocfs_journal.h
===================================================================
--- branches/dlm-glue/src/ocfs_journal.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/ocfs_journal.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -114,11 +114,8 @@
/* convenience function to check if an inode has been checkpointed
* yet. Replaces ocfs_journal_new_file_search. Will do you a favor and
* set created_trans = 0 when you've been checkpointed.
- * returns '1' if the inode hasn't been checkpointed yet.
- *
- */
-static inline int ocfs_inode_is_new(ocfs_super *osb,
- struct inode *inode)
+ * returns '1' if the inode hasn't been checkpointed yet. */
+static inline int ocfs_inode_is_new(struct inode *inode)
{
int ret;
@@ -126,10 +123,10 @@
* mkfs. This helps us early during mount, before we have the
* journal open and trans_id could be junk. */
if (OCFS_I(inode)->ip_flags & OCFS_INODE_SYSTEM_FILE)
- return(0);
+ return 0;
spin_lock(&trans_inc_lock);
- ret = !(time_after(osb->journal->trans_id,
- OCFS_I(inode)->ip_created_trans));
+ ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->trans_id,
+ OCFS_I(inode)->ip_created_trans));
if (!ret)
OCFS_I(inode)->ip_created_trans = 0;
spin_unlock(&trans_inc_lock);
@@ -203,7 +200,7 @@
* ocfs_recovery_thread - Perform recovery on a node. osb is our own osb.
* ocfs_start_checkpoint - Kick the commit thread to do a checkpoint.
*/
-int ocfs_journal_init(struct _ocfs_super *osb);
+int ocfs_journal_init(struct _ocfs_super *osb, int *dirty);
void ocfs_journal_shutdown(struct _ocfs_super *osb);
int ocfs_journal_wipe(ocfs_journal *journal, int full);
int ocfs_journal_load(ocfs_journal *journal);
Modified: branches/dlm-glue/src/ocfs_log.h
===================================================================
--- branches/dlm-glue/src/ocfs_log.h 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/ocfs_log.h 2004-10-28 02:14:29 UTC (rev 1596)
@@ -100,9 +100,9 @@
#define OCFS_DEBUG_CONTEXT_ALLOC 0x00000001 /* alloc.c */
#define OCFS_DEBUG_CONTEXT_DIR 0x00000002 /* dir.c */
#define OCFS_DEBUG_CONTEXT_EXTMAP 0x00000004 /* extmap.c */
-#define OCFS_DEBUG_CONTEXT_HEARTBEAT 0x00000008 /* hearbeat.c */
+#define OCFS_DEBUG_CONTEXT_UNUSED1 0x00000008 /* */
#define OCFS_DEBUG_CONTEXT_IOCTL 0x00000010 /* ioctl.c */
-#define OCFS_DEBUG_CONTEXT_NM 0x00000020 /* nm.c */
+#define OCFS_DEBUG_CONTEXT_UNUSED2 0x00000020 /* */
#define OCFS_DEBUG_CONTEXT_PROC 0x00000040 /* proc.c */
#define OCFS_DEBUG_CONTEXT_SYMLINK 0x00000080 /* symlink.c */
#define OCFS_DEBUG_CONTEXT_BITMAP 0x00000100 /* bitmap.c */
@@ -114,15 +114,15 @@
#define OCFS_DEBUG_CONTEXT_SYSFILE 0x00004000 /* sysfile.c */
#define OCFS_DEBUG_CONTEXT_VOLCFG 0x00008000 /* volcfg.c */
#define OCFS_DEBUG_CONTEXT_DCACHE 0x00010000 /* dcache.c */
-#define OCFS_DEBUG_CONTEXT_DLM 0x00020000 /* dlm.c */
+#define OCFS_DEBUG_CONTEXT_DLMGLUE 0x00020000 /* dlmglue.c */
#define OCFS_DEBUG_CONTEXT_HASH 0x00040000 /* hash.c */
#define OCFS_DEBUG_CONTEXT_IO 0x00080000 /* io.c */
#define OCFS_DEBUG_CONTEXT_NAMEI 0x00100000 /* namei.c */
#define OCFS_DEBUG_CONTEXT_OSB 0x00200000 /* osb.c */
#define OCFS_DEBUG_CONTEXT_SUPER 0x00400000 /* super.c */
#define OCFS_DEBUG_CONTEXT_UTIL 0x00800000 /* util.c */
-#define OCFS_DEBUG_CONTEXT_VOTE 0x01000000 /* vote.c */
-#define OCFS_DEBUG_CONTEXT_LOCKRES 0x02000000 /* lockres.c */
+#define OCFS_DEBUG_CONTEXT_UNUSED3 0x01000000 /* */
+#define OCFS_DEBUG_CONTEXT_UNUSED4 0x02000000 /* */
#ifdef OCFS_DBG_TIMING
Modified: branches/dlm-glue/src/proc.c
===================================================================
--- branches/dlm-glue/src/proc.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/proc.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -38,8 +38,8 @@
#include "ocfs2.h"
#include "proc.h"
-#include "vote.h"
#include "alloc.h"
+#include "heartbeat.h"
#include "ocfs_journal.h"
@@ -57,7 +57,6 @@
static int ocfs_proc_statistics (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_device (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_nodes (char *page, char **start, off_t off, int count, int *eof, void *data);
-static int ocfs_proc_net_vote_obj (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_alloc_stat(char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_guid (char *page, char **start, off_t off, int count, int *eof, void *data);
static int ocfs_proc_label (char *page, char **start, off_t off, int count, int *eof, void *data);
@@ -83,7 +82,6 @@
{ "lockstat", NULL, ocfs_proc_dlm_stats },
{ "device", NULL, ocfs_proc_device },
{ "nodes", NULL, ocfs_proc_nodes },
- { "sent-votes", NULL, ocfs_proc_net_vote_obj },
{ "allocstat", NULL, ocfs_proc_alloc_stat },
{ "guid", NULL, ocfs_proc_guid },
{ "label", NULL, ocfs_proc_label },
@@ -448,7 +446,7 @@
ptr = pubmap;
for (i = 0; i < osb->max_nodes; i++) {
- if (ocfs_node_map_test_bit(&osb->publ_map, i))
+ if (ocfs_node_map_test_bit(&osb->node_map, i))
ptr += sprintf (ptr, "%d ", i);
}
if (pubmap != ptr)
@@ -529,7 +527,7 @@
node = osb->node_cfg_info[i];
if (!node)
continue;
- mount = ocfs_node_map_test_bit(&osb->publ_map, i) ? 'M' : ' ';
+ mount = ocfs_node_map_test_bit(&osb->node_map, i) ? 'M' : ' ';
len += sprintf (page + len,
"%2d %c %-32s 0x%08u %-6u ",
i, mount, node->node_name,
@@ -550,33 +548,6 @@
} /* ocfs_proc_nodes */
/*
- * ocfs_proc_net_votes()
- *
- */
-static int ocfs_proc_net_vote_obj (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len = 0, ret;
- ocfs_super *osb;
- ocfs_vote_obj_lookup_data d; // 24 bytes
-
- LOG_ENTRY ();
-
- osb = data;
-
- d.func = ocfs_lookup_obj_for_proc;
- d.ret = NULL;
- d.u.proc.page = page;
- d.u.proc.len = &len;
- d.u.proc.max = 4096;
- ret = ocfs_lookup_vote_request_obj (osb, &d);
- ret = ocfs_proc_calc_metrics (page, start, off, count, eof, len);
-
- LOG_EXIT_INT (ret);
- return ret;
-} /* ocfs_proc_net_vote_obj */
-
-/*
* ocfs_proc_guid()
*
*/
Modified: branches/dlm-glue/src/suballoc.c
===================================================================
--- branches/dlm-glue/src/suballoc.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/suballoc.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -36,7 +36,7 @@
#include "ocfs2.h"
#include "alloc.h"
-#include "dlm.h"
+#include "dlmglue.h"
#include "util.h"
#include "suballoc.h"
#include "sysfile.h"
Modified: branches/dlm-glue/src/super.c
===================================================================
--- branches/dlm-glue/src/super.c 2004-10-26 23:32:13 UTC (rev 1595)
+++ branches/dlm-glue/src/super.c 2004-10-28 02:14:29 UTC (rev 1596)
@@ -51,19 +51,18 @@
#include "alloc.h"
#include "bitmap.h"
+#include "dlmglue.h"
#include "extent_map.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
-#include "nm.h"
#include "proc.h"
#include "super.h"
#include "sysfile.h"
#include "util.h"
#include "ver.h"
#include "volcfg.h"
-#include "vote.h"
#include "ocfs_journal.h"
#include "buffer_head_io.h"
@@ -76,8 +75,6 @@
ocfs_global_ctxt OcfsGlobalCtxt;
spinlock_t osb_id_lock;
__u32 osb_id; /* Keeps track of next available OSB Id */
-spinlock_t mount_cnt_lock;
-__u32 mount_cnt; /* Number of volumes currently mounted */
char *node_name = NULL;
__s32 node_number = OCFS_INVALID_NODE_NUM;
@@ -159,14 +156,13 @@
MODULE_PARM_DESC(cs, "Checksum");
#endif /* Linux 2.4 stuff */
-extern struct semaphore recovery_list_sem;
static int ocfs_parse_options (char *options, __u32 * uid, __u32 * gid, int * reclaim_id);
static int __init ocfs_driver_entry (void);
static void __exit ocfs_driver_exit (void);
static void ocfs_put_super (struct super_block *sb);
static int ocfs_mount_volume (struct super_block *sb, int reclaim_id, struct inode *root);
-static int ocfs_dismount_volume (struct super_block *sb);
+static void ocfs_dismount_volume(struct super_block *sb);
static int ocfs_read_params(void);
static int ocfs_initialize_mem_lists (void);
static void ocfs_free_mem_lists (void);
@@ -183,10 +179,6 @@
static int ocfs_init_global_system_inodes(ocfs_super *osb);
static int ocfs_init_local_system_inodes(ocfs_super *osb);
static int ocfs_release_system_inodes(ocfs_super *osb);
-static int ocfs_publish_set_unmounted(ocfs_super *osb, int node_num);
-static int ocfs_publish_set_mounted(ocfs_super *osb, int node_num);
-static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num,
- int value);
static int ocfs_check_volume(ocfs_super * osb);
static int ocfs_verify_volume(ocfs2_dinode *di, struct buffer_head *bh,
__u32 sectsize);
@@ -537,7 +529,6 @@
}
memset (&OcfsGlobalCtxt, 0, sizeof (ocfs_global_ctxt));
- memset (&OcfsIpcCtxt, 0, sizeof (ocfs_ipc_ctxt));
INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));
@@ -562,17 +553,11 @@
osb_id = 0;
spin_unlock (&osb_id_lock);
- spin_lock_init (&mount_cnt_lock);
- spin_lock (&mount_cnt_lock);
- mount_cnt = 0;
- spin_unlock (&mount_cnt_lock);
-
spin_lock_init (&OcfsGlobalCtxt.comm_seq_lock);
spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
OcfsGlobalCtxt.comm_seq_num = 0;
spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);
- init_MUTEX (&recovery_list_sem);
/* Initialize the proc interface */
ocfs_proc_init ();
@@ -944,7 +929,6 @@
{
int status;
ocfs_super *osb;
- int child_pid;
struct buffer_head *bh = NULL;
int sector_size;
@@ -981,96 +965,30 @@
goto leave;
}
- down(&(osb->osb_res));
+ status = ocfs2_register_hb_callbacks(osb);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
+ goto leave;
+ }
- /* Launch the NM thread for the mounted volume */
- osb->dlm_task = NULL;
- child_pid = kernel_thread (ocfs_heartbeat_thread, osb,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (child_pid < 0) {
- LOG_ERROR_ARGS ("unable to launch ocfs2nm thread, error=%d",
- child_pid);
- up (&(osb->osb_res));
- status = child_pid;
+ status = ocfs2_dlm_init(osb);
+ if (status < 0) {
+ LOG_ERROR_STATUS (status);
goto leave;
- } else {
- init_completion (&osb->dlm_complete);
}
- up (&(osb->osb_res));
-
/* Add proc entry for this volume */
ocfs_proc_add_volume (osb);
- /* GlobalMountCount */
- spin_lock (&mount_cnt_lock);
- mount_cnt++;
- if (mount_cnt == 1) {
- OcfsIpcCtxt.dlm_msg_size = OCFS_DLM_MAX_MSG_SIZE;
- OcfsIpcCtxt.version = OCFS_IPC_DLM_VERSION;
- /* start the listener thread */
- status = ocfs_init_udp_sock(&OcfsIpcCtxt.send_sock,
- &OcfsIpcCtxt.recv_sock);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto leave;
- }
- OcfsIpcCtxt.task = NULL;
- child_pid = kernel_thread (ocfs_recv_thread, NULL,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (child_pid >= 0) {
- init_completion (&(OcfsIpcCtxt.complete));
- } else {
- status = child_pid;
- LOG_ERROR_ARGS ("unable to launch ocfs2lsnr thread, error=%d", child_pid);
- goto leave;
- }
- }
- spin_unlock (&mount_cnt_lock);
-
- /* wait for nm thread to be init */
- ocfs_wait (osb->nm_init_event, (atomic_read (&osb->nm_init) >= OCFS_HEARTBEAT_INIT ), 0);
-
- down(&(osb->osb_res));
- down (&(osb->publish_lock));
- ocfs_nm_heart_beat (osb, HEARTBEAT_METHOD_DISK, 1);
- up (&(osb->publish_lock));
-
- ocfs_node_map_set_bit(&osb->publ_map, osb->node_num);
- up (&(osb->osb_res));
-
/* Read the publish sector for this node and cleanup dirent being */
/* modified when we crashed. */
LOG_TRACE_STR ("ocfs_check_volume...");
- down(&(osb->osb_res));
status = ocfs_check_volume (osb);
if (status < 0) {
- up(&(osb->osb_res));
LOG_ERROR_STATUS (status);
goto leave;
}
- /* Launch the commit thread */
- osb->commit = ocfs_malloc(sizeof(ocfs_commit_task));
- if (osb->commit == NULL) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- up (&(osb->osb_res));
- goto leave;
- }
- memset(osb->commit, 0, sizeof(ocfs_commit_task));
- child_pid = kernel_thread (ocfs_commit_thread, osb,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (child_pid < 0) {
- LOG_ERROR_ARGS ("unable to launch ocfs2commit thread, error=%d",
- child_pid);
- up (&(osb->osb_res));
- status = child_pid;
- goto leave;
- } else {
- init_completion (&osb->commit->c_complete);
- }
- up (&(osb->osb_res));
-
leave:
if (bh != NULL)
brelse(bh);
@@ -1083,27 +1001,17 @@
* ocfs_dismount_volume()
*
*/
-static int ocfs_dismount_volume (struct super_block *sb)
+static void ocfs_dismount_volume (struct super_block *sb)
{
- int status;
- int AcquiredOSB = 0;
ocfs_super *osb = NULL;
int i;
LOG_ENTRY_ARGS ("(0x%p)\n", sb);
- if (sb == NULL) {
- LOG_ERROR_STATUS (status = -EINVAL);
- goto leave;
- }
-
+ OCFS_ASSERT(sb);
osb = OCFS_SB(sb);
+ OCFS_ASSERT(osb);
- if (osb == NULL) {
- LOG_ERROR_STATUS (status = -EINVAL);
- goto leave;
- }
-
/* disable any new recovery threads and wait for any currently
* running ones to exit. Do this before setting the vol_state. */
down(&osb->recovery_lock);
@@ -1115,17 +1023,10 @@
}
down(&(osb->osb_res));
- AcquiredOSB = 1;
ocfs_shutdown_local_alloc(osb);
ocfs_journal_shutdown(osb);
- /* unset the mounted flag -- we're done with the journal and
- * the local alloc bitmap */
- status = ocfs_publish_set_unmounted(osb, osb->node_num);
- if (status < 0)
- LOG_ERROR_STR("Could not set mounted flag!");
-
ocfs_sync_blockdev(sb);
/* Remove the proc element for this volume */
@@ -1134,43 +1035,14 @@
/* Dismount */
OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_BEING_DISMOUNTED);
- /* Wait for this volume's NM thread to exit */
- if (osb->dlm_task) {
- LOG_TRACE_STR ("Waiting for ocfs2nm to exit....");
- send_sig (SIGINT, osb->dlm_task, 0);
- wait_for_completion (&(osb->dlm_complete));
- osb->dlm_task = NULL;
- }
+ ocfs2_dlm_shutdown(osb);
- /* send dismount msg to all */
- status = ocfs_send_dismount_msg (osb);
- if (status < 0)
- LOG_ERROR_STATUS (status);
+ ocfs2_clear_hb_callbacks(osb);
- /* decrement mount count */
- spin_lock (&mount_cnt_lock);
- mount_cnt--;
- if (mount_cnt == 0) {
- /* Shutdown ocfslsnr */
- if (OcfsIpcCtxt.task) {
- LOG_TRACE_STR ("Waiting for ocfs2lsnr to exit....");
- send_sig (SIGINT, OcfsIpcCtxt.task, 0);
- wait_for_completion (&(OcfsIpcCtxt.complete));
- OcfsIpcCtxt.task = NULL;
- }
- }
- spin_unlock (&mount_cnt_lock);
-
- down (&(OcfsGlobalCtxt.global_res));
ocfs_uninitialize_bitmap(&osb->cluster_bitmap);
-// list_del(&osb->osb_next); /* this has been moved into ocfs_delete_osb */
- up (&(OcfsGlobalCtxt.global_res));
atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
- if (AcquiredOSB) {
- up (&(osb->osb_res));
- AcquiredOSB = 0;
- }
+ up (&(osb->osb_res));
printk ("ocfs2: Unmounting device (%u,%u) on %s (node %d)\n",
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev),
@@ -1190,130 +1062,8 @@
ocfs_delete_osb (osb);
kfree(osb);
sb->s_dev = 0;
-
-leave:
- if (AcquiredOSB) {
- up (&(osb->osb_res));
- AcquiredOSB = 0;
- }
-
- LOG_EXIT_STATUS (status);
- return status;
} /* ocfs_dismount_volume */
-
-/* true if mounted, false otherwise */
-int ocfs_publish_get_mount_state(ocfs_super *osb, int node_num)
-{
- int status;
- ocfs_publish *publish;
- struct buffer_head *publish_bh = NULL;
- int retval = 0;
- int flags = 0;
-
- LOG_ENTRY();
-
- /* read it in */
- /* we may be called during mount in which case our publish
- * sector might be dirty. */
- if (node_num == osb->node_num)
- flags = OCFS_BH_CACHED;
- status = ocfs_read_block(osb, (osb->publish_blkno + node_num),
- &publish_bh, flags, NULL);
- if (status < 0) {
- brelse(publish_bh);
- LOG_ERROR_STR("Could not read publish sector, mounted value"
- " may be incorrect!");
- LOG_ERROR_STATUS (status);
- goto done;
- }
- publish = (ocfs_publish *) publish_bh->b_data;
-
- retval = publish->mounted;
-
- brelse(publish_bh);
-done:
- LOG_EXIT_STATUS(retval);
- return(retval);
-}
-
-static int ocfs_publish_toggle_mounted(ocfs_super *osb, int node_num, int value)
-{
- int status;
- ocfs_publish *publish;
- struct buffer_head * publish_bh = NULL;
-
- LOG_ENTRY_ARGS("(node_num=%d, value=%d)\n", node_num, value);
-
- /* read it in */
- status = ocfs_read_block(osb, (osb->publish_blkno + node_num),
- &publish_bh, 0, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto done;
- }
- publish = (ocfs_publish *) publish_bh->b_data;
-
- /* change it */
- publish->mounted = value;
-
- /* write it back out */
- status = ocfs_write_block(osb, publish_bh, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto done;
- }
-
-done:
- if (publish_bh)
- brelse(publish_bh);
-
- LOG_EXIT_STATUS(status);
-
- return(status);
-}
-
-/* set the 'mounted' bit in the publish sector */
-static int ocfs_publish_set_mounted(ocfs_super *osb, int node_num)
-{
- int retval;
-
- LOG_ENTRY_ARGS("(node_num=%d)\n", node_num);
-
- down (&(osb->publish_lock));
-
- retval = ocfs_publish_toggle_mounted(osb, node_num, 1);
- if (osb->node_num == node_num)
- osb->check_mounted = 1;
-
- up (&(osb->publish_lock));
-
- LOG_EXIT_STATUS(retval);
-
- return(retval);
-}
-
-/* unset the 'mounted' bit in the publish sector */
-static int ocfs_publish_set_unmounted(ocfs_super *osb, int node_num)
-{
- int retval;
-
- LOG_ENTRY_ARGS("(node_num=%d)\n", node_num);
-
- down (&(osb->publish_lock));
-
- if (osb->node_num == node_num)
- osb->check_mounted = 0;
-
- retval = ocfs_publish_toggle_mounted(osb, node_num, 0);
-
- up (&(osb->publish_lock));
-
- LOG_EXIT_STATUS(retval);
-
- return(retval);
-}
-
/*
* ocfs_initialize_osb()
*
@@ -1321,10 +1071,7 @@
static int ocfs_initialize_osb(ocfs_super *osb, struct buffer_head *bh)
{
int status = 0;
- ocfs_publish *publish = NULL;
u64 p_blkno;
- struct buffer_head *publish_bh = NULL; /* our own publish sector */
- struct buffer_head **publish_bhs = NULL; /* all the publish sectors */
int i;
ocfs2_dinode *di = NULL;
struct inode *inode = NULL;
@@ -1336,16 +1083,15 @@
if (!osb->vol_label) {
LOG_ERROR_STR("unable to alloc vol label");
status = -ENOMEM;
- goto done_nojournal;
+ goto finally;
}
osb->uuid = kmalloc(MAX_VOL_ID_LENGTH, GFP_KERNEL);
if (!osb->uuid) {
LOG_ERROR_STR("unable to alloc uuid");
status = -ENOMEM;
- goto done_nojournal;
+ goto finally;
}
-
/* this needs to be done before most other initializations */
di = (ocfs2_dinode *) bh->b_data;
osb->max_nodes = le32_to_cpu(di->id2.i_super.s_max_nodes);
@@ -1353,7 +1099,7 @@
LOG_ERROR_ARGS("Invalid number of nodes (%u)\n",
osb->max_nodes);
status = -EINVAL;
- goto done_nojournal;
+ goto finally;
}
printk("max_nodes for this device: %u\n", osb->max_nodes);
@@ -1367,22 +1113,23 @@
if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) {
LOG_ERROR_ARGS("couldn't mount because of unsupported "
"optional features (%x).\n", i);
- goto done_nojournal;
+ goto finally;
}
if (!(osb->sb->s_flags & MS_RDONLY) &&
(i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) {
LOG_ERROR_ARGS("couldn't mount RDWR because of "
"unsupported optional features (%x).\n",
i);
- goto done_nojournal;
+ goto finally;
}
atomic_set(&osb->recovery_count, 0);
init_waitqueue_head(&osb->recovery_event);
atomic_set(&osb->wake_vote_task, 0);
- spin_lock_init(&osb->blocked_list_lock);
+ spin_lock_init(&osb->vote_task_lock);
init_waitqueue_head(&osb->vote_event);
INIT_LIST_HEAD(&osb->blocked_inode_list);
+ osb->blocked_inode_count = 0;
INIT_LIST_HEAD(&osb->vote_list);
spin_lock_init(&osb->s_next_gen_lock);
get_random_bytes(&osb->s_next_generation, sizeof(u32));
@@ -1400,37 +1147,9 @@
if (!osb->journal) {
LOG_ERROR_STR("unable to alloc journal");
status = -ENOMEM;
- goto done_nojournal;
- }
- memset(osb->journal, 0, sizeof(ocfs_journal));
-
- publish_bhs = kmalloc(sizeof(struct buffer_head *) * osb->max_nodes, GFP_KERNEL);
- if (publish_bhs == NULL) {
- LOG_ERROR_STATUS(status = -ENOMEM);
goto finally;
}
- memset(publish_bhs, 0, sizeof(struct buffer_head *) * osb->max_nodes);
-
- osb->vol_node_map = kmalloc(sizeof(ocfs_vol_node_map) * osb->max_nodes, GFP_KERNEL);
- if (!osb->vol_node_map) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto bail;
- }
- memset(osb->vol_node_map, 0, sizeof(ocfs_vol_node_map) * osb->max_nodes);
-
- osb->lock_recovery_lists = kmalloc(sizeof(struct list_head) * osb->max_nodes, GFP_KERNEL);
- if (!osb->lock_recovery_lists) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto bail;
- }
- memset(osb->lock_recovery_lists, 0, sizeof(struct list_head) * osb->max_nodes);
-
- osb->last_publ_seq_num = kmalloc(sizeof(__u64) * osb->max_nodes, GFP_KERNEL);
- if (!osb->last_publ_seq_num) {
- LOG_ERROR_STATUS(status = -ENOMEM);
- goto bail;
- }
- memset(osb->last_publ_seq_num, 0, sizeof(__u64) * osb->max_nodes);
+ memset(osb->journal, 0, sizeof(ocfs_journal));
osb->node_cfg_info = kmalloc(sizeof(BARF_BARF_BARF *) * osb->max_nodes, GFP_KERNEL);
if (!osb->node_cfg_info) {
@@ -1439,9 +1158,8 @@
}
memset(osb->node_cfg_info, 0, sizeof(BARF_BARF_BARF *) * osb->max_nodes);
- ocfs_node_map_init(osb, &osb->publ_map);
+ ocfs2_init_node_maps(osb);
-
OCFS_CLEAR_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN);
INIT_LIST_HEAD (&(osb->osb_next));
@@ -1452,17 +1170,12 @@
init_MUTEX (&(osb->osb_res));
init_MUTEX (&(osb->recovery_lock));
init_MUTEX (&(osb->orphan_recovery_lock));
- init_MUTEX (&(osb->comm_lock));
init_MUTEX (&(osb->cfg_lock));
- init_MUTEX (&(osb->vote_sem));
- spin_lock_init(&osb->recovery_map_lock);
- ocfs_node_map_init(osb, &osb->recovery_map);
osb->needs_flush = 0;
osb->disable_recovery = 0;
- init_MUTEX (&(osb->publish_lock));
atomic_set (&osb->node_req_vote, 0);
atomic_set (&osb->num_recovery_threads, 0);
@@ -1470,23 +1183,16 @@
init_waitqueue_head (&osb->nm_init_event);
atomic_set (&osb->nm_init, 0);
- osb->publish_dirty = 0;
init_waitqueue_head (&osb->flush_event);
atomic_set (&osb->flush_event_woken, 0);
atomic_set (&osb->clean_buffer_seq, 1);
spin_lock_init (&osb->clean_buffer_lock);
- spin_lock_init (&osb->vote_obj_queue_lock);
- INIT_LIST_HEAD (&(osb->vote_obj_queue));
- for (i=0; i<osb->max_nodes; i++) {
- INIT_LIST_HEAD(&(osb->lock_recovery_lists[i]));
- }
osb->node_num = OCFS_INVALID_NODE_NUM;
osb->have_local_alloc = 0;
osb->local_alloc_bh = NULL;
- init_waitqueue_head (&osb->open_event);
/* not using any of these sb fields yet */
#if 0
di->i_ctime = cpu_to_le64(format_time); // use this as s_wtime (write time)
@@ -1575,18 +1281,9 @@
osb->new_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS;
osb->total_autoconfig_blocks = OCFS_VOLCFG_NEWCFG_SECTORS + osb->max_nodes;
- osb->publish_blkno = osb->new_autoconfig_blkno + osb->new_autoconfig_blocks;
- osb->publish_blocks = osb->max_nodes;
-
- osb->vote_blkno = osb->publish_blkno + osb->publish_blocks;
- osb->vote_blocks = osb->max_nodes;
-
printk("autoconfig: blkno=%llu, blocks=%u newblkno=%llu newblocks=%u\n",
osb->autoconfig_blkno, osb->autoconfig_blocks,
osb->new_autoconfig_blkno, osb->new_autoconfig_blocks);
- printk("publish: blkno=%llu, blocks=%u\n", osb->publish_blkno,
- osb->publish_blocks);
- printk("vote: blkno=%llu, blocks=%u\n", osb->vote_blkno, osb->vote_blocks);
osb->autoconfig_bhs = ocfs_malloc (osb->total_autoconfig_blocks
* sizeof(struct buffer_head *));
@@ -1652,47 +1349,6 @@
goto finally;
}
- /* Read the Publish Sector of local Node */
- status = ocfs_read_block(osb, (osb->publish_blkno + osb->node_num),
- &publish_bh, 0, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
- publish = (ocfs_publish *) publish_bh->b_data;
-
- /*
- * FIXME: This really ought to be something exported by the
- * identical code in heartbeat.c
- */
- publish->time = jiffies;
- /* Disallow 0 */
- if (!publish->time)
- publish->time = 1;
-
- publish = NULL;
-
- status = ocfs_write_block(osb, publish_bh, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
- brelse(publish_bh);
-
- /* Read disk for all Publish Sectors */
- status = ocfs_read_blocks(osb, osb->publish_blkno, osb->max_nodes,
- publish_bhs, 0, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
-
- ocfs_update_publish_map(osb, publish_bhs, 1);
-
- for(i = 0; i < osb->max_nodes; i++)
- osb->last_publ_seq_num[i] = (__u64) (-1);
-
-
/* load all node-local system inodes */
status = ocfs_init_local_system_inodes(osb);
if (status < 0) {
@@ -1731,28 +1387,9 @@
bail:
if (osb->autoconfig_bhs)
kfree(osb->autoconfig_bhs);
- if (osb->vol_node_map)
- kfree(osb->vol_node_map);
- if (osb->lock_recovery_lists)
- kfree(osb->lock_recovery_lists);
- if (osb->last_publ_seq_num)
- kfree(osb->last_publ_seq_num);
if (osb->node_cfg_info)
kfree(osb->node_cfg_info);
finally:
- if (publish) {
- if (publish_bh) {
- brelse(publish_bh);
- }
- }
- if (publish_bhs[0]) {
- int i;
- for(i = 0; i < osb->max_nodes; i++)
- if (publish_bhs[i])
- brelse(publish_bhs[i]);
- }
-
-done_nojournal:
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_initialize_osb */
@@ -1824,32 +1461,16 @@
static int ocfs_check_volume (ocfs_super * osb)
{
int status = 0;
- ocfs_publish *publish = NULL;
int node_num = osb->node_num;
- struct buffer_head * publish_bh = NULL;
- int mounted;
+ int dirty;
ocfs2_dinode *local_alloc = NULL; /* only used if we
* recover
* ourselves. */
LOG_ENTRY ();
- /* Read the node's publish sector */
- status = ocfs_read_block(osb, (osb->publish_blkno + osb->node_num),
- &publish_bh, 0, NULL);
- if (status < 0) {
- LOG_ERROR_STATUS (status);
- goto finally;
- }
-
- publish = (ocfs_publish *) publish_bh->b_data;
- /* we copy this out of the publish sector and then unlock
- * the bh as other functions will need to modify it. */
- mounted = publish->mounted;
- publish = NULL;
-
/* Init our journal object. */
- status = ocfs_journal_init(osb);
+ status = ocfs_journal_init(osb, &dirty);
if (status < 0) {
LOG_ERROR_STR("Could not initialize journal!");
goto finally;
@@ -1858,7 +1479,7 @@
/* If the journal was unmounted cleanly then we don't want to
* recover anything. Otherwise, journal_load will do that
* dirty work for us :) */
- if (!mounted) {
+ if (!dirty) {
status = ocfs_journal_wipe(osb->journal, 0);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1872,7 +1493,7 @@
/* will play back anything left in the journal. */
ocfs_journal_load(osb->journal);
- if (mounted) {
+ if (dirty) {
/* recover my local alloc if we didn't unmount cleanly. */
status = ocfs_begin_local_alloc_recovery(osb,
node_num,
@@ -1885,18 +1506,13 @@
* ourselves as mounted. */
}
- /* 'mounted' flag in publish sector should not be set until
- * after we successfully load the journal. */
- status = ocfs_publish_set_mounted(osb, osb->node_num);
- if (status < 0)
- LOG_ERROR_STR("Could not set mounted flag!");
LOG_TRACE_STR("Journal loaded.");
status = ocfs_load_local_alloc(osb);
if (status < 0)
LOG_ERROR_STATUS(status);
- if (mounted) {
+ if (dirty) {
status = ocfs_complete_local_alloc_recovery(osb, local_alloc);
if (status < 0) {
LOG_ERROR_STATUS(status);
@@ -1912,9 +1528,6 @@
if (local_alloc)
kfree(local_alloc);
- if (publish_bh)
- brelse(publish_bh);
-
LOG_EXIT_STATUS (status);
return status;
} /* ocfs_check_volume */
@@ -1942,21 +1555,12 @@
list_del (&(osb->osb_next));
up (&(OcfsGlobalCtxt.global_res));
- for (i=0; i<osb->max_nodes; i++)
- ocfs_recover_oin_locks(osb, i);
-
for(i = 0; i < osb->total_autoconfig_blocks; i++)
if (osb->autoconfig_bhs[i])
brelse(osb->autoconfig_bhs[i]);
if (osb->autoconfig_bhs)
kfree(osb->autoconfig_bhs);
- if (osb->vol_node_map)
- kfree(osb->vol_node_map);
- if (osb->lock_recovery_lists)
- kfree(osb->lock_recovery_lists);
- if (osb->last_publ_seq_num)
- kfree(osb->last_publ_seq_num);
if (osb->node_cfg_info)
kfree(osb->node_cfg_info);
More information about the Ocfs2-commits
mailing list